# Prepare reference gene file

This notebook creates a reference gene ranking file to use to compare SOPHIE generated gene ranking. The reference ranking information is obtained from [this repository](https://github.com/DartmouthStantonLab/GAPE). This [RDS object](https://github.com/DartmouthStantonLab/GAPE/blob/main/Pa_GPL84_refine_ANOVA_List_unzip.rds) contains 73 experiments. For each experiment, we will identify DEGs using log2FC > 1 and FDR < 0.05. We will rank genes by the proportion that they appeared as DE.

In [1]:
%load_ext autoreload
%autoreload 2
%load_ext rpy2.ipython

import os
import pandas as pd
import rpy2.robjects as ro
from rpy2.robjects import pandas2ri
from rpy2.robjects.conversion import localconverter

from ponyo import utils

pandas2ri.activate()

In [2]:
# Read in config variables
base_dir = os.path.abspath(os.path.join(os.getcwd(), "../"))

config_filename = os.path.abspath(
    os.path.join(base_dir, "configs", "config_pseudomonas_33245.tsv")
)
params = utils.read_config(config_filename)

In [3]:
# Load params
local_dir = params["local_dir"]
reference_gene_filename = os.path.join(
    local_dir, "Pa_GPL84_refine_ANOVA_List_unzip.rds"
)

In [4]:
readRDS = ro.r["readRDS"]

In [5]:
reference_Robject = readRDS(reference_gene_filename)

In [6]:
# For each experiment get df
# For each df, if label gene as DEGs based on log2FC>1 and FDR<0.05
# Concatenate series
num_experiments = len(reference_Robject)
reference_stats_df = pd.DataFrame()

for i in range(num_experiments):
    print(i)
    # Get df for experiment
    reference_df = ro.conversion.rpy2py(reference_Robject[i])

    reference_df = reference_df.set_index("ProbeID")
    print(reference_df.head())

    # Find DEGs
    degs_ids = list(
        reference_df[
            (abs(reference_df["Log2FC"]) > 1) & (reference_df["FDR"] < 0.05)
        ].index
    )
    reference_df["DEG"] = 0
    reference_df.loc[degs_ids, "DEG"] = 1
    print(reference_df.head())

    # Create df with labels for if gene is DE or not
    if i == 0:
        reference_stats_df = reference_df["DEG"].to_frame("experiment_0")
    else:
        reference_stats_df = pd.merge(
            reference_stats_df,
            reference_df["DEG"].to_frame(f"experiment_{i}"),
            left_index=True,
            right_index=True,
            how="left",
        )
reference_stats_df

0
                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.925179   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.390903   
AFFX-Athal_ubq_at                             --control     0.098989   
AFFX-Bsubtilis_dapB_at                        --control     0.000146   
AFFX-Bsubtilis_lys_at     

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.691582   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.835736   
AFFX-Athal_ubq_at                             --control     0.910003   
AFFX-Bsubtilis_dapB_at                        --control     0.060899   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.071010   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.604158   
AFFX-Athal_ubq_at                             --control     0.192904   
AFFX-Bsubtilis_dapB_at                        --control     0.327454   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.012258   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.001709   
AFFX-Athal_ubq_at                             --control     0.024282   
AFFX-Bsubtilis_dapB_at                        --control     0.004839   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.737790   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.737964   
AFFX-Athal_ubq_at                             --control     0.594757   
AFFX-Bsubtilis_dapB_at                        --control     0.258264   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID   ANOVAPvalue  \
ProbeID                                                                 
AFFX-Athal_actin_at      ACT7                 --control  5.596473e-09   
AFFX-Athal_GAPDH_at     GAPC1                 --control  6.662417e-06   
AFFX-Athal_ubq_at                             --control  4.294044e-05   
AFFX-Bsubtilis_dapB_at                        --control  6.675527e-08   
AFFX-Bsubtilis_lys_at 

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.913541   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.442443   
AFFX-Athal_ubq_at                             --control     0.948950   
AFFX-Bsubtilis_dapB_at                        --control     0.929461   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_pheB_at                                                      
AFFX-Bsubtilis_thrC_at                                                      

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.009146   
AFFX-Athal_ubq_at                             --control     0.017631   
AFFX-Bsubtilis_dapB_at                        --control     0.239420   
AFFX-Bsubtilis_pheB_at                        --control     0.000348   
AFFX-Bsubtilis_thrC_at      

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.321601   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.256846   
AFFX-Athal_ubq_at                             --control     0.036846   
AFFX-Bsubtilis_dapB_at                        --control     0.823982   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.003972   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.069466   
AFFX-Athal_ubq_at                             --control     0.001375   
AFFX-Bsubtilis_dapB_at                        --control     0.024426   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.866167   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.027325   
AFFX-Athal_ubq_at                             --control     0.262710   
AFFX-Bsubtilis_dapB_at                        --control     0.003131   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.285543   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.789324   
AFFX-Athal_ubq_at                             --control     0.953777   
AFFX-Bsubtilis_dapB_at                        --control     0.786633   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.426821   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.222276   
AFFX-Athal_ubq_at                             --control     0.861573   
AFFX-Bsubtilis_dapB_at                        --control     0.032640   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.607940   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.553911   
AFFX-Athal_ubq_at                             --control     0.176891   
AFFX-Bsubtilis_dapB_at                        --control     0.706494   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       
AFFX-Bsubtilis_pheB_at                                                      

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.225842   
AFFX-Athal_ubq_at                             --control     0.109570   
AFFX-Bsubtilis_dapB_at                        --control     0.510394   
AFFX-Bsubtilis_lys_at                         --control     0.836197   
AFFX-Bsubtilis_pheB_at      

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.709313   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.414666   
AFFX-Athal_ubq_at                             --control     0.581572   
AFFX-Bsubtilis_dapB_at                        --control     0.512790   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       
AFFX-Bsubtilis_pheB_at                                                      

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.123491   
AFFX-Athal_ubq_at                             --control     0.533780   
AFFX-Bsubtilis_dapB_at                        --control     0.460360   
AFFX-Bsubtilis_lys_at                         --control     0.064704   
AFFX-Bsubtilis_pheB_at      

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.780541   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.665710   
AFFX-Athal_ubq_at                             --control     0.771920   
AFFX-Bsubtilis_dapB_at                        --control     0.528584   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.470474   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.311360   
AFFX-Athal_ubq_at                             --control     0.522738   
AFFX-Bsubtilis_dapB_at                        --control     0.767733   
AFFX-Bsubtilis_lys_at       

                                                                GeneTitle  \
ProbeID                                                                     
AFFX-Athal_actin_at                                               actin 7   
AFFX-Athal_GAPDH_at     glyceraldehyde-3-phosphate dehydrogenase C sub...   
AFFX-Athal_ubq_at                                                           
AFFX-Bsubtilis_dapB_at                                                      
AFFX-Bsubtilis_lys_at                                                       

                       Symbol LocusTag IntergenicSpotID  ANOVAPvalue  \
ProbeID                                                                
AFFX-Athal_actin_at      ACT7                 --control     0.916583   
AFFX-Athal_GAPDH_at     GAPC1                 --control     0.894893   
AFFX-Athal_ubq_at                             --control     0.867912   
AFFX-Bsubtilis_dapB_at                        --control     0.953814   
AFFX-Bsubtilis_lys_at       

                                              GeneTitle Symbol LocusTag  \
ProbeID                                                                   
PA0001_dnaA_at    chromosome replication initiator DnaA   dnaA   PA0001   
PA0002_dnaN_at          DNA polymerase III subunit beta   dnaN   PA0002   
PA0003_recF_at  DNA replication and repair protein RecF   recF   PA0003   
PA0004_gyrB_at                     DNA gyrase subunit B   gyrB   PA0004   
PA0005_at         lysophosphatidic acid acyltransferase   lptA   PA0005   

               IntergenicSpotID  ANOVAPvalue    Log2FC       FDR  
ProbeID                                                           
PA0001_dnaA_at                      0.360599  0.114539  0.420124  
PA0002_dnaN_at                      0.031939  0.317634  0.059077  
PA0003_recF_at                      0.290270  0.339363  0.349563  
PA0004_gyrB_at                      0.000489  0.734892  0.003431  
PA0005_at                           0.013713  1.133838  0.031151  
     

                                              GeneTitle Symbol LocusTag  \
ProbeID                                                                   
PA0001_dnaA_at    chromosome replication initiator DnaA   dnaA   PA0001   
PA0002_dnaN_at          DNA polymerase III subunit beta   dnaN   PA0002   
PA0003_recF_at  DNA replication and repair protein RecF   recF   PA0003   
PA0004_gyrB_at                     DNA gyrase subunit B   gyrB   PA0004   
PA0005_at         lysophosphatidic acid acyltransferase   lptA   PA0005   

               IntergenicSpotID   ANOVAPvalue    Log2FC           FDR  
ProbeID                                                                
PA0001_dnaA_at                   4.517336e-04  0.215629  7.779856e-04  
PA0002_dnaN_at                   5.835877e-09  0.676461  2.855457e-08  
PA0003_recF_at                   9.645185e-08  0.699288  3.602115e-07  
PA0004_gyrB_at                   2.034779e-06  0.305169  5.669502e-06  
PA0005_at                        3.190850e

Unnamed: 0_level_0,experiment_0,experiment_1,experiment_2,experiment_3,experiment_4,experiment_5,experiment_6,experiment_7,experiment_8,experiment_9,...,experiment_63,experiment_64,experiment_65,experiment_66,experiment_67,experiment_68,experiment_69,experiment_70,experiment_71,experiment_72
ProbeID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
AFFX-Athal_GAPDH_at,0,0,0,0,0,,0,0.0,0,0.0,...,0,,,,,,,,,
AFFX-Athal_actin_at,0,0,0,0,0,,0,0.0,0,0.0,...,0,,,,,,,,,
AFFX-Athal_ubq_at,0,0,0,0,0,0.0,0,0.0,1,0.0,...,0,,,,,,,,,
AFFX-Bsubtilis_dapB_at,1,0,1,0,0,0.0,0,0.0,0,0.0,...,0,,,,,,,,,
AFFX-Bsubtilis_lys_at,0,0,1,0,0,0.0,0,0.0,1,0.0,...,0,,,,,,,,,
AFFX-Bsubtilis_pheB_at,1,0,1,0,0,0.0,0,0.0,0,0.0,...,0,,,,,,,,,
AFFX-Bsubtilis_thrC_at,1,0,1,0,0,0.0,0,0.0,0,0.0,...,0,,,,,,,,,
AFFX-Bsubtilis_trpD_at,0,0,1,0,0,,0,,0,0.0,...,0,,,,,,,,,
AFFX-YEL002C_WPB1_at,0,0,0,0,0,,0,,0,,...,0,,,,,,,,,
AFFX-YEL018W_at,0,0,0,0,0,,0,,0,,...,0,,,,,,,,,


In [7]:
# Map `ProbeID` to `IntergenicSpotID` that contains PA#### IDs
example_reference_df = ro.conversion.rpy2py(reference_Robject[0])
example_reference_df.set_index("ProbeID", inplace=True)

merged_df = pd.merge(
    reference_stats_df, example_reference_df, left_index=True, right_index=True
)

# Get relevant columns (`IntergenicSpotID` and `experiment_*`)
experiment_cols = [col for col in merged_df.columns if "experiment_" in col]
merged_df = merged_df[experiment_cols]

merged_df.sum(axis=1)

ProbeID
AFFX-Athal_GAPDH_at        6.0
AFFX-Athal_actin_at        6.0
AFFX-Athal_ubq_at          7.0
AFFX-Bsubtilis_dapB_at    11.0
AFFX-Bsubtilis_lys_at     12.0
AFFX-Bsubtilis_pheB_at    13.0
AFFX-Bsubtilis_thrC_at    13.0
AFFX-Bsubtilis_trpD_at    10.0
AFFX-YEL002C_WPB1_at       8.0
AFFX-YEL018W_at            9.0
AFFX-YEL024W_RIP1_at       9.0
AFFX-YER022W_SRB4_at       7.0
AFFX-YER148W_SPT15_at     11.0
AFFX-YFL039C_ACT1_at       7.0
PA0001_dnaA_at            23.0
PA0002_dnaN_at            16.0
PA0003_recF_at            22.0
PA0004_gyrB_at            17.0
PA0005_at                 16.0
PA0006_at                 13.0
PA0007_at                 22.0
PA0008_glyS_at            22.0
PA0009_glyQ_at            24.0
PA0010_tag_at              7.0
PA0011_at                 11.0
PA0012_at                 12.0
PA0013_at                 12.0
PA0014_at                  9.0
PA0015_at                 13.0
PA0016_trkA_at            13.0
                          ... 
ig_5991130_5992382_at     13.0


In [8]:
# Aggregate to get ranking of genes
merged_df["prop DEGs"] = merged_df.sum(axis=1) / num_experiments

In [9]:
# Extract PA#### ids from `ProbeID`
# This will be used to compare against SOPHIE ranked genes
pao1_ids = [str_ls[0] for str_ls in merged_df.index.str.split("_")]
merged_df["gene id"] = pao1_ids

In [10]:
# Save file
# Here are the names that we will use for the comparison in notebook 2_identify_generic_genes_pathways.ipynb
# DE_prior_filename = output_filename
# ref_gene_col = "gene id"
# ref_rank_col = "prop DEGs"
merged_df.to_csv("GAPE_proportions.txt", sep="\t")