## Comparison of GS fraction new RNA from old vs new gtf for KDii samples
Author: Robert Ietswaart  
Date: 20240308  
License: BSD2.  
Load modules j3dl and activate virtual environment using j4RNAdecay on O2.  
Python v3.7.4

Source: `Comparison_GS_KDii_20240229.ipynb`  
For Subcellular Timelapse seq project. 

In [1]:
import os
import re
import copy
import numpy as np
import pandas as pd
pd.set_option("display.max_columns", None)
pd.set_option("display.max_rows", None)
import logging
import argparse
import matplotlib.pyplot as plt
plt.rcParams['pdf.fonttype'] = 42
plt.rcParams['ps.fonttype'] = 42
import seaborn as sns
from scipy.cluster import hierarchy
from scipy.stats import gmean, gstd, spearmanr, mode
import new_total_ratio_jit as ntr
import fit

from scipy.stats import fisher_exact, chi2_contingency, mannwhitneyu

from __init__ import default_logger_format, default_date_format

In [2]:
# Notebook-level setup (originally sketched as `def main():`).
# Seed NumPy's global random number generator.
np.random.seed(12345)

# No command-line options are defined; an explicit empty argument string is
# parsed instead of sys.argv.
parser = argparse.ArgumentParser(
    description='Compare GS MAP value between old and new gtf in KDii samples')
args = parser.parse_args("")

# Input/output directories, all located below the same bseq folder.
_bseq_dir = os.path.join('/n', 'groups', 'churchman', 'ri23', 'bseq')
path_old = os.path.join(_bseq_dir, 'GS20230110_RBPii')
path_new = os.path.join(_bseq_dir, 'GS20240305_KD_ii')
outpath = path_new
path_de = os.path.join(_bseq_dir, 'GS20240122_KD_ii', 'DEseq2')
path_wt = os.path.join(_bseq_dir, 'GS20231201_K562')

# Organisms, samples, replicates and fractions
organisms = ['m', 'h']
org_map = dict(m='mouse', h='human')
org_red_reps = dict(m=['G_R', 'H_S'], h=['T', 'U'])
RBPs = ['scramble', 'DIS3', 'EXOSC10', 'PABPN1', 'ZFC3H1']
reps = ['1', '2']
reps_K562 = ['T', 'U']
fracs = ['nuc', 'tot']

# Only one read type is analysed here. Other read types listed by the author:
#   'exons', 'exons_bamlist_v2.0.5d_oml', 'unspliced_junctions', 'introns',
#   'not_introns', 'protein_coding', 'retained_introns',
#   'retained_introns_with_exons', 'no_retained_introns', 'gene'
read_types = ['exons_bamlist_v2.0.5d_oml']

# pc values used for the KD samples, and the scramble/KD pc ratios that are
# multiplied with them to obtain the pc of the scramble samples.
PC_KDs = ['0.015', '0.025', '0.035']
RATIO_PC_SCR_KD = ['0.2', '0.4', '0.6', '0.8', '1.0', '1.2', '1.4', '1.6']

# Contrast names used in the DE file names: two fixed contrasts followed by
# one KD-vs-scramble contrast per knockdown in RBPs.
contrast_types = ['compartment_tot_vs_nuc', 'label_time_60_vs_UL']
contrast_types += ['condition_' + kd + '_vs_scramble' for kd in RBPs[1:]]

# Add a logger specific to the project and processing stage
logger = logging.getLogger('GS_final')
log_file = os.path.join(outpath,'LogErr', 'Comparison_GS20240229_KD_ii.log')
# FileHandler does not create missing parent directories, so make sure the
# log directory exists before the file is opened.
os.makedirs(os.path.dirname(log_file), exist_ok=True)
formatter = logging.Formatter(default_logger_format,
                              datefmt=default_date_format)
# logging.getLogger returns the same logger object on every call, so simply
# adding a handler each time this cell is re-run would stack FileHandlers and
# write every record to the log file multiple times. Reuse the handler that is
# already attached for this file, if any.
log_handler = next(
    (h for h in logger.handlers
     if isinstance(h, logging.FileHandler)
     and h.baseFilename == os.path.abspath(log_file)),
    None)
if log_handler is None:
    log_handler = logging.FileHandler(log_file)
    logger.addHandler(log_handler)
log_handler.setFormatter(formatter)

# Time point indices (as strings) and the matching time labels; the labels
# are used to build the column names of the GS tables.
time_id = list(map(str, range(2)))
time_mins = ['UL', '60']

# Suffixes of the GS summary statistics; the '.' is replaced by a space when
# the column names are built.
OUT_TYPES = ['.Mean', '.MAP', '.0.025.quantile', '.0.975.quantile']

logger.info('Load GS outputs')
# GS maps a sample key to the table read from its .tsv file (o = old, n = new):
#   'o' + rbp + rep + frac                      -> table from path_old
#   read_type + rbp + rep + frac + pc           -> KD table from path_new
#   read_type + rbp + rep + frac + pc_kd + ratio -> scramble table from path_new
# New tables are optional: a missing file is only logged.
GS = {}

for rbp in RBPs:
    is_scramble = rbp == 'scramble'
    for r in reps:
        for fr in fracs:
            filename_old = 'GS20230110_' + rbp + '_' + fr + '_rep' + r + '.tsv'
            GS['o'+rbp+r+fr] = pd.read_csv(os.path.join(path_old, filename_old), sep='\t')
            for rt in read_types:
                if not is_scramble:
                    # KD sample: one file per pc value
                    for pc in PC_KDs:
                        filename_new = 'GS20240305_' + rbp + '_' + fr + '_rep' + r + '_' + rt + \
                                       '_pc_' + pc + '.tsv'
                        new_file = os.path.join(path_new, filename_new)
                        if not os.path.exists(new_file):
                            logger.info('not present: %s', filename_new)
                            continue
                        GS[rt+rbp+r+fr+pc] = pd.read_csv(new_file, sep='\t')
                    continue

                # scramble sample: one file per (pc_kd, ratio) pair, where the
                # pc in the file name is ratio * pc_kd rounded to 3 decimals
                for pc_kd in PC_KDs:
                    for ratio in RATIO_PC_SCR_KD:
                        pc = str(round(float(ratio) * float(pc_kd),3))
                        logger.info('pc_kd %s, ratio %s, pc %s', pc_kd, ratio, pc)
                        filename_new = 'GS20240305_' + rbp + '_' + fr + '_rep' + r + '_' + rt + \
                                       '_pc_' + pc + '_version_pc_kd_' + pc_kd + '.tsv'
                        new_file = os.path.join(path_new, filename_new)
                        if not os.path.exists(new_file):
                            logger.info('not present: %s', filename_new)
                            continue
                        GS[rt+rbp+r+fr+pc_kd+str(ratio)] = pd.read_csv(new_file, sep='\t')
            

            
# K562 tables from path_wt: the file names carry the K562 replicate label
# (reps_K562), the GS key carries the replicate number at the same position
# in reps ('nK562' + rep + frac).
for i, rr in enumerate(reps_K562):
    r = reps[i]
    for fr in fracs:
        filename_wt = ''.join(['GS20231201_', rr, '_', fr, '.tsv'])
        wt_file = os.path.join(path_wt, filename_wt)
        GS['nK562'+r+fr] = pd.read_csv(wt_file, sep='\t')
                    
# DE maps frac + read_type + contrast_type to the table read from the matching
# contrast .csv file in path_de; a missing file is only logged.
DE = {}
logger.info('Load DE outputs')
for fr in ('nuc', 'all'):
    for rt in read_types:
        for ct in contrast_types:
            filename_de = 'GS20240122_KD_ii_DEseq2_contrast_' + fr + '_' + rt +'_' +ct + '.csv'
            de_file = os.path.join(path_de, filename_de)
            if not os.path.exists(de_file):
                logger.info('not present: %s', filename_de)
                continue
            DE[fr+rt+ct] = pd.read_csv(de_file)
                
                
logger.info('Load Bayes Rates and BayesFactor outputs')
# Bayes rates live in one directory per organism, the Bayes factor files of
# all organisms share a single directory.
path_b = {
    'h': os.path.join('/n','groups','churchman','ri23','bseq','Bayes20240120_K562'),
    'm': os.path.join('/n','groups','churchman','ri23','bseq','Bayes20240120_3T3'),
}
path_k = os.path.join('/n','groups','churchman','ri23','bseq','BayesFactor20240112')

B = {}          #Bayes fits file, keyed by organism
K = {}          #Bayes Factor, keyed by organism
for o in organisms:
    organism_name = org_map[o]
    filename_b = 'Bayes_Rates_20240120_'+ organism_name + '_final.tsv'
    filename_k = 'Bayes_factor_20240112_' + organism_name + '_final.tsv'
    B[o] = pd.read_csv(os.path.join(path_b[o], filename_b), sep='\t')
    K[o] = pd.read_csv(os.path.join(path_k, filename_k), sep='\t')


# logger.info('Load Detained intron containing gene lists')    
# DI = dict()
# DI_genes_n = ['64','33']
# for n in DI_genes_n:
#     DI[n] = pd.read_csv(os.path.join(outpath, 'GenesDev2015Sharp_DIgenes_' + n + '.csv'),
#                         header=None, names=['Symbol'])
    
    
# Organism key used by the cells below to index B (and K): 'h' is the human
# entry (see org_map).
o = 'h' 

INFO: [2024-03-11 12:16:28] GS_final - Load GS outputs
INFO: [2024-03-11 12:16:28] GS_final - pc_kd 0.015, ratio 0.2, pc 0.003
INFO: [2024-03-11 12:16:28] GS_final - pc_kd 0.015, ratio 0.4, pc 0.006
INFO: [2024-03-11 12:16:28] GS_final - pc_kd 0.015, ratio 0.6, pc 0.009
INFO: [2024-03-11 12:16:28] GS_final - pc_kd 0.015, ratio 0.8, pc 0.012
INFO: [2024-03-11 12:16:28] GS_final - pc_kd 0.015, ratio 1.0, pc 0.015
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.015, ratio 1.2, pc 0.018
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.015, ratio 1.4, pc 0.021
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.015, ratio 1.6, pc 0.024
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.025, ratio 0.2, pc 0.005
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.025, ratio 0.4, pc 0.01
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.025, ratio 0.6, pc 0.015
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.025, ratio 0.8, pc 0.02
INFO: [2024-03-11 12:16:29] GS_final - pc_kd 0.025, ratio 1.0, pc 0.025
INFO: [2024

INFO: [2024-03-11 12:16:41] GS_final - not present: GS20240122_KD_ii_DEseq2_contrast_all_exons_bamlist_v2.0.5d_oml_condition_EXOSC10_vs_scramble.csv
INFO: [2024-03-11 12:16:41] GS_final - not present: GS20240122_KD_ii_DEseq2_contrast_all_exons_bamlist_v2.0.5d_oml_condition_PABPN1_vs_scramble.csv
INFO: [2024-03-11 12:16:41] GS_final - not present: GS20240122_KD_ii_DEseq2_contrast_all_exons_bamlist_v2.0.5d_oml_condition_ZFC3H1_vs_scramble.csv
INFO: [2024-03-11 12:16:41] GS_final - Load Bayes Rates and BayesFactor outputs


In [4]:
# GS.keys()

In [5]:
# DE.keys()

### Estimating the scramble/KD pc ratio with the lowest absolute median difference between KD and scramble (scr)

In [4]:
order10 = 4
x_range = [10**(i) for i in range(-order10, order10+2, 2)]


# For every (fraction, KD, replicate, pc_KD) combination, scan the candidate
# scramble/KD pc ratios and keep the ratio whose scramble table is closest to
# the KD table, measured as |median(KD - scramble)| of the selected GS
# statistic over the genes usable in both tables. One row per combination is
# recorded in corrs:
#   ratio / pc_scr          selected ratio and the resulting scramble pc
#   median_delta            |median(KD - scramble)| at the selected ratio
#   median_KD / median_scr  medians of the two samples at the selected ratio
#   n_genes                 number of values compared at the selected ratio
# NOTE: earlier runs of this cell recorded one row per ratio (optionally with
# the Pearson correlation np.corrcoef(x, y)[0, 1] as 'r_pearson'); only the
# best ratio is kept now, so that unused correlation is no longer computed.
corrs = {col: [] for col in ('read_type', 'rbp', 'frac', 'rep', 'time',
                             'out_type', 'pc_KD', 'ratio', 'pc_scr',
                             'n_genes', 'median_delta', 'median_scr',
                             'median_KD')}

t = time_mins[1]
rt = read_types[0]
ot = OUT_TYPES[1]
stat_suffix = ot.replace('.',' ')

# Genes with a non-missing 'T.half_life_nuc.Mean' and 'U.half_life_nuc.Mean'
# in the Bayes rates table; this filter is the same for every sample.
genes_b = set(B[o][~B[o]['T.half_life_nuc.Mean'].isna()]['Gene'])
genes_b = genes_b.intersection(set(B[o][~B[o]['U.half_life_nuc.Mean'].isna()]['Gene']))

for fr in fracs:
    for rbp in RBPs[1:]:
        for r in reps:
            # Column names of the statistic; they depend on neither pc_kd
            # nor ratio.
            if fr == 'tot':
                key1 = rbp+'_'+t+stat_suffix
                key2 = 'scramble_'+t+stat_suffix
            else:
                key1 = rbp+'_'+t+'_'+fr+'_rep'+r+'_sort.human_noMT'+stat_suffix
                key2 = 'scramble_'+t+'_'+fr+'_rep'+r+'_sort.human_noMT'+stat_suffix

            for pc_kd in PC_KDs:
                gkey1 = rt+rbp+r+fr+pc_kd
                # Tables whose file was missing at load time were only logged
                # as 'not present' and are absent from GS.
                if gkey1 not in GS:
                    logger.info('not loaded, skipped: %s', gkey1)
                    continue
                kd_tab = GS[gkey1]
                genes_kd = genes_b.intersection(set(kd_tab[~kd_tab[key1].isna()]['Gene']))

                min_med = np.inf
                ratio_hat = -1
                pc_scr_hat = -1
                med_x_hat = -1
                med_y_hat = -1
                n_genes_hat = 0
                for ratio in RATIO_PC_SCR_KD:
                    gkey2 = rt+'scramble'+r+fr+pc_kd+str(ratio)
                    if gkey2 not in GS:
                        logger.info('not loaded, skipped: %s', gkey2)
                        continue
                    scr_tab = GS[gkey2]
                    pc_scr = str(round(float(ratio) * float(pc_kd),3))

                    genes = genes_kd.intersection(set(scr_tab[~scr_tab[key2].isna()]['Gene']))

                    # Sorting both tables by gene aligns x and y element-wise.
                    # NOTE(review): assumes each gene occurs once per table - confirm.
                    x = kd_tab[kd_tab['Gene'].isin(genes)].sort_values(by='Gene')[key1].values
                    y = scr_tab[scr_tab['Gene'].isin(genes)].sort_values(by='Gene')[key2].values

                    med_x = np.median(x)
                    med_y = np.median(y)
                    med = np.median(x-y)
#                     med = (med_x-med_y) / (0.5 * (med_x+med_y))

                    if abs(med) < min_med:
                        min_med = abs(med)
                        ratio_hat = ratio
                        pc_scr_hat = pc_scr
                        med_x_hat = med_x
                        med_y_hat = med_y
                        # Gene count of the selected ratio, not of the last
                        # ratio that happened to be scanned.
                        n_genes_hat = len(x)

                if ratio_hat == -1:
                    # No scramble table could be compared (none loaded, or no
                    # finite median difference): do not record a sentinel row.
                    logger.info('%s %s %s pc_kd %s: no comparable scramble sample, skipped',
                                fr, rbp, r, pc_kd)
                    continue

                corrs['n_genes'].append(n_genes_hat)
                corrs['rbp'].append(rbp)
                corrs['frac'].append(fr)
                corrs['rep'].append(r)
                corrs['read_type'].append(rt)
                corrs['out_type'].append(ot)
                corrs['time'].append(t)
                corrs['median_delta'].append(min_med)
                corrs['median_KD'].append(med_x_hat)
                corrs['median_scr'].append(med_y_hat)
                corrs['pc_KD'].append(pc_kd)
                corrs['ratio'].append(ratio_hat)
                corrs['pc_scr'].append(str(pc_scr_hat))

                logger.info('%s %s %s pc_kd %s, ratio %s, pc_scr %s',
                            fr, rbp, r, pc_kd, ratio_hat, pc_scr_hat)

corrs = pd.DataFrame.from_dict(corrs)
filename = 'GS20240305_KD_ii_median_difference_w_scr.tsv'
# corrs.to_csv(os.path.join(outpath, filename), sep='\t',index=False)

INFO: [2024-03-11 12:17:38] GS_final - nuc DIS3 1 pc_kd 0.015, ratio 0.6, pc_scr 0.009
INFO: [2024-03-11 12:17:39] GS_final - nuc DIS3 1 pc_kd 0.025, ratio 0.6, pc_scr 0.015
INFO: [2024-03-11 12:17:40] GS_final - nuc DIS3 1 pc_kd 0.035, ratio 0.6, pc_scr 0.021
INFO: [2024-03-11 12:17:40] GS_final - nuc DIS3 2 pc_kd 0.015, ratio 1.0, pc_scr 0.015
INFO: [2024-03-11 12:17:41] GS_final - nuc DIS3 2 pc_kd 0.025, ratio 1.0, pc_scr 0.025
INFO: [2024-03-11 12:17:42] GS_final - nuc DIS3 2 pc_kd 0.035, ratio 1.0, pc_scr 0.035
INFO: [2024-03-11 12:17:43] GS_final - nuc EXOSC10 1 pc_kd 0.015, ratio 0.6, pc_scr 0.009
INFO: [2024-03-11 12:17:43] GS_final - nuc EXOSC10 1 pc_kd 0.025, ratio 0.6, pc_scr 0.015
INFO: [2024-03-11 12:17:44] GS_final - nuc EXOSC10 1 pc_kd 0.035, ratio 0.6, pc_scr 0.021
INFO: [2024-03-11 12:17:45] GS_final - nuc EXOSC10 2 pc_kd 0.015, ratio 0.4, pc_scr 0.006
INFO: [2024-03-11 12:17:46] GS_final - nuc EXOSC10 2 pc_kd 0.025, ratio 0.4, pc_scr 0.01
INFO: [2024-03-11 12:17:46] G

In [5]:
corrs

Unnamed: 0,read_type,rbp,frac,rep,time,out_type,pc_KD,ratio,pc_scr,n_genes,median_delta,median_scr,median_KD
0,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.2,0.003,12616,-0.3414,1.0,0.41085
1,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.4,0.006,12616,-0.0939,0.55225,0.41085
2,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.6,0.009,12616,0.0011,0.3657,0.41085
3,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.8,0.012,12616,0.0839,0.27965,0.41085
4,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.0,0.015,12616,0.1365,0.2297,0.41085
5,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.2,0.018,12616,0.17205,0.19715,0.41085
6,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.4,0.021,12616,0.1995,0.1735,0.41085
7,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.6,0.024,12616,0.2203,0.15565,0.41085
8,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.025,0.2,0.005,12616,-0.32455,0.6748,0.2748
9,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.025,0.4,0.01,12616,-0.05805,0.33105,0.2748


In [23]:
corrs #med = np.median(x-y)

Unnamed: 0,read_type,rbp,frac,rep,time,out_type,pc_KD,ratio,pc_scr,n_genes,median_delta,median_scr,median_KD
0,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.6,0.009,12616,0.0011,0.3657,0.41085
1,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.025,0.6,0.015,12616,0.0165,0.2297,0.2748
2,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.035,0.6,0.021,12616,0.0193,0.1735,0.21485
3,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.015,1.0,0.015,12449,0.0,0.1858,0.1926
4,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.025,1.0,0.025,12449,0.0,0.1188,0.1244
5,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.035,1.0,0.035,12449,0.0,0.0885,0.0939
6,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.015,0.6,0.009,12635,0.0,0.3654,0.3704
7,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.025,0.6,0.015,12635,0.0037,0.2297,0.2496
8,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.035,0.6,0.021,12635,0.0094,0.1734,0.1971
9,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,2,60,.MAP,0.015,0.4,0.006,12489,0.0307,0.4558,0.4099


In [20]:
# Log the median MAP value of the nuclear K562 tables, first over all rows and
# then restricted to the genes with a non-missing 'T.half_life_nuc.Mean' and
# 'U.half_life_nuc.Mean' in B[o]. Replicate '1' uses column 'T4 MAP',
# replicate '2' uses column 'U4 MAP'.
has_T = ~B[o]['T.half_life_nuc.Mean'].isna()
has_U = ~B[o]['U.half_life_nuc.Mean'].isna()
genes = set(B[o][has_T]['Gene']) & set(B[o][has_U]['Gene'])
fr = 'nuc'
for r, map_col in (('1', 'T4 MAP'), ('2', 'U4 MAP')):
    k562_tab = GS['nK562'+r+fr]
    logger.info(k562_tab[map_col].median())
    logger.info(k562_tab[k562_tab['Gene'].isin(genes)][map_col].median())

INFO: [2024-03-09 23:25:09] GS_final - 0.455475
INFO: [2024-03-09 23:25:10] GS_final - 0.46365
INFO: [2024-03-09 23:25:10] GS_final - 0.4097
INFO: [2024-03-09 23:25:10] GS_final - 0.4203


In [21]:
# Log the median MAP value of the total K562 tables, first over all rows and
# then restricted to the genes with a non-missing 'T.half_life_nuc.Mean' and
# 'U.half_life_nuc.Mean' in B[o]. Replicate '1' uses column 'T4 MAP',
# replicate '2' uses column 'U4 MAP'.
has_T = ~B[o]['T.half_life_nuc.Mean'].isna()
has_U = ~B[o]['U.half_life_nuc.Mean'].isna()
genes = set(B[o][has_T]['Gene']) & set(B[o][has_U]['Gene'])
fr = 'tot'
for r, map_col in (('1', 'T4 MAP'), ('2', 'U4 MAP')):
    k562_tab = GS['nK562'+r+fr]
    logger.info(k562_tab[map_col].median())
    logger.info(k562_tab[k562_tab['Gene'].isin(genes)][map_col].median())

INFO: [2024-03-09 23:34:44] GS_final - 0.16770000000000002
INFO: [2024-03-09 23:34:44] GS_final - 0.1718
INFO: [2024-03-09 23:34:44] GS_final - 0.19215
INFO: [2024-03-09 23:34:44] GS_final - 0.1967


In [5]:
corrs

Unnamed: 0,read_type,rbp,frac,rep,time,out_type,pc_KD,ratio,pc_scr,n_genes,median_delta,median_scr,median_KD
0,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.6,0.009,19683,0.0,0.3596,0.4213
1,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.025,0.6,0.015,19683,0.0,0.2256,0.2813
2,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.035,0.6,0.021,19683,0.0041,0.1702,0.2205
3,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.015,0.8,0.012,18610,0.0,0.21195,0.1966
4,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.025,0.8,0.02,18610,0.0,0.13395,0.1269
5,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.035,0.8,0.028,18610,0.0,0.0999,0.09525
6,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.015,0.6,0.009,19935,0.0,0.3549,0.3916
7,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.025,0.6,0.015,19935,0.0,0.2229,0.2644
8,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.035,0.6,0.021,19935,0.0006,0.1677,0.2089
9,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,2,60,.MAP,0.015,0.4,0.006,19184,0.0,0.4153,0.4353


In [11]:
corrs

Unnamed: 0,read_type,rbp,frac,rep,time,out_type,pc_KD,ratio,pc_scr,n_genes,median_delta
0,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.6,0.009,19683,0.0
1,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.025,0.6,0.015,19683,0.0
2,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.035,0.6,0.021,19683,0.0041
3,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.015,0.8,0.012,18610,0.0
4,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.025,0.8,0.02,18610,0.0
5,exons_bamlist_v2.0.5d_oml,DIS3,nuc,2,60,.MAP,0.035,0.8,0.028,18610,0.0
6,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.015,0.6,0.009,19935,0.0
7,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.025,0.6,0.015,19935,0.0
8,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,1,60,.MAP,0.035,0.6,0.021,19935,0.0006
9,exons_bamlist_v2.0.5d_oml,EXOSC10,nuc,2,60,.MAP,0.015,0.4,0.006,19184,0.0


In [47]:
corrs

Unnamed: 0,read_type,rbp,frac,rep,time,out_type,pc_KD,ratio,pc_scr,n_genes,r_pearson,median_delta
0,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.2,0.003,19683,0.349714,-0.2375
1,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.4,0.006,19683,0.453194,-0.0329
2,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.6,0.009,19683,0.474137,0.0
3,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,0.8,0.012,19683,0.466447,0.0565
4,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.0,0.015,19683,0.451303,0.1094
5,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.2,0.018,19683,0.43532,0.1468
6,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.4,0.021,19683,0.420379,0.1742
7,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.015,1.6,0.024,19683,0.407079,0.1964
8,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.025,0.2,0.005,19683,0.36043,-0.2538
9,exons_bamlist_v2.0.5d_oml,DIS3,nuc,1,60,.MAP,0.025,0.4,0.01,19683,0.431072,-0.0299


# OLD

In [2]:
!pip freeze

asteval==0.9.23
attrs==19.3.0
backcall==0.1.0
backports.zoneinfo==0.2.1
bleach==3.1.4
certifi==2021.5.30
cffi==1.15.0
charset-normalizer==2.0.1
cycler==0.10.0
decorator==4.4.2
defusedxml==0.6.0
dill==0.3.6
docopt==0.6.2
entrypoints==0.3
future==0.18.2
goatools==1.1.6
gtfparse==1.2.1
idna==3.2
importlib-metadata==1.5.2
ipykernel==5.2.0
ipython==7.13.0
ipython-genutils==0.2.0
ipywidgets==7.5.1
jedi==0.16.0
Jinja2==2.11.1
joblib==1.1.0
jsonschema==3.2.0
jupyter==1.0.0
jupyter-client==6.1.2
jupyter-console==6.1.0
jupyter-core==4.6.3
kiwisolver==1.1.0
latexify-py==0.2.0
llvmlite==0.36.0
lmfit==1.0.2
MarkupSafe==1.1.1
matplotlib==3.2.1
mistune==0.8.4
mpmath==1.2.1
nbconvert==5.6.1
nbformat==5.0.4
networkx==2.4
notebook==6.0.3
numba==0.53.1
numba-scipy==0.3.0
numpy==1.16.5
pandas==1.0.3
pandocfilters==1.4.2
parso==0.6.2
patsy==0.5.2
pexpect==4.8.0
pickleshare==0.7.5
Pillow==7.0.0
prometheus-client==0.7.1
prompt-toolkit==3.0.4
ptyprocess==