## Analysis of cross-regulation predictions of SRSF7
### Different parameters

In [55]:
# packages
import pandas as pd
import seaborn as sns

In [56]:
# data
srsf7_targets_500_11 = pd.read_table('./data/outputs/cross_peaks_nmd_SRSF7_500_11.tsv', header=None)
srsf7_targets_500_11p3 = pd.read_table('./data/outputs/cross_peaks_nmd_SRSF7_500_11p3.tsv', header=None)
NMD_KD = pd.read_table('./data/input_data/upf1xrn1_deltaPSI.tsv')
NMD_KD.head()
for_eCLIPs = pd.read_table('./data/outputs/exon_peaks.bed', header=None)
# gcn_data
srsf7_gcn = pd.read_table('./srsf7.tsv')

In [57]:
# nmd_kd parsing
nmd_kd = NMD_KD[['id', 'gene', 'deltaPSI.UPF1','p.UPF1','q.UPF1','deltaPSI.SMG6', 'p.SMG6', 'q.SMG6']]
nmd_kd = nmd_kd.rename(columns={'id':'exon'})
#nmd_kd[nmd_kd.gene == 'SRSF7']

In [58]:
# eCLIPs parsing
eCLIPs = for_eCLIPs.iloc[:, [3,6,10,12,13,14,15,16]]
eCLIPs.columns = ['exon', 'target', 'factor', 'strand', 'eCLIPs cell line', 'rep', 'eCLIP_p-value', 'eCLIP_logFC']
eCLIPs.head()

Unnamed: 0,exon,target,factor,strand,eCLIPs cell line,rep,eCLIP_p-value,eCLIP_logFC
0,chr1_137621_139379_-,AL627309.1,ILF3,-,K562,rep02,2.421904,5.174009
1,chr1_137621_139379_-,AL627309.1,HNRNPK,-,K562,rep02,3.064845,5.305207
2,chr1_137621_139379_-,AL627309.1,BUD13,-,K562,rep02,2.653889,3.067285
3,chr1_137621_139379_-,AL627309.1,HNRNPK,-,K562,rep01,1.620957,1.052859
4,chr1_137621_139379_-,AL627309.1,PUM1,-,K562,rep02,3.858528,5.736128


In [59]:
srsf7_targets_500_11.columns = ['factor', 'cell line', 'exon', 'target', 'deltaPSI', 'deltaPSIc', 'z', 'p', 'q', 'KDFC']
srsf7_targets_500_11p3.columns = ['factor', 'cell line', 'exon', 'target', 'deltaPSI', 'deltaPSIc', 'z', 'p', 'q', 'KDFC']
srsf7_targets_500_11.head()

Unnamed: 0,factor,cell line,exon,target,deltaPSI,deltaPSIc,z,p,q,KDFC
0,SRSF7,HepG2,chr17_45008465_45008573_+,GOSR2,-0.31,-0.31,-3.99,4.48,1.83,0.54
1,SRSF7,HepG2,chr14_45579097_45579196_+,PRPF39,-0.29,-0.29,-3.73,4.02,1.58,0.54
2,SRSF7,HepG2,chr9_125048318_125048445_+,MRRF,0.26,0.26,3.5,3.63,1.3,0.54
3,SRSF7,HepG2,chr12_51447561_51447643_+,LETMD1,0.18,0.18,3.72,4.0,1.58,0.54
4,SRSF7,HepG2,chr1_24304401_24304763_-,SRSF10,0.21,0.21,4.33,5.13,2.29,0.54


In [60]:
# merging two datasets for q-values and eCLIP thresholds = 1
two_kds_500_11 = pd.merge(nmd_kd, srsf7_targets_500_11, on='exon')
two_kds_500_11short = two_kds_500_11[['exon', 'target', 'factor', 'deltaPSI.UPF1', 'p.UPF1', 'q.UPF1', \
                'deltaPSI.SMG6', 'p.SMG6','q.SMG6', 'cell line', 'deltaPSI', 'p', 'q']]

In [61]:
# merging two datasets for q-values for KDs and eCLIP thresholds = 1.3 and 1
two_kds_500_11p3 = pd.merge(nmd_kd, srsf7_targets_500_11p3, on='exon')
two_kds_500_11p3short = two_kds_500_11p3[['exon', 'target', 'factor', 'deltaPSI.UPF1', 'p.UPF1', 'q.UPF1', \
                'deltaPSI.SMG6', 'p.SMG6','q.SMG6', 'cell line', 'deltaPSI', 'p', 'q']]

In [62]:
# merging two_kds with eCLIPs
kds_eCLIPS_500_11 = pd.merge(two_kds_500_11short, eCLIPs, on=['exon', 'target', 'factor'])
kds_eCLIPS_500_11p3 = pd.merge(two_kds_500_11p3short, eCLIPs, on=['exon', 'target', 'factor'])

In [69]:
# semi-final
cross_peaks_500_11 = kds_eCLIPS_500_11[['exon', 'target', 'factor', 'deltaPSI.UPF1', 'p.UPF1', 'q.UPF1', \
                'deltaPSI.SMG6', 'p.SMG6','q.SMG6', 'deltaPSI', 'p', 'q', 'eCLIP_p-value', 'eCLIP_logFC']]
cross_peaks_500_11p3 = kds_eCLIPS_500_11p3[['exon', 'target', 'factor', 'deltaPSI.UPF1', 'p.UPF1', 'q.UPF1', \
                'deltaPSI.SMG6', 'p.SMG6','q.SMG6', 'deltaPSI', 'p', 'q', 'eCLIP_p-value', 'eCLIP_logFC']]

In [80]:
# merging with gcn data
cross_peaks_500_11_gcn = pd.merge(cross_peaks_500_11, srsf7_gcn, how='left', on=['factor','target'])
cross_peaks_500_11p3_gcn = pd.merge(cross_peaks_500_11p3, srsf7_gcn, how='left', on=['factor','target'])



In [85]:
cross_peaks_500_11_gcn

Unnamed: 0,exon,target,factor,deltaPSI.UPF1,p.UPF1,q.UPF1,deltaPSI.SMG6,p.SMG6,q.SMG6,deltaPSI,p,q,eCLIP_p-value,eCLIP_logFC,value
0,chr1_24304401_24304763_-,SRSF10,SRSF7,0.59,12.39,8.77,0.48,12.52,8.56,0.21,5.13,2.29,1.672946,1.252367,-0.003808
1,chr1_24304401_24304763_-,SRSF10,SRSF7,0.59,12.39,8.77,0.48,12.52,8.56,0.21,5.13,2.29,2.004966,1.802407,-0.003808
2,chr1_24304401_24304763_-,SRSF10,SRSF7,0.59,12.39,8.77,0.48,12.52,8.56,0.21,5.13,2.29,1.935981,1.016932,-0.003808
3,chr1_24304401_24304763_-,SRSF10,SRSF7,0.59,12.39,8.77,0.48,12.52,8.56,0.21,5.13,2.29,2.420004,1.108442,-0.003808
4,chr1_70697542_70697658_+,SRSF11,SRSF7,0.48,9.33,6.06,0.38,8.58,5.32,0.19,4.35,1.74,2.567621,4.819845,-0.009645
5,chr1_70697542_70697658_+,SRSF11,SRSF7,0.48,9.33,6.06,0.38,8.58,5.32,0.19,4.35,1.74,1.672946,1.468608,-0.009645
6,chr1_70697542_70697658_+,SRSF11,SRSF7,0.48,9.33,6.06,0.38,8.58,5.32,0.19,4.35,1.74,2.567621,4.819845,-0.009645
7,chr1_70697542_70697658_+,SRSF11,SRSF7,0.48,9.33,6.06,0.38,8.58,5.32,0.19,4.35,1.74,1.672946,1.468608,-0.009645
8,chr1_70697542_70697658_+,SRSF11,SRSF7,0.48,9.33,6.06,0.38,8.58,5.32,0.19,4.35,1.74,2.567621,4.819845,-0.009645
9,chr1_70697542_70697658_+,SRSF11,SRSF7,0.48,9.33,6.06,0.38,8.58,5.32,0.19,4.35,1.74,1.672946,1.468608,-0.009645


In [84]:
# saving output as tsv
cross_peaks_500_11_gcn.to_csv('data/outputs/final_table_500_11.tsv', sep = '\t')
cross_peaks_500_11p3_gcn.to_csv('data/outputs/final_table_500_11p3.tsv', sep = '\t')
