In [1]:
import pandas as pd
pd.options.display.max_columns=50
from scipy import stats as st
import numpy as np
from statsmodels.sandbox.stats.multicomp import fdrcorrection0
import matplotlib.pyplot as plt

In [2]:
def get_label(row):
    """Build the display label ``"<gene> - <drug> (<cancertype>)"`` for one row.

    The gene part is the first ``_``-separated token of ``row["CANCER_SYMBOL"]``
    when that value is present and either differs from ``"EGFR"`` or the
    ``row["gene"]`` name contains a ``"."``. In every other case the first
    ``_``-separated token of ``row["gene"]`` is used.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys ``"CANCER_SYMBOL"``, ``"gene"``, ``"drug"`` and
        ``"cancertype"``.

    Returns
    -------
    str
        The formatted label.
    """
    cancer_symbol = row["CANCER_SYMBOL"]
    # pd.isna instead of an identity test against np.nan: a missing value is not
    # guaranteed to be the np.nan singleton (it can be None, pd.NA or another
    # float NaN object), and those would otherwise reach .split() and crash.
    has_symbol = not pd.isna(cancer_symbol)
    if has_symbol and (cancer_symbol != "EGFR" or "." in row["gene"]):
        gene = cancer_symbol.split("_")[0]  # get only one
    else:
        gene = row["gene"].split("_")[0]  # get only one
    treatment = row["drug"]
    ttype = row["cancertype"]
    return f"{gene} - {treatment} ({ttype})"

In [3]:
QVALUE_CUTOFF = 0.05  # FDR threshold for calling a peak significant in this cell

comb = pd.read_excel("../data/filtered_peak_overview_ploidy_plus_2.5_qvalue05_final_NOV2022.xlsx") # Output from GISTIC, see scripts for more information
comb["log_qvalue"] = -np.log10(comb["q.value_TT_fdr"].values)

# Lowest q-value per (gene, cancertype) among significant peaks with a positive
# log2 odds ratio. The built-in "min" reducer is used instead of passing
# np.nanmin to agg(), which newer pandas versions warn about; the result is the same.
is_enriched_hit = (comb["q.value_TT_fdr"] < QVALUE_CUTOFF) & (comb["log2_Fisher_Est"] > 0)
d_maxv_mc = comb[is_enriched_hit].groupby(["gene", "cancertype"])["q.value_TT_fdr"].min().to_dict()

# A row is the representative mechanism when it is significant and its q-value
# equals the best q-value recorded for its (gene, cancertype) pair.
# (The previous `comb["Mark"] == "X"` assignment to this column was removed: it
# was overwritten immediately by this computation and never used.)
comb["representative_mech"] = [
    bool(q < QVALUE_CUTOFF and d_maxv_mc.get((gene, ctype)) == q)
    for gene, ctype, q in zip(comb["gene"], comb["cancertype"], comb["q.value_TT_fdr"])
]
comb["annotated"] = comb["Known"] == "Yes"
comb["type_alt"] = comb["SV_type"]
comb["label"] = comb.apply(get_label, axis=1)
# Literal replacement; regex=False makes this independent of the pandas default.
comb["cancer_type_code"] = comb["cancertype"].str.replace("-", "_", regex=False)
# Flag peaks whose gene name starts with "RP" or "RN" and that carry no CANCER_SYMBOL.
# NOTE(review): the original condition tested the "RP" prefix twice; if a third
# prefix was intended it is unknown from this notebook - confirm.
comb["not_gene"] = comb["gene"].str.match(r"R[PN]", na=False) & comb["CANCER_SYMBOL"].isna()

### Save the data

In [4]:
# Columns written to the results table, in output order.
export_columns = [
    "gene", "drug", "cancer_type_code", "drug_cancer_gene", "label", "type_alt",
    "q.value_TT_fdr", "log_qvalue", "log2_Fisher_Est", "HMF_treat_2", "PCAWG_notreat_2",
    "annotated", "representative_mech", "not_gene",
]
# Shorter names used in the exported file.
export_names = {
    "q.value_TT_fdr": "qvalue",
    "log2_Fisher_Est": "log_odds_ratio",
    "HMF_treat_2": "mutated_t",
    "PCAWG_notreat_2": "mutated_c",
}
results_table = comb[export_columns].rename(columns=export_names)
results_table.to_csv("../data/cnv_results_resistance.tsv.gz", sep="\t", index=False)

### Save only significant

In [15]:
# Keep significant peaks (q < 0.05) with a positive log2 odds ratio that are not
# flagged as not_gene, and write every column to a TSV file.
significant_mask = (
    comb["q.value_TT_fdr"].lt(0.05)
    & comb["log2_Fisher_Est"].gt(0)
    & comb["not_gene"].eq(False)
)
comb.loc[significant_mask].to_csv("../data/significant_hits_cnv.tsv", sep="\t", index=False)

In [19]:
# Display the rows that pass the significance filter: q < 0.05, positive log2
# odds ratio, and not flagged as not_gene.
comb.loc[
    comb["q.value_TT_fdr"].lt(0.05)
    & comb["log2_Fisher_Est"].gt(0)
    & comb["not_gene"].eq(False)
]

Unnamed: 0,drug,cancertype,gene,newpeakID,CANCER_SYMBOL,SV_type,length,HMF_treat_2,HMF_treat_0,PCAWG_notreat_2,PCAWG_notreat_0,rank,q.values,q.value_TT_fdr,Fisher_Est,log2_Fisher_Est,n_fraction,nn_fraction,delft_fraction,HMFsamples,drug_cancer_gene,Mark,Known,log_qvalue,representative_mech,annotated,type_alt,label,cancer_type_code,not_gene
0,Anti_HER2,BRCA,ERBB2,Amp_peak_chr17_1,ERBB2_CLTC,Amp,101246,52,26,29,172,1,5.91e-77,1.519737e-14,11.715947,3.550402,0.666667,0.144279,0.522388,78,Anti_HER2::ERBB2 (BRCA::ERBB2_CLTC),X,Yes,13.818231,True,True,Amp,ERBB2 - Anti_HER2 (BRCA),BRCA,False
1,mTOR_inhibitor,BRCA,ZNF583,Amp_peak_chr19_5,,Amp,72881,10,57,1,200,1,0.00384,0.0003757087,34.565882,5.111277,0.149254,0.004975,0.144279,67,mTOR_inhibitor::ZNF583 (BRCA::),,,3.425149,True,False,Amp,ZNF583 - mTOR_inhibitor (BRCA),BRCA,False
2,mTOR_inhibitor,BRCA,MYEOV,Amp_peak_chr11_3,,Amp,24086,24,43,28,173,1,4.31e-22,0.01594226,3.429443,1.777974,0.358209,0.139303,0.218905,67,mTOR_inhibitor::MYEOV (BRCA::),,,1.79745,True,False,Amp,MYEOV - mTOR_inhibitor (BRCA),BRCA,False
3,Anti_AR__GnRH,BRCA,CLTC,Amp_peak_chr17_3,ERBB2_CLTC,Amp,36384,21,57,19,182,1,1.79e-11,0.02592857,3.509958,1.811454,0.269231,0.094527,0.174703,78,Anti_AR__GnRH::CLTC (BRCA::ERBB2_CLTC),,,1.586221,True,False,Amp,ERBB2 - Anti_AR__GnRH (BRCA),BRCA,False
4,Pyrimidine_antagonist,BRCA,TYMS,Amp_peak_chr18_1,,Amp,680141,25,263,3,198,1,0.0078,0.02677465,6.256276,2.645304,0.086806,0.014925,0.07188,288,Pyrimidine_antagonist::TYMS (BRCA::),X,,1.572276,True,False,Amp,TYMS - Pyrimidine_antagonist (BRCA),BRCA,False
5,Pyrimidine_antagonist,BRCA,ZNF784,Amp_peak_chr19_4,,Amp,121038,17,271,1,200,1,0.0319,0.03283497,12.50763,3.644737,0.059028,0.004975,0.054053,288,Pyrimidine_antagonist::ZNF784 (BRCA::),,,1.483663,True,False,Amp,ZNF784 - Pyrimidine_antagonist (BRCA),BRCA,False
6,Taxane,BRCA,AC005808.3,Amp_peak_chr20_1,,Amp,277441,74,293,20,181,1,6.97e-22,0.03877677,2.282586,1.190669,0.201635,0.099502,0.102132,367,Taxane::AC005808.3 (BRCA::),,,1.411428,True,False,Amp,AC005808.3 - Taxane (BRCA),BRCA,False
8,Anti_AR__GnRH,PRAD,PRNCR1,Amp_peak_chr8_1,,Amp,42711,116,208,1,152,1,6.88e-23,9.896910999999999e-20,84.336698,6.398089,0.358025,0.006536,0.351489,324,Anti_AR__GnRH::PRNCR1 (PRAD::),X,Yes,19.0045,True,True,Amp,PRNCR1 - Anti_AR__GnRH (PRAD),PRAD,False
9,Alkylating,BRCA,TSHZ2,Amp_peak_chr20_3,,Amp,411546,63,322,15,186,1,6.43e-19,0.04569219,2.422816,1.276685,0.163636,0.074627,0.089009,385,Alkylating::TSHZ2 (BRCA::),,,1.340158,True,False,Amp,TSHZ2 - Alkylating (BRCA),BRCA,False
10,Multikinase_inhibitor,KIRC,RP11-145E5.5,Del_peak_chr9_1,CDKN2A,Del,587789,5,17,3,106,1,4.14e-07,0.03422311,10.090384,3.334909,0.227273,0.027523,0.19975,22,Multikinase_inhibitor::RP11-145E5.5 (KIRC::CDK...,,,1.465681,True,False,Del,CDKN2A - Multikinase_inhibitor (KIRC),KIRC,False
