In [1]:
import pandas as pd
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats

import sys
sys.path.append('../src')
from data_imports import *

In [2]:
# Bind the result of import_biosamples() to a notebook-level constant.
# NOTE(review): import_biosamples is not defined in this notebook — presumably
# it is provided by the `from data_imports import *` star-import; confirm.
# NOTE(review): BIOSAMPLES is not referenced by the TP53 cells that follow here.
BIOSAMPLES = import_biosamples()

In [6]:
def _tp53_status_by_patient(df):
    """Collapse a per-sample alteration table to one boolean per patient.

    Parameters
    ----------
    df : pandas.DataFrame
        Per-sample table with a ``Patient ID`` column, an ``Altered`` column,
        and alteration columns from position 4 onward in which unprofiled
        entries are already NaN.
        NOTE(review): assumes the first four columns are identifier/summary
        columns and everything after them is a per-gene track — confirm
        against the export format.

    Returns
    -------
    pandas.Series
        Boolean Series named ``TP53_altered`` indexed by ``Patient ID``;
        True when the patient's summed ``Altered`` value is greater than 0.
    """
    # Drop samples that carry no data in any alteration column.
    profiled = df.dropna(subset=df.columns[4:], how='all')
    # aggregate to patient level
    altered_total = profiled.groupby('Patient ID')['Altered'].sum()
    return (altered_total > 0).rename('TP53_altered')


def import_cbtn_TP53(path="data/pbta_TP53_alterations.xlsx"):
    """Load patient-level TP53 alteration status from an Excel export.

    Parameters
    ----------
    path : str, optional
        Location of the Excel workbook. Defaults to the path this notebook
        has always read, so existing zero-argument calls are unaffected.

    Returns
    -------
    pandas.Series
        Boolean Series named ``TP53_altered`` indexed by ``Patient ID``.
    """
    # Treat the "not profiled" sentinel as missing while parsing rather than
    # rewriting the whole frame afterwards.
    df = pd.read_excel(path, na_values=["not profiled"])
    return _tp53_status_by_patient(df)


def import_cbtn_TP53_pathway(path="data/alterations_across_samples.tsv"):
    """Load patient-level alteration status from a tab-separated export.

    The returned Series keeps the name ``TP53_altered`` so downstream cells
    can use either loader interchangeably.

    Parameters
    ----------
    path : str, optional
        Location of the TSV file. Defaults to the path this notebook has
        always read, so existing zero-argument calls are unaffected.

    Returns
    -------
    pandas.Series
        Boolean Series named ``TP53_altered`` indexed by ``Patient ID``.
    """
    # Treat the "not profiled" sentinel as missing while parsing rather than
    # rewriting the whole frame afterwards.
    df = pd.read_csv(path, sep='\t', na_values=["not profiled"])
    return _tp53_status_by_patient(df)

def import_tp53_pt():
    """Return the patient table with a ``TP53_altered`` column attached.

    The result of ``import_patients()`` is inner-merged on its index with the
    Series from ``import_cbtn_TP53_pathway()``, so only patients present in
    both appear in the output.
    """
    patients = import_patients()
    tp53_status = import_cbtn_TP53_pathway()
    return patients.merge(
        tp53_status,
        left_index=True,
        right_index=True,
        how='inner',
    )



In [13]:
# Patient table with TP53_altered, cross-tabulated against amplicon class.
pt = import_tp53_pt()
contingency_tbl = pd.crosstab(pt.amplicon_class, pt.TP53_altered)

# Chi-square test of ecDNA against each comparison class, one 2-row sub-table
# at a time (chi2_contingency is called with its default settings).
# Each entry is (label printed verbatim, row of contingency_tbl to compare).
comparisons = (
    ('ecDNA vs intrachromosomal:', 'intrachromosomal'),
    ('ecDNA vs no amplification', 'no amplification'),
)
for label, other_class in comparisons:
    pair_tbl = contingency_tbl.loc[['ecDNA', other_class]]
    print(label, scipy.stats.chi2_contingency(pair_tbl))

contingency_tbl

ecDNA vs intrachromosomal: Chi2ContingencyResult(statistic=np.float64(0.08321174266101466), pvalue=np.float64(0.77299123426763), dof=1, expected_freq=array([[53.51923077, 67.48076923],
       [38.48076923, 48.51923077]]))
ecDNA vs no amplification Chi2ContingencyResult(statistic=np.float64(218.0105142660559), pvalue=np.float64(2.4566875970788638e-49), dof=1, expected_freq=array([[ 105.25574874,   15.74425126],
       [1445.74425126,  216.25574874]]))


TP53_altered,False,True
amplicon_class,Unnamed: 1_level_1,Unnamed: 2_level_1
ecDNA,52,69
intrachromosomal,40,47
no amplification,1499,163


In [8]:
# Preview only the first rows: display.max_rows is set to None at the top of
# this notebook, so a bare `pt` would render every row of the patient table.
pt.head(10)

Unnamed: 0,sex,age_at_diagnosis,cohort,cancer_type,cancer_subclass,amplicon_class,OS_status,OS_months,TP53_altered
PT_00G007DM,Male,464.0,PBTA-X00,ETMR,C19MC,ecDNA,Alive,150.570842,False
PT_01MZ62KG,Male,546.0,PBTA-X00,ETMR,C19MC,ecDNA,Deceased,9.954825,False
PT_01SH4F1X,Male,3838.0,PBTA-X01,GNT,WT,no amplification,Alive,45.305955,False
PT_02J5CWN5,Male,3722.0,PBTA-X00,LGG,MAPK,no amplification,Deceased,96.821355,False
PT_02MVZZSW,,4666.0,PBTA-X00,MNG,,no amplification,Alive,45.371663,False
PT_02QZ7Z0H,Male,632.0,PBTA-X01,GCT,TT,no amplification,Alive,62.718686,False
PT_0324HWD5,Female,3121.0,PBTA-X01,HGG,HGG_H3WT,no amplification,Alive,,False
PT_0449KSQ3,Female,3900.0,PBTA-X01,LGG,NOS,no amplification,Alive,21.650924,False
PT_047YGDRW,Male,1258.0,PBTA-X01,LGG,KIAA1549-BRAF,no amplification,Alive,,False
PT_04V47WFC,Male,6710.0,PBTA-X01,EPN,NOS,no amplification,Deceased,49.379877,False
