# Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy.stats as scp
from scipy.cluster.hierarchy import linkage, dendrogram
import statsmodels.api as sm
import statsmodels.stats.multitest as multi

In [None]:
from lifelines import KaplanMeierFitter
from lifelines.statistics import logrank_test, multivariate_logrank_test
from lifelines.plotting import add_at_risk_counts

In [None]:
from matplotlib.pyplot import FormatStrFormatter

In [None]:
import matplotlib.patches as mpatches

In [None]:
# Toggle as needed
# NOTE: this suppresses every warning category for the rest of the session,
# including deprecation notices from pandas/matplotlib; comment it out when debugging.
import warnings
warnings.filterwarnings("ignore")

# Functions

In [None]:
def response_rate_logit(x,y):
    """Fit a logistic regression of ``y`` on ``x`` with an added constant term.

    The design matrix is ``x`` passed through ``sm.add_constant``; the model is
    fitted with the L-BFGS optimizer and convergence output is silenced.

    Returns
    -------
    tuple
        ``(pvalues, params)`` taken from the fitted statsmodels result.
    """
    design = sm.add_constant(x)
    model = sm.Logit(y, design)
    fitted = model.fit(method='lbfgs', maxiter=99999, disp=0)
    return fitted.pvalues, fitted.params

In [None]:
def survival_curve_pair(df,var_label,durations_label,event_observed_label):
    """Draw Kaplan-Meier curves for the first two levels of ``var_label``.

    Rows missing the grouping variable, the duration or the event flag are
    dropped first.  Levels are taken in the column order produced by
    ``pd.get_dummies``; only the first two are fitted and plotted, and an
    at-risk table for both fitters is added under the axes.

    Returns
    -------
    med_pfs : list
        One ``[var_label, level, median_survival_time]`` entry per curve.
    ax : matplotlib axes
        The axes holding both curves.
    """
    fig = plt.figure(figsize=(6,4))
    ax = fig.add_subplot(111)

    complete = df.dropna(subset=[var_label,durations_label,event_observed_label])
    levels = pd.get_dummies(complete[var_label]).columns

    med_pfs = []
    fitters = []
    for position in range(2):
        level = levels[position]
        in_group = complete[var_label]==level
        group = complete[in_group][[durations_label,event_observed_label]]

        kmf = KaplanMeierFitter()
        kmf.fit(group[durations_label],group[event_observed_label],label=level)
        ax = kmf.plot_survival_function(ax=ax,show_censors=False,ci_show=False)

        med_pfs.append([var_label,level,kmf.median_survival_time_])
        fitters.append(kmf)

    add_at_risk_counts(fitters[0], fitters[1], ax=ax)

    return med_pfs, ax

# Read in Data

## Set Paths

In [None]:
# Set this to your source_data directory
# (keep the trailing slash: the paths below are built by plain string concatenation)
source_data_path = ".../Source Data/"

In [None]:
# One sub-directory per data category; each keeps a trailing slash so that
# file names can be appended directly.
source_data_path_clinical = source_data_path + 'Clinical/'
source_data_path_exome = source_data_path + 'Exome/'
source_data_path_rna = source_data_path + 'RNA/'
source_data_path_ref = source_data_path + 'Reference/'
source_data_path_int = source_data_path + 'Integrative/'
source_data_path_out = source_data_path + 'Output/'

## Read in Reference Data

In [None]:
def _read_gene_list(file_name):
    """Return the first column of a header-less, tab-separated reference file as a list."""
    table = pd.read_csv(source_data_path_ref + file_name, sep='\t', header=None)
    return table.iloc[:, 0].to_list()


# Reference lists used below to select driver genes and recurrent CNV genes.
panlung_drivers = _read_gene_list('panlung_drivers.txt')
panlung_amps = _read_gene_list('panlung_amps.txt')
panlung_dels = _read_gene_list('panlung_dels.txt')

## Read in Clinical Data

In [None]:
# Clinical annotation spreadsheet; the first two rows are skipped before the header is read.
annot_file = 'Table_S1_Clinical_Annotations.xlsx'
su2c_clinical = pd.read_excel(source_data_path_clinical + annot_file,skiprows=2)

In [None]:
# Supplementary clinical annotations (tab-separated text file).
su2c_clinical_extra = pd.read_csv\
    (source_data_path_clinical+'SU2C-MARK_Harmonized_Clinical_Annotations_Supplement_v1.txt',sep='\t')

## Read in Exome Data

In [None]:
# Per-sample damaging-mutation pivot table: gene columns plus 'Tumor_Sample_Barcode'.
su2c_val_harm_dam_pv_bin = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Validated_Damaging_Pivot_Bin_v1.txt',sep='\t')
# Keep only the columns named in panlung_drivers.  Take an explicit copy so the
# barcode column added below is written to an independent frame rather than to a
# slice of the full table (which triggers pandas' SettingWithCopyWarning).
_is_driver_column = su2c_val_harm_dam_pv_bin.columns.isin(panlung_drivers)
su2c_val_harm_dam_pv_drivers = su2c_val_harm_dam_pv_bin.loc[:,_is_driver_column].copy()
su2c_val_harm_dam_pv_drivers['Tumor_Sample_Barcode'] = su2c_val_harm_dam_pv_bin['Tumor_Sample_Barcode']

In [None]:
su2c_tmb_sig_tmb_harm = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Exome_TMB_Signatures_v1.txt',sep = '\t')
# log10(x + 1) of every column, indexed by WES sample ID, with a 'log_' prefix
# on each column name.  Computed on the whole frame at once:
# DataFrame.applymap is deprecated in recent pandas, and the vectorized form
# gives the same values for numeric columns.
su2c_tmb_sig_tmb_harm_log = np.log10(
    su2c_tmb_sig_tmb_harm.set_index('Harmonized_SU2C_WES_Tumor_Sample_ID_v2') + 1
).add_prefix('log_')

In [None]:
# Gene-level GISTIC table restricted to the reference amplification/deletion genes,
# then transposed (columns from position 3 onward become rows) and labelled by gene symbol.
su2c_cnv_gene = pd.read_csv(
    source_data_path_exome + 'SU2C-MARK_Harmonized_Gistic_Gene_v1.txt',
    sep='\t',
)
su2c_cnv_gene_sig = su2c_cnv_gene.loc[
    su2c_cnv_gene['Gene Symbol'].isin(panlung_amps + panlung_dels)
]
su2c_cnv_gene_sig_t = su2c_cnv_gene_sig.iloc[:, 3:].transpose()
su2c_cnv_gene_sig_t.columns = su2c_cnv_gene_sig['Gene Symbol']

In [None]:
def _peak_label(peak_row):
    """Prefix a peak's descriptor with 'Amp_' or 'Del_' depending on its 'Unique Name'."""
    prefix = 'Amp_' if 'Amp' in peak_row['Unique Name'] else 'Del_'
    return prefix + peak_row['Descriptor']


su2c_cnv_peak = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Gistic_Focal_v1.txt',sep='\t')
# Rows carrying the actual copy change.  Copy explicitly so that overwriting
# 'Descriptor' below modifies an independent frame instead of a filtered view
# of su2c_cnv_peak (avoids SettingWithCopyWarning).
su2c_cnv_peak_actual = su2c_cnv_peak[su2c_cnv_peak['Amplitude Threshold']=='Actual Copy Change Given'].copy()
su2c_cnv_peak_actual['Descriptor'] = su2c_cnv_peak_actual.apply(_peak_label,axis=1)
# After moving 'Descriptor' into the index, columns from position 8 onward are
# transposed so that each peak becomes a column.
su2c_cnv_peak_actual_t = su2c_cnv_peak_actual.set_index('Descriptor').iloc[:,8:].T

In [None]:
# Arm-level GISTIC table, transposed so that each chromosome arm becomes a column.
su2c_cnv_arm = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Gistic_Arm_v1.txt',sep='\t')
su2c_cnv_arm_t = su2c_cnv_arm.set_index('Chromosome Arm').T

In [None]:
# Per-sample amplification/deletion totals (merged below on the WES sample ID).
su2c_cnv_totals = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Total_Amps_Dels_v1.txt',sep='\t')

In [None]:
# Antigen-presentation summary (merged below on the WES sample ID).
su2c_hla_summary = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Antigen_Presentation_v1.txt',sep='\t')

In [None]:
# MiXCR output derived from exome data (merged below on the WES sample ID).
su2c_wes_mixcr_harm = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Mixcr_v1.txt',sep='\t')

In [None]:
# ABSOLUTE purity/ploidy estimates (merged below on the WES sample ID).
su2c_wes_abs_harm = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Absolute_Purity_Ploidy_v1.txt',sep='\t')

## Read in RNA Data

In [None]:
# Tumor cluster assignments (merged below on the RNA sample ID).
su2c_bnmf_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Tumor_Clusters_v1.txt',sep='\t')

In [None]:
# Integrative cluster assignments; provides 'Integrative_cluster' and the I*_norm columns used later.
su2c_ssbnmf_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Integrative_Clusters_v1.txt',sep='\t')

In [None]:
# Curated signature sets; each table gets its own column suffix (_HM, _DH, _SF, _ZI) at merge time.
su2c_is_hm_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Curated_Sets_HM_v1.txt',sep='\t')

In [None]:
su2c_is_dh_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Curated_Sets_DH_v1.txt',sep='\t')

In [None]:
su2c_is_sf_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Curated_Sets_SF_v1.txt',sep='\t')

In [None]:
su2c_is_zi_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Curated_Sets_ZI_v1.txt',sep='\t')

In [None]:
su2c_is_zi_ext_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Curated_Sets_ZI_Extended_v1.txt',sep='\t')

In [None]:
# Expression matrix in GCT format; the two leading GCT header lines are skipped.
su2c_rna_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_rnaseqc_tpm_v1.gct',skiprows=2,sep='\t')

In [None]:
# Limma results table (not referenced again in the visible part of this notebook).
su2c_limma_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Limma_All_v1.txt',sep='\t')

In [None]:
# Gene-set enrichment results per cluster; provides 'Gene Set Name', 'Set' and 'p-value'.
su2c_tme_gsea_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_TME_Cluster_GSEA.tsv',sep='\t')

## Merge Data

In [None]:
# Build the master table, one merge per data source:
#   * the two clinical tables are inner-joined on the participant ID;
#   * every exome-derived table is left-joined on the WES tumor sample ID;
#   * every RNA-derived table is left-joined on the RNA tumor sample ID.
# Column suffixes are added before merging: _MUT (driver mutations), _CNV (gene-level
# copy number) and _HM / _DH / _SF / _ZI (curated signature sets).
# NOTE(review): su2c_is_zi_harm and su2c_is_zi_ext_harm both receive the '_ZI' suffix, and
# several tables are merged without any suffix; any column name shared between merge inputs
# will be disambiguated by pandas with '_x'/'_y', so the merge order affects final column
# names — confirm before reordering.
su2c_merge_master = su2c_clinical.merge(su2c_clinical_extra,\
        left_on='Harmonized_SU2C_Participant_ID_v2',right_on='Harmonized_SU2C_Participant_ID_v2',how='inner')\
    .merge(su2c_val_harm_dam_pv_drivers.rename(columns = lambda x: x+'_MUT'),\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_on='Tumor_Sample_Barcode_MUT',how='left').drop('Tumor_Sample_Barcode_MUT',axis=1)\
    .merge(su2c_cnv_gene_sig_t.rename(columns=lambda x: x+'_CNV'),\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_index=True,how='left')\
    .merge(su2c_cnv_peak_actual_t,\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_index=True,how='left')\
    .merge(su2c_cnv_arm_t,\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_index=True,how='left')\
    .merge(su2c_cnv_totals,\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',how='left')\
    .merge(su2c_tmb_sig_tmb_harm,\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',how='left')\
    .merge(su2c_tmb_sig_tmb_harm_log,\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_index=True,how='left')\
    .merge(su2c_hla_summary,\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',how='left')\
    .merge(su2c_wes_mixcr_harm,\
        left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',how='left')\
    .merge(su2c_wes_abs_harm,\
          left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',how='left')\
    .merge(su2c_bnmf_harm,\
          left_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',how='left')\
    .merge(su2c_ssbnmf_harm,\
          left_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',how='left')\
    .merge(su2c_is_hm_harm.rename(columns = lambda x: x + '_HM'),\
          left_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_HM',how='left')\
          .drop('Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_HM',axis=1)\
    .merge(su2c_is_dh_harm.rename(columns = lambda x: x + '_DH'),\
          left_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_DH',how='left')\
          .drop('Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_DH',axis=1)\
    .merge(su2c_is_sf_harm.rename(columns = lambda x: x + '_SF'),\
          left_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_SF',how='left')\
          .drop('Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_SF',axis=1)\
    .merge(su2c_is_zi_harm.rename(columns = lambda x: x + '_ZI'),\
          left_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_ZI',how='left')\
          .drop('Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_ZI',axis=1)\
    .merge(su2c_is_zi_ext_harm.rename(columns = lambda x: x + '_ZI'),\
          left_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_ZI',how='left')\
          .drop('Harmonized_SU2C_RNA_Tumor_Sample_ID_v2_ZI',axis=1)

In [None]:
# log10((TMB + 1) / 33) for samples with a TMB value; rows without one stay NaN
# because the assignment aligns on the row index.
_tmb_observed = su2c_merge_master['TMB'].dropna()
su2c_merge_master['log_TMB'] = _tmb_observed.apply(lambda tmb: np.log10((tmb + 1) / 33))

# Significance Testing

## Feature List

In [None]:
# Columns of su2c_merge_master tested against response, concatenated from several groups:
# PD-L1 score, driver mutations (_MUT), focal amplification peaks (Amp_*), gene-level
# copy number (_CNV), exome-derived burdens/signatures, and expression signatures (_SF, _ZI).
# NOTE: the 'Amp_*' names contain trailing spaces; they must match the merged column
# labels character for character, so do not strip them.
select_cols = \
['PDL1_TPS'] +\
['APC_MUT',
 'ARID1A_MUT',
 'ATM_MUT',
 'BRAF_MUT',
 'CDKN2A_MUT',
 'COL5A2_MUT',
 'EGFR_MUT',
 'FAT1_MUT',
 'KEAP1_MUT',
 'KMT2C_MUT',
 'KMT2D_MUT',
 'KRAS_MUT',
 'MGA_MUT',
 'NF1_MUT',
 'NFE2L2_MUT',
 'NOTCH1_MUT',
 'PIK3CA_MUT',
 'PLXNB2_MUT',
 'RBM10_MUT',
 'SETD2_MUT',
 'SMARCA4_MUT',
 'STK11_MUT',
 'TP53_MUT'] +\
['Amp_1q21.3  ',
 'Amp_3q26.2  ',
 'Amp_3q27.1  ',
 'Amp_5p15.33 ',
 'Amp_7p15.3  ',
 'Amp_7p11.2  ',
 'Amp_7q21.12 ',
 'Amp_8p11.23 ',
 'Amp_8q24.21 ',
 'Amp_11q13.3 ',
 'Amp_12p12.1 ',
 'Amp_14q13.3 ',
 'Amp_19q12   ',
 'Amp_20q13.33'] +\
['MCL1_CNV',
 'MECOM_CNV',
 'TERC_CNV',
 'SOX2_CNV',
 'TERT_CNV',
 'EGFR_CNV',
 'FGFR1_CNV',
 'KAT6A_CNV',
 'MYC_CNV',
 'CDKN2A_CNV',
 'CCND1_CNV',
 'KRAS_CNV',
 'CCNE1_CNV',
 'ZNF217_CNV'] +\
[ 'log_TMB',
 'log_TMB_clonal',
 'log_TMB_subclonal',
 'log_TMB_indel',
 'log_Neoantigens',
 'log_Neoantigens_clonal',
 'log_Neoantigens_subclonal',
 'Subclone_count',
 'log_Aging_Signature',
 'log_Smoking_Signature',
 'log_APOBEC_Signature',
 'log_DNA_BCR_burden',
 'log_DNA_TCR_burden',
 'Total_dels',
 'Total_amps',
 'HLA_LOH_present',
 'HLA_hom_present',
 'B2M_altered',
 'T1_norm',
 'T2_norm',
 'T3_norm',
 'T4_norm',
 'I1_norm',
 'I2_norm',
 'I3_norm',
 'B-cells_SF',
 'Cytotoxic cells_SF',
 'DC_SF',
 'Exhausted CD8_SF',
 'Exhausted/HS CD8_SF',
 'Lymphocytes_SF',
 'Lymphocytes exhausted/cell cycle_SF',
 'Macrophages/Monocytes_SF',
 'Memory T cells_SF',
 'Plasma_SF',
 'Treg_SF',
 'hMø1_ZI',
 'hMø4_ZI',
 'hMø5_ZI',
 'hMø6_ZI',
 'hMø7_ZI',
 'hMø8_ZI',
 'hMø9_ZI',
 'hMono1_ZI',
 'hMono2_ZI',
 'hMono3_ZI']+\
['hN1_ZI', 
'hN2_ZI',
'hN3_ZI',
'hN5_ZI',
'hDC1_ZI',
'hDC2_ZI',
'hDC3_ZI',
'hpDC_ZI']

## Logistic Regression Testing

In [None]:
# Univariate logistic regression of the binary confirmed response on every
# feature in select_cols; one [feature, p-value, coefficient] row per fit.
log_reg = []
df = pd.DataFrame()
df_test = su2c_merge_master
# Index by participant once; the original re-indexed the full table on every iteration.
df_indexed = df_test.set_index('Harmonized_SU2C_Participant_ID_v2')
for feature in select_cols:
    X = df_indexed.loc[:,[feature]]
    if X.iloc[:,0].dtype.name!='float64':
        # Non-float feature: one-hot encode it and test each level separately; the
        # trailing NaN-indicator column added by dummy_na=True is sliced off.
        # dtype=float keeps the dummies numeric: recent pandas returns bool dummies
        # by default, which turn the design matrix into object dtype once the float
        # intercept column is added and make statsmodels reject it.
        # NOTE(review): get_dummies only encodes object/string/category columns of a
        # DataFrame; an int or bool feature would pass through unchanged and then be
        # removed by the slice below — confirm no selected feature has such a dtype.
        Xd = pd.get_dummies(X,dummy_na=True,dtype=float).iloc[:,0:-1]
        df = pd.concat([df,Xd],axis=1)
        for col in Xd.columns:
            y = df_indexed['Harmonized_Confirmed_BOR_Bin']
            Xi = Xd[[col]]
            # Align predictor and response on participant ID and drop incomplete rows.
            Xy = Xi.merge(y,left_index=True,right_index=True,how='inner').dropna()
            Xi = Xy.iloc[:,[0]]
            y = Xy.iloc[:,[1]]
            p, param = response_rate_logit(Xi,y)
            # Entry 0 is the added constant, entry 1 the tested column.  Use .iloc:
            # integer keys on a label-indexed Series are deprecated positional access.
            log_reg.append([Xi.columns[0],p.iloc[1],param.iloc[1]])
    else:
        df = pd.concat([df,X],axis=1)
        y = df_indexed['Harmonized_Confirmed_BOR_Bin']
        Xy = X.merge(y,left_index=True,right_index=True,how='inner').dropna()
        X = Xy.iloc[:,[0]]
        y = Xy.iloc[:,[1]]
        p, param = response_rate_logit(X,y)
        log_reg.append([feature,p.iloc[1],param.iloc[1]])
log_reg_df = pd.DataFrame(log_reg,columns=['Feature','p_value','coeff']).set_index('Feature')

In [None]:
# Benjamini-Hochberg adjusted p-values across all fitted features
# (second element of the multipletests result).
_bh_result = multi.multipletests(log_reg_df['p_value'], method='fdr_bh')
log_reg_df['q_value'] = _bh_result[1]

# Cohort and Mutations

## Mutations and TMB

In [None]:
# Swarm plot of log_TMB by three-category best overall response.
p = sns.swarmplot(data = su2c_merge_master.dropna(subset=['log_TMB']),x = 'Harmonized_Confirmed_BOR_3_Cat',y='log_TMB',order=['CR/PR','SD','PD'])
# NOTE(review): the group sizes in these labels are hard-coded — confirm they match the data.
p.set_xticklabels(['PR/CR\n(N = 121)','SD\n(N = 84)','PD\n(N = 104)'],font='Arial',weight='bold',fontsize=14)
p.set_yticklabels([str(i) for i in p.get_yticks()],font='Arial',weight='bold',fontsize=14)
# Raw string: '\m' is not a valid Python escape sequence and warns in a normal literal.
p.set_ylabel(r'$\mathregular{log_{10}(TMB)}$',font='Arial',weight='bold',fontsize=16,labelpad=10)
p.set_xlabel('')
# Installed after the tick labels were styled; the formatter supplies the displayed text.
p.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))

## Driver Survival Curves

In [None]:
su2c_val_harm_dam_pv = pd.read_csv(source_data_path_exome + 'SU2C-MARK_Harmonized_Validated_Damaging_Pivot_v1.txt',sep='\t')

In [None]:
su2c_clinical_dam_merge = su2c_clinical.merge(su2c_clinical_extra,\
        left_on='Harmonized_SU2C_Participant_ID_v2',right_on='Harmonized_SU2C_Participant_ID_v2',how='inner')\
        .merge(su2c_val_harm_dam_pv,left_on='Harmonized_SU2C_WES_Tumor_Sample_ID_v2',
                    right_on='Tumor_Sample_Barcode',how='inner')

In [None]:
# Plot PFS by Gene

feature_col = 'EGFR'
duration_col = 'Harmonized_PFS_Months'
event_col = 'Harmonized_PFS_Event'

# Restrict to the three columns the curves need and drop incomplete rows.
df = su2c_clinical_dam_merge.copy()
df_filt = df.loc[:, [feature_col, duration_col, event_col]].dropna()
medpfs, ax = survival_curve_pair(df_filt, feature_col, duration_col, event_col)

axis_label_style = dict(font='Arial', weight='bold', fontsize=16, labelpad=14)
tick_label_style = dict(font='Arial', weight='bold', fontsize=14)

ax.set_ylabel("PFS Probability", **axis_label_style)
ax.set_xlabel("Months", **axis_label_style)
ax.set_xticklabels(ax.get_xticks(), **tick_label_style)
ax.set_yticklabels(np.round(ax.get_yticks(), 1), **tick_label_style)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.0f'))

# Relabel the two legend entries in plotting order
# (assumes the first curve drawn is the unmutated group).
L = ax.legend(frameon=False, prop={'size': 14, 'weight': 'bold'})
for position, status in enumerate((" unmutated", " mutated")):
    L.get_texts()[position].set_text(feature_col + status)

In [None]:
# Plot PFS by Gene

feature_col = 'KRAS/STK11'
duration_col = 'Harmonized_PFS_Months'
event_col = 'Harmonized_PFS_Event'

# Restrict to KRAS-mutant samples.  Copy after filtering so that the derived
# column below is added to an independent frame, not to a filtered view
# (the original filtered first and then assigned, triggering SettingWithCopyWarning).
df = su2c_clinical_dam_merge[su2c_clinical_dam_merge['KRAS']==1].copy()
# Within KRAS-mutant samples the product is non-zero only when STK11 is also non-zero.
df[feature_col] = df['KRAS'] * df['STK11']
df_filt = df[[feature_col,duration_col,event_col]].dropna()
medpfs, ax = survival_curve_pair(df_filt,feature_col,duration_col,event_col)

ax.set_ylabel("PFS Probability",font='Arial',weight='bold',fontsize=16,labelpad = 14)
ax.set_xlabel("Months",font='Arial',weight='bold',fontsize=16,labelpad=14)
ax.set_xticklabels(np.round(ax.get_xticks()),font='Arial',weight='bold',fontsize=14)
ax.set_yticklabels(np.round(ax.get_yticks(),1),font='Arial',weight='bold',fontsize=14)
ax.xaxis.set_major_formatter(FormatStrFormatter('%.0f'))

# Legend entries follow plotting order (assumes the first curve is the STK11-unmutated group).
L = ax.legend(frameon=False,prop={'size':14,'weight':'bold'})
L.get_texts()[0].set_text("KRAS mutant; STK11 unmutated")
L.get_texts()[1].set_text("KRAS mutant; STK11 mutated")

## Logistic Regression Across Exome Drivers

In [None]:
# Regression results for the mutation features (index labels containing '_MUT').
_is_mutation_feature = ['_MUT' in feature_name for feature_name in log_reg_df.index]
log_reg_df_mut = log_reg_df.loc[_is_mutation_feature]

In [None]:
# Volcano-style plot for driver mutations: coefficient vs -log10(q-value).
df = log_reg_df_mut
x = df['coeff']
y = df['q_value']

fig,ax = plt.subplots(figsize=(6,6))

# Reference lines at q = 0.1 (solid red), q = 0.25 (dashed purple) and q = 1 (dotted black).
ax.axhline(linewidth=1,y=-np.log10(0.1),linestyle='-',c='r')
ax.axhline(linewidth=1,y=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axhline(linewidth=1,y=0,linestyle='dotted',c='k')

sns.scatterplot(x=x,y=-np.log10(y),ax=ax,s=100)
plt.xlim(-2,2)
plt.ylim(0,1.6)

# Label placement per gene: (text offset in points, horizontal alignment).
# Only the genes listed here are ever labelled.
label_layout = {
    'ATM': ((0,20),'left'),
    'ARID1A': ((15,15),'left'),
    'KEAP1': ((-5,15),'right'),
    'SMARCA4': ((15,10),'left'),
    'TP53': ((0,25),'left'),
    'RBM10': ((-30,25),'left'),
    'EGFR': ((10,25),'right'),
}
label_threshold = -np.log10(0.25)

# Iterate over values directly instead of x[i]/y[i]: integer keys on a
# string-indexed Series are deprecated positional lookups.
for feature_name, xi, qi in zip(df.index, x, y):
    yi = -np.log10(qi)
    gene = feature_name.split('_')[0]

    # Label a gene only when it clears the q < 0.25 line.
    if yi>label_threshold and gene in label_layout:
        offset, h_align = label_layout[gene]
        plt.annotate(gene,xy=(xi,yi),\
                     xycoords='data',xytext=offset,textcoords='offset points', va='center',ha=h_align,\
                     arrowprops=dict(arrowstyle='-',lw=2,color='k'),size=12,style='italic')


_ = ax.set_xlabel("ln(Odds Ratio)",font='Arial',weight='bold',fontsize=16,labelpad = 14)
# Raw string: '\m' is not a valid Python escape sequence.
_ = ax.set_ylabel(r"$\mathregular{-log_{10}(q-value)}$",font='Arial',weight='bold',fontsize=16,labelpad=14)
_ = ax.set_xticklabels(np.round(ax.get_xticks()),font='Arial',weight='bold',fontsize=14)
_ = ax.set_yticklabels(np.round(ax.get_yticks(),1),font='Arial',weight='bold',fontsize=14)

ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))

## Copy Number and Response

In [None]:
# Row positions, within the first 68 rows of the focal-peak table, of peaks whose
# value exceeds 1 in more than 5% of samples (309 is used as the sample count).
# NOTE(review): the row window (0:68), the column offset (9:) and the denominator (309)
# are hard-coded — confirm they still match the input table.
common_peaks_index = np.where(((su2c_cnv_peak.iloc[0:68,9:]>1).sum(axis=1)/309)>0.05)[0]

In [None]:
# Translate those row positions into column labels of the transposed 'actual copy change' table.
# NOTE(review): assumes the first 68 rows of su2c_cnv_peak line up one-to-one with the
# columns of su2c_cnv_peak_actual_t — verify against the file layout.
common_peaks_freq = su2c_cnv_peak_actual_t.columns[common_peaks_index].to_list()

In [None]:
# Volcano-style plot for the common focal peaks: coefficient vs -log10(q-value).
df = log_reg_df.loc[common_peaks_freq]
x = df['coeff']
y = df['q_value']

fig,ax = plt.subplots(figsize=(6,6))
sns.scatterplot(x=x,y=-np.log10(y),ax=ax,s=100)
plt.xlim(-0.6,0.6)
plt.ylim(0,1.6)

# Reference lines at q = 0.1 (solid red), q = 0.25 (dashed purple) and q = 1 (dotted black).
ax.axhline(linewidth=1,y=-np.log10(0.1),linestyle='-',c='r')
ax.axhline(linewidth=1,y=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axhline(linewidth=1,y=0,linestyle='dotted',c='k')

# Iterate over values directly instead of x[i]/y[i]: integer keys on a
# string-indexed Series are deprecated positional lookups.
for gene, xi, qi in zip(df.index, x, y):
    yi = -np.log10(qi)

    # Only the 5p15.33 amplification peak is labelled, and only when its q-value is below 1.
    if qi<1 and gene == 'Amp_5p15.33 ':
        plt.annotate(gene,xy=(xi,yi),\
                     xycoords='data',xytext=(15,15),textcoords='offset points', va='center',ha='left',\
                     arrowprops=dict(arrowstyle='-',lw=2,color='k'),size=12)

_ = ax.set_xlabel("ln(Odds Ratio)",font='Arial',weight='bold',fontsize=16,labelpad = 14)
# Raw string: '\m' is not a valid Python escape sequence.
_ = ax.set_ylabel(r"$\mathregular{-log_{10}(q-value)}$",font='Arial',weight='bold',fontsize=16,labelpad=14)
_ = ax.set_xticklabels(np.round(ax.get_xticks(),1),font='Arial',weight='bold',fontsize=14)
_ = ax.set_yticklabels(np.round(ax.get_yticks(),1),font='Arial',weight='bold',fontsize=14)

## Gene CNVs and Response

In [None]:
# Genes whose absolute copy-number value exceeds 0.9 in more than 5% of samples
# (309 is used as the sample count).
_altered_fraction = (su2c_cnv_gene_sig_t.abs() > 0.9).sum() / 309
common_cnv_gene_freq = su2c_cnv_gene_sig_t.columns[_altered_fraction > 0.05].to_list()

In [None]:
# Volcano-style plot for gene-level copy number: coefficient vs -log10(q-value).
df = log_reg_df.loc[[gene + '_CNV' for gene in common_cnv_gene_freq]]

x = df['coeff']
y = df['q_value']

fig,ax = plt.subplots(figsize=(6,6))
sns.scatterplot(x=x,y=-np.log10(y),ax=ax,s=100)
plt.xlim(-3.3,3.3)
plt.ylim(0,1.6)

# Reference lines at q = 0.1 (solid red), q = 0.25 (dashed purple) and q = 1 (dotted black).
ax.axhline(linewidth=1,y=-np.log10(0.1),linestyle='-',c='r')
ax.axhline(linewidth=1,y=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axhline(linewidth=1,y=0,linestyle='dotted',c='k')

# Text offset (in points) for each gene that may be labelled.
label_offsets = {
    'TERT': (-25,25),
    'CCNE1': (5,-15),
    'B2M': (-30,15),
    'ZNF217': (-45,20),
    'CDKN2A': (15,15),
    'MYC': (-5,25),
    'CCND1': (-45,20),
}

# Iterate over values directly instead of x[i]/y[i]: integer keys on a
# string-indexed Series are deprecated positional lookups.
for feature_name, xi, qi in zip(df.index, x, y):
    yi = -np.log10(qi)
    gene = feature_name.split('_')[0]

    # The q < 0.3 threshold applies to every gene.  Previously the `elif` branches
    # were dedented one level and paired with the threshold test itself, so every
    # gene except TERT was labelled only when q >= 0.3 — the opposite of the intent
    # shown by the sibling plots.
    if qi<0.3 and gene in label_offsets:
        plt.annotate(gene,xy=(xi,yi),\
                     xycoords='data',xytext=label_offsets[gene],textcoords='offset points', va='center',ha='left',\
                     arrowprops=dict(arrowstyle='-',lw=2,color='k'),size=12,style='italic')


_ = ax.set_xlabel("ln(Odds Ratio)",font='Arial',weight='bold',fontsize=16,labelpad = 14)
# Raw string: '\m' is not a valid Python escape sequence.
_ = ax.set_ylabel(r"$\mathregular{-log_{10}(q-value)}$",font='Arial',weight='bold',fontsize=16,labelpad=14)
_ = ax.set_xticklabels(np.round(ax.get_xticks()),font='Arial',weight='bold',fontsize=14)
_ = ax.set_yticklabels(np.round(ax.get_yticks(),1),font='Arial',weight='bold',fontsize=14)

## Signature Totals

In [None]:
# Features shown in the signature bar plot, in display order.
sig_cols = ['log_Neoantigens',
       'log_Neoantigens_clonal', 'log_Neoantigens_subclonal',
       'Subclone_count', 'log_Aging_Signature', 'log_Smoking_Signature',
       'log_APOBEC_Signature', 'log_TMB_indel','HLA_LOH_present', 'HLA_hom_present', 'B2M_altered',
            'log_DNA_BCR_burden', 'log_DNA_TCR_burden']

In [None]:
# Display names, positionally matched to sig_cols (same length, same order).
sig_cols_rename = ['Neoantigens', 'Neoantigens clonal',
       'Neoantigens subclonal', 'Subclone count', 'Aging burden', 'Smoking burden',
       'APOBEC burden', 'Indel burden','HLA LOH','HLA homozygosity','B2M alteration','DNA BCR burden',
       'DNA TCR burden']

In [None]:
# Selecting with a list keeps the rows in sig_cols order.
df_plot = log_reg_df.loc[sig_cols]

In [None]:
df_plot['-log10(q-value)'] = -np.log10(df_plot['q_value'])

In [None]:
# Positional relabel: relies on df_plot still being in sig_cols order.
df_plot.index = sig_cols_rename

In [None]:
# Horizontal bar plot of -log10(q-value) per signature feature.
plt.figure(figsize=(6,6))
h = sns.barplot(y=sig_cols_rename,x=df_plot['-log10(q-value)'],orient = 'h')

# Fetch the axes once, outside the loop (it was previously re-assigned on every
# iteration and left undefined here if there were no tick labels).
ax=plt.gca()

for item in h.get_yticklabels():
    item.set_font('Arial')
    item.set_fontweight('bold')
    item.set_fontsize(14)

# Reference lines at q = 0.1 (solid red) and q = 0.25 (dashed purple).
ax.axvline(linewidth=1,x=-np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=-np.log10(0.25),linestyle='dashed',c='purple')

# Raw string: '\m' is not a valid Python escape sequence.
_ = ax.set_xlabel(r"$\mathregular{-log_{10}(q-value)}$",font='Arial',weight='bold',fontsize=14,labelpad=14)
# Label the x axis from its own tick positions; the original used get_yticks(),
# which put the bar positions of the categorical axis on the numeric axis.
_ = ax.set_xticklabels(np.round(ax.get_xticks(),1),font='Arial',weight='bold',fontsize=14)

# Expression Analysis

## Immune Cell Subsets

In [None]:
# NOTE(review): this list is not read by any cell visible here and sf_subsets is
# reassigned further down; it looks like a leftover — confirm before removing.
sf_subsets = ['B-cells_y', 'Cytotoxic cells_y', 'DC_y',
       'Exhausted CD8_y', 'Exhausted/HS CD8', 'Lymphocytes',
       'Lymphocytes exhausted/cell cycle', 'Macrophages/Monocytes',
       'Memory T cells', 'Plasma', 'Treg_y']

In [None]:
# Regression rows for every SF signature column (all columns after the first, with the '_SF' suffix).
df_plot = log_reg_df.loc[su2c_is_sf_harm.columns[1:] + '_SF']

In [None]:
df_plot = df_plot.sort_values('coeff',ascending=False)

In [None]:
# Display names assigned positionally after the final sort below.
# NOTE(review): this hard-codes the row order produced by the sort on the signed
# log10(q-value); if the regression results change, the labels will silently
# attach to the wrong rows — verify the order against df_plot before relabelling.
sf_cols_rename=['Exhausted CD8+ T-cells (G6)',
                'Regulatory T-cells (G7)',
                'Lymphocytes exhausted/cell-cycle (G11)',
                'Exhausted/HS CD8+ T-cells (G9)',
                'Cytotoxic lymphocytes (G8)',
                'B-cells (G1)',                
                'Lymphocytes (G5)',
                'Plasma cells (G2)',
                'Memory T-cells (G10)',
                'Dendritic cells (G4)',
                'Monocytes/Macrophages (G3)']

In [None]:
# -log10(q) carrying the sign of the coefficient, so both direction and strength are shown.
df_plot['Signed log10(q-value)'] = -np.log10(df_plot['q_value'])*\
    np.sign(df_plot['coeff'])

In [None]:
df_plot = df_plot.sort_values('Signed log10(q-value)',ascending=False)

In [None]:
# Sign of the coefficient, used as the bar colour grouping.
df_plot['hue'] = np.sign(df_plot["coeff"])

In [None]:
df_plot.index = sf_cols_rename

In [None]:
# Horizontal bar plot of the signed log10(q-value) per immune cell subset,
# coloured by the sign of the coefficient.
plt.figure(figsize=(6,6))
h = sns.barplot(y=df_plot.index,x=df_plot['Signed log10(q-value)'],orient='h',\
                hue=df_plot['hue'],dodge=False)

ax=plt.gca()
ymin, ymax = ax.get_ylim()

# Zero line plus q = 0.1 (solid red) and q = 0.25 (dashed purple) on both sides.
plt.vlines(x=0,ymin=ymin,ymax=ymax,colors='k')
ax.axvline(linewidth=1,x=-np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axvline(linewidth=1,x=np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=np.log10(0.25),linestyle='dashed',c='purple')

plt.ylabel(None)
plt.setp(ax.get_yticklabels(), font = 'Arial',fontsize=14,weight='bold')
plt.setp(ax.get_xticklabels(), fontsize=12)
# Inverted limits keep the first row at the top; 10.5 matches the 11 bars.
plt.ylim(10.5,-0.5)

# Raw string: '\m' and '\h' are not valid Python escape sequences.
plt.xlabel(r'$\mathregular{Signed\hspace{0.5}log_{10}(q-value)}$',weight='bold',fontsize=14)

ax.set_xticklabels(ax.get_xticks(),font='Arial',weight='bold',fontsize=14)
h.legend_.remove()

## M Cluster Signatures

In [None]:
# Order samples by cluster and, within each cluster, by decreasing value of
# that cluster's own normalised score.
su2c_ssbnmf_harm = su2c_ssbnmf_harm.sort_values(by=['Integrative_cluster'])
su2c_ssbnmf_harm_sorted_I1 = su2c_ssbnmf_harm[su2c_ssbnmf_harm['Integrative_cluster']==1]\
    .sort_values(by='I1_norm',ascending=False)
su2c_ssbnmf_harm_sorted_I2 = su2c_ssbnmf_harm[su2c_ssbnmf_harm['Integrative_cluster']==2]\
    .sort_values(by='I2_norm',ascending=False)
su2c_ssbnmf_harm_sorted_I3 = su2c_ssbnmf_harm[su2c_ssbnmf_harm['Integrative_cluster']==3]\
    .sort_values(by='I3_norm',ascending=False)
su2c_ssbnmf_harm_sorted = pd.concat([su2c_ssbnmf_harm_sorted_I1,su2c_ssbnmf_harm_sorted_I2,\
          su2c_ssbnmf_harm_sorted_I3])
# sns.clustermap is a figure-level function and creates its own figure from
# `figsize`; the plt.figure() call that used to precede it only produced an
# extra empty figure, so it has been dropped.
g = sns.clustermap(su2c_ssbnmf_harm_sorted[['I1_norm','I2_norm','I3_norm']].T,vmin=0,vmax=1,cmap='Blues',\
            yticklabels=False,xticklabels=False,row_cluster=False,col_cluster=False,\
               cbar_kws={'ticks':None,'label':None},figsize=(14,8))
_=g.ax_heatmap.set_xlabel('SU2C-MARK Cohort Samples',fontsize=30,labelpad=20)

## Gene Set Enrichment Results for M Clusters

In [None]:
# Enrichment strength used for marker sizes in the bubble plot.
su2c_tme_gsea_harm['-log10(p-value)'] = -np.log10(su2c_tme_gsea_harm['p-value'])

In [None]:
# Gene sets as rows, 'Set' values as columns; combinations absent from the table become 0.
# NOTE(review): the sorted pivot is not used by any cell visible here, and the sort
# keys assume the 'Set' values are exactly 'G1', 'G2', 'G3' — confirm.
su2c_tme_gsea_harm_pivot = pd.pivot_table(su2c_tme_gsea_harm[['Gene Set Name','Set','-log10(p-value)']],\
                    index='Set',values='-log10(p-value)',columns='Gene Set Name').fillna(0).T
su2c_tme_gsea_harm_pivot_sorted =su2c_tme_gsea_harm_pivot.sort_values(by=['G1','G2','G3'],ascending=False)

In [None]:
# Shorter axis labels: remove every occurrence of 'HALLMARK_' from the gene-set name.
su2c_tme_gsea_harm['Gene Set Name Short'] = su2c_tme_gsea_harm['Gene Set Name'].apply(lambda x: x.replace('HALLMARK_',''))

In [None]:
# Bubble plot of gene-set enrichment per cluster; marker area is 5 x -log10(p-value).
plt.figure(figsize=(4,6))

# vmin/vmax were dropped: they only apply to colour-mapped data and are ignored
# (with a warning in recent matplotlib) when a fixed `color` is given.
g = plt.scatter(data = su2c_tme_gsea_harm,x = 'Set',y = 'Gene Set Name Short',\
           color='r',s=su2c_tme_gsea_harm['-log10(p-value)']*5)

plt.gca().invert_yaxis()

ax = plt.gca()
# NOTE(review): assumes exactly three 'Set' categories, plotted in this order.
ax.set_xticklabels(['Wound\nHealing\n(M-1)','Immune\nActivated\n(M-2)','Immune\nDesert\n(M-3)'])
plt.ylabel(None)
_ = plt.setp(ax.get_yticklabels(), font = 'Arial',fontsize=12,weight='bold')
plt.xlabel(None)
_ = plt.setp(ax.get_xticklabels(),font = 'Arial',fontsize=14,weight='bold')

# Size legend: empty scatter handles drawn with the same x5 area scaling as the data.
msizes = [2, 5, 10, 20]
markers = [plt.scatter([],[], s=size*5, label=size, color='r') for size in msizes]

# Raw string: '\m' is not a valid Python escape sequence.
_=plt.legend(handles=markers,title = r"$\mathregular{-log_{10}(p-value)}$",title_fontsize=14,fontsize=14,loc='lower left')

## Response Rate by M Cluster

In [None]:
# Sample counts per (integrative cluster, binary response) pair; after reset_index
# the count column is named 0.
I_cluster_response = su2c_merge_master[['Integrative_cluster','Harmonized_Confirmed_BOR_Bin']]\
    .groupby(by=['Integrative_cluster','Harmonized_Confirmed_BOR_Bin']).size().reset_index().\
    sort_values(['Integrative_cluster','Harmonized_Confirmed_BOR_Bin'])

In [None]:
# Positional labelling of the sorted rows.
# NOTE(review): assumes exactly six rows (three clusters x two response values) with the
# lower response value meaning non-responder; a cluster missing one outcome would
# raise a length mismatch here — confirm.
I_cluster_response['Response']= ['Non-responder','Responder']*3

In [None]:
# Clusters as rows, response labels as columns; then row-normalise to fractions.
a = I_cluster_response.pivot_table(index='Integrative_cluster',values=0,columns='Response')
a_norm = a.apply(lambda x: x/x.sum(),axis=1)

In [None]:
tab10 = sns.color_palette('tab10')

In [None]:
# Stacked bar chart of responder / non-responder fractions per integrative cluster.
plt.figure(figsize=(4,6))
plt.bar(a_norm.index,a_norm['Responder'],color=tab10[0],label = 'Responder')
plt.bar(a_norm.index,a_norm['Non-responder'],color=tab10[1],bottom = a_norm['Responder'],label='Non-responder')
plt.legend(loc='center left',bbox_to_anchor=(1.05,0.5),ncol=1,fontsize=14)
g = plt.gca()
# Pin the ticks before labelling them.  The original passed label lists padded
# with empty strings and relied on whatever tick positions matplotlib had picked
# automatically, which mislabels the bars as soon as the auto-ticks differ.
# NOTE(review): assumes three clusters in ascending index order.
g.set_xticks(list(a_norm.index))
g.set_xticklabels(['Wound\nHealing\n(TME-1)','Immune\nActivated\n(TME-2)','Immune\nDesert\n(TME-3)'],fontsize=14)
g.set_yticks([0,0.2,0.4,0.6,0.8,1])
g.set_yticklabels([0,0.2,0.4,0.6,0.8,1],fontsize=14)
g.tick_params(bottom=False)
_=g.set_ylabel('Frequency',fontsize=16,labelpad = 14)

## Immune Cell Subsets by M Cluster

In [None]:
sf_subsets = [col for col in su2c_merge_master.columns if '_SF' in col][1:]

In [None]:
rows = 2
cols = 6
zi_cols = sf_subsets

fig,axs = plt.subplots(rows,cols,figsize=(26,10))

for i in range(0,len(zi_cols)):
    row = int(np.floor(i/cols))
    col = i%cols
    subset = zi_cols[i]
    g = sns.swarmplot(data=su2c_merge_master,x='Integrative_cluster',\
                   y=subset,ax=axs[row,col])
    g.set_xticklabels(['M-1','M-2','M-3'],fontsize=14)
    g.set_ylabel(g.get_ylabel().split('_')[0]+' (z-score)',fontsize=16)
    df = su2c_merge_master[[subset,'Integrative_cluster']].dropna()
    x = df[df.iloc[:,1]==1].iloc[:,0]
    y = df[df.iloc[:,1]==2].iloc[:,0]
    z = df[df.iloc[:,1]==3].iloc[:,0]
    ax = axs[row,col]
    plt.setp(ax.get_xticklabels(), fontsize=18, weight='bold')
    plt.setp(ax.get_yticklabels(), fontsize=18, weight='bold')
    ax.set_xlabel(None)
    ax.set_ylabel(ax.get_ylabel(),fontsize=18,weight='bold')

fig.tight_layout(pad=2)
fig.delaxes(axs[1][5])

## Myeloid Subsets

In [None]:
# Subset names whose '<name>_ZI' rows are pulled out of log_reg_df below.
mphage_subsets = [
    'hMø1', 'hMø4', 'hMø5', 'hMø6', 'hMø7', 'hMø8', 'hMø9',
    'hMono1', 'hMono2', 'hMono3',
    'hN1', 'hN2', 'hN3', 'hN5',
    'hDC1', 'hDC2', 'hDC3', 'hpDC',
]

zi_rows = [name + '_ZI' for name in mphage_subsets]
df_plot = log_reg_df.loc[zi_rows].sort_values('coeff', ascending=False)

# -log10(q) carrying the sign of the regression coefficient.
coeff_sign = np.sign(df_plot['coeff'])
df_plot['Signed log10(q-value)'] = coeff_sign * -np.log10(df_plot['q_value'])
df_plot = df_plot.sort_values('Signed log10(q-value)', ascending=False)
df_plot['hue'] = np.sign(df_plot["coeff"])

# Strip the suffix from the row labels, then impose the display order
# (monocytes, macrophages, neutrophils, dendritic cells).
df_plot.index = [name.partition('_')[0] for name in df_plot.index]
display_order = [
    'hMono2', 'hMono1', 'hMono3',
    'hMø1', 'hMø9', 'hMø6', 'hMø4', 'hMø5', 'hMø8', 'hMø7',
    'hN2', 'hN1', 'hN5', 'hN3',
    'hDC2', 'hDC1', 'hpDC', 'hDC3',
]
df_plot = df_plot.loc[display_order]

In [None]:
# Horizontal bars of signed log10(q) per subset, coloured by coefficient sign.
plt.figure(figsize=(6,6))
h = sns.barplot(y=df_plot.index,x=df_plot['Signed log10(q-value)'],orient='h',
                hue=df_plot['hue'],dodge=False)

ax=plt.gca()
ymin, ymax = ax.get_ylim()

# Zero line plus mirrored reference lines at q = 0.1 (red) and q = 0.25 (purple).
plt.vlines(x=0,ymin=ymin,ymax=ymax,colors='k')
ax.axvline(linewidth=1,x=-np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axvline(linewidth=1,x=np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=np.log10(0.25),linestyle='dashed',c='purple')

plt.ylabel(None)
plt.setp(ax.get_yticklabels(), font = 'Arial',fontsize=14,weight='bold')
plt.setp(ax.get_xticklabels(), fontsize=12)
# Inverted y-range sized to the number of bars, so no bar is clipped.
plt.ylim(len(df_plot)-0.5,-0.5)

# Raw string: '\m' and '\h' are not valid Python escapes.
plt.xlabel(r'$\mathregular{Signed\hspace{0.5}log_{10}(q-value)}$',weight='bold',fontsize=14)
ax.set_xticklabels(ax.get_xticks(),font='Arial',weight='bold',fontsize=14)
h.legend_.remove()

## Myeloid Subsets by M Cluster

In [None]:
rows = 3
cols = 6
zi_cols = [col +'_ZI' for col in mphage_subsets]

fig,axs = plt.subplots(rows,cols,figsize=(26,14))

# One swarm plot per myeloid subset, filling the grid row by row.
# (The per-cluster series that used to be extracted here were never used.)
for i, subset in enumerate(zi_cols):
    row, col = divmod(i, cols)
    ax = axs[row,col]

    g = sns.swarmplot(data=su2c_merge_master,x='Integrative_cluster',
                      y=subset,ax=ax)
    g.set_xticklabels(['M-1','M-2','M-3'],fontsize=20,weight='bold')
    plt.setp(ax.get_yticklabels(), fontsize=20, weight='bold')
    ax.set_xlabel(None)
    # The axis label is the column name; keep only the part before the first '_'.
    ax.set_ylabel(g.get_ylabel().split('_')[0]+' (z-score)',fontsize=20,weight='bold')

fig.tight_layout(pad=2)

## TI Clusters

In [None]:
# Logistic-regression rows for the four TI scores.
df_plot = log_reg_df.loc[['T1_norm','T2_norm','T3_norm','T4_norm']]
df_plot = df_plot.sort_values('coeff',ascending=False)

# -log10(q) carrying the sign of the regression coefficient.
df_plot['Signed log10(q-value)'] = -np.log10(df_plot['q_value'])*\
    np.sign(df_plot['coeff'])
df_plot = df_plot.sort_values('Signed log10(q-value)',ascending=False)
df_plot['hue'] = np.sign(df_plot["coeff"])

# Rename by key, not by position: the rows have just been re-sorted by signed
# q-value, so overwriting the index with ['TI-1', ..., 'TI-4'] would attach the
# wrong name to a bar whenever the sorted order is not T1..T4.
df_plot = df_plot.rename(index={'T1_norm':'TI-1','T2_norm':'TI-2',
                                'T3_norm':'TI-3','T4_norm':'TI-4'})

In [None]:
# Horizontal bars of signed log10(q) per TI cluster, coloured by coefficient sign.
plt.figure(figsize=(6,6))
h = sns.barplot(y=df_plot.index,x=df_plot['Signed log10(q-value)'],orient='h',
                hue=df_plot['hue'],dodge=False)

ax=plt.gca()
ymin, ymax = ax.get_ylim()

# Zero line plus mirrored reference lines at q = 0.1 (red) and q = 0.25 (purple).
plt.vlines(x=0,ymin=ymin,ymax=ymax,colors='k')
ax.axvline(linewidth=1,x=-np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axvline(linewidth=1,x=np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=np.log10(0.25),linestyle='dashed',c='purple')

plt.ylabel(None)
plt.xticks([-1,-0.5,0,0.5,1])
plt.setp(ax.get_yticklabels(), font = 'Arial',fontsize=18,weight='bold')
plt.setp(ax.get_xticklabels(), fontsize=18)
# Inverted y-range sized to the number of bars, so no bar is clipped.
plt.ylim(len(df_plot)-0.5,-0.5)

# Raw string: '\m' and '\h' are not valid Python escapes.
plt.xlabel(r'$\mathregular{Signed\hspace{0.5}log_{10}(q-value)}$',weight='bold',fontsize=18,labelpad=10)

ax.set_xticklabels(ax.get_xticks(),font='Arial',weight='bold',fontsize=18)
h.legend_.remove()

## M Clusters vs TI Clusters

In [None]:
fig,ax = plt.subplots(1,4,figsize=(14,2))

ylabels = ['De-differentiated (TI-1)','Adeno (TI-2)','Squamous (TI-3)','LCNE (TI-4)']
xlabels = ['Wound\nHealing\n(M-1)','Immune\nActivated\n(M-2)','Immune\nDesert\n(M-3)']

# One panel per tumor cluster: outlined violins of the three I-scores with the
# individual samples overlaid as a swarm.
for tc in [1,2,3,4]:
    panel = ax[tc-1]
    in_cluster = su2c_merge_master['Tumor_cluster']==tc
    scores = su2c_merge_master[in_cluster][['I1_norm','I2_norm','I3_norm']]

    axis = sns.violinplot(data=scores,ax=panel,inner=None)

    # Turn each filled violin into an outline in its original colour.
    for violin in axis.collections:
        fill_colour = violin.get_facecolor()
        violin.set_edgecolor(fill_colour)
        violin.set_facecolor((0,0,0,0))

    g = sns.swarmplot(data=scores,s=3,ax=panel)

    plt.setp(panel.get_yticklabels(), font = 'Arial',fontsize=10,weight='bold')
    plt.setp(panel.get_xticklabels(), fontsize=14)
    panel.set_ylim(-0.6,1.6)
    panel.set_ylabel(ylabels[tc-1],font='Arial',weight='bold',fontsize=10,
                     rotation=90)
    panel.set_xticklabels(xlabels,font='Arial',weight='bold',fontsize=10)

plt.subplots_adjust(wspace=0.3)

## TI-1 vs M-2

In [None]:
# Density contours of T1_norm against I2_norm with the samples drawn on top.
joint_axes = dict(data=su2c_merge_master, x='T1_norm', y='I2_norm')
_ = sns.kdeplot(**joint_axes)
_ = sns.scatterplot(alpha=0.5, **joint_axes)

# Square panel with the same range on both axes.
plt.axis('square')
plt.xlim(-0.5,1.5)
plt.ylim(-.5,1.5)

ax = plt.gca()

for tick_labels in (ax.get_yticklabels(), ax.get_xticklabels()):
    plt.setp(tick_labels, font='Arial', fontsize=14, weight='bold')

for axis_obj in (ax.xaxis, ax.yaxis):
    axis_obj.set_major_locator(plt.MaxNLocator(4))

axis_label_style = dict(font='Arial', weight='bold', fontsize=16, labelpad=14)
ax.set_xlabel("De-differentiated (TI-1)", **axis_label_style)
ax.set_ylabel("Immune Activated (M-2)", **axis_label_style)

## Response Rate Grid

In [None]:
# Response rate (mean of the response flag) in each quadrant obtained by
# splitting T1_norm and I2_norm at 0.5. Both comparisons are spelled out rather
# than negated so that rows with a missing score stay out of every quadrant.
t1_score = su2c_merge_master['T1_norm']
i2_score = su2c_merge_master['I2_norm']
bor_flag = su2c_merge_master['Harmonized_Confirmed_BOR_Bin']

a = bor_flag[(t1_score<0.5) & (i2_score<0.5)].mean()    # T1 low,  I2 low
b = bor_flag[(t1_score>=0.5) & (i2_score<0.5)].mean()   # T1 high, I2 low
c = bor_flag[(t1_score<0.5) & (i2_score>=0.5)].mean()   # T1 low,  I2 high
d = bor_flag[(t1_score>=0.5) & (i2_score>=0.5)].mean()  # T1 high, I2 high

# Rows: I2 high on top, I2 low below. Columns: T1 low, then T1 high.
# The lower row was labelled 'TME-2/3/4', which names the wrong clusters for
# the non-TME-2 group (only TME-1..3 appear in this notebook).
rr_df = pd.DataFrame([[c,d],[a,b]],index=['TME-2','TME-1/3'],columns=['TI-2/3/4','TI-1'])

In [None]:
# Annotated 2x2 heatmap of the quadrant response rates, colour-centred on 0.5.
# `square` is a boolean flag; the string 'equal' only worked by being truthy.
k = sns.heatmap(rr_df,cmap='coolwarm',center=0.5,vmin=0,vmax=1,square=True,annot=True,fmt="0.2f",annot_kws={"font":'Arial',"size":16})

ax = plt.gca()

plt.setp(ax.get_yticklabels(), font = 'Arial',fontsize=14,weight='bold')
plt.setp(ax.get_xticklabels(), font = 'Arial',fontsize=14,weight='bold')

# Replace the frame's own row/column names with low/high.
k.set_xticklabels(labels=['low','high'], va='center')
k.set_yticklabels(labels=['high','low'], va='center')

ax.tick_params(axis='x', which='major', pad=10)

_ = ax.set_xlabel("De-differentiated (TI-1)",font='Arial',weight='bold',fontsize=16,labelpad=14)
_ = ax.set_ylabel("Immune Activated (TME-2)",font='Arial',weight='bold',fontsize=16,labelpad=14)

# The heatmap mesh is the first collection on the axes; it owns the colour bar.
cbar = ax.collections[0].colorbar
cbar.ax.tick_params(labelsize=14,rotation=90)
_ = cbar.ax.set_ylabel("Response Rate",rotation=90,labelpad=30,font='Arial',fontsize=16)

# Heatmap

In [None]:
# Copy of the limma table indexed by '<ensembl id>:<symbol>'; a missing symbol
# is stringified, so it shows up as a ':nan' suffix.
su2c_limma_harm_names = su2c_limma_harm.copy()
su2c_limma_harm_names.index = [
    gene_id + ':' + str(symbol)
    for gene_id, symbol in zip(su2c_limma_harm['ensembl_gene_id_version'],
                               su2c_limma_harm['hgnc_symbol'])
]

# log(x + 1) of the expression matrix, indexed by '<Name>:<Description>' so the
# row labels follow the same pattern as the limma table.
expression = su2c_rna_harm.set_index(['Name','Description'])
su2c_rna_harm_log = np.log(expression + 1)
su2c_rna_harm_log.index = [
    name + ':' + description
    for name, description in zip(su2c_rna_harm['Name'],
                                 su2c_rna_harm['Description'])
]

# z-score each gene (row) across samples: transpose, standardise the columns,
# transpose back.
samples_by_gene = su2c_rna_harm_log.T
su2c_rna_harm_log_z = samples_by_gene.apply(scp.zscore).T

In [None]:
# Total number of genes to keep: half from each end of the ranking.
top_cutoff = 20

# Rank genes by -log10(p) carrying the sign of the log fold change.
su2c_limma_harm_names['signed_log10p'] = np.sign(su2c_limma_harm_names['logFC'])*(-np.log10(su2c_limma_harm_names['P.Value']))

# Sort once and take both tails from the same ranking.
ranked_genes = su2c_limma_harm_names.sort_values('signed_log10p')
half_cutoff = top_cutoff // 2
l_top_up = ranked_genes.tail(half_cutoff).index.to_list()
l_top_dn = ranked_genes.head(half_cutoff).index.to_list()
l_top_all = l_top_up+l_top_dn

# Drop genes without a symbol. Index entries look like '<ensembl id>:<symbol>'
# and a missing symbol is stringified to 'nan', so test for the ':nan' suffix;
# a plain substring test would also discard any name that merely contains 'nan'.
l_top_up_clean = [gene for gene in l_top_up if not gene.endswith(':nan')]
l_top_dn_clean = [gene for gene in l_top_dn if not gene.endswith(':nan')]
l_top_all_clean = [gene for gene in l_top_all if not gene.endswith(':nan')]

In [None]:
# z-scored expression of the selected genes only.
su2c_rna_harm_log_z_all = su2c_rna_harm_log_z.loc[l_top_all_clean]

# Left-join the master table onto the samples-by-genes matrix via the RNA
# sample ID, then index the result by participant ID.
su2c_rna_harm_log_merge = pd.merge(
    su2c_rna_harm_log_z_all.T,
    su2c_merge_master,
    how='left',
    left_index=True,
    right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',
).set_index('Harmonized_SU2C_Participant_ID_v2')

In [None]:
# Feature-name lists. NOTE(review): presumably the features passing a looser
# and a stricter q-value cutoff -- the cutoffs are not visible in this section.
# 'Amp_5p15.33 ' carries a trailing space on purpose; it has to match the
# column name exactly.
q_near_sig_list = [
    'PDL1_TPS',
    'ARID1A_MUT', 'ATM_MUT', 'EGFR_MUT', 'KEAP1_MUT', 'RBM10_MUT', 'SMARCA4_MUT',
    'Amp_5p15.33 ', 'TERT_CNV',
    'log_TMB', 'log_TMB_clonal', 'log_TMB_indel',
    'log_Neoantigens', 'log_Neoantigens_clonal',
    'log_Aging_Signature', 'log_Smoking_Signature', 'log_APOBEC_Signature',
    'T1_norm', 'I2_norm',
    'Macrophages/Monocytes_SF', 'hMono3_ZI', 'hN3_ZI',
]

q_sig_list = [
    'PDL1_TPS',
    'ATM_MUT',
    'Amp_5p15.33 ', 'TERT_CNV',
    'log_TMB', 'log_TMB_clonal',
    'log_Neoantigens', 'log_Neoantigens_clonal',
    'log_S1-SBS80_P', 'log_S3-SBS4_P', 'log_S5-SBS13_P',
    'T1_norm',
]

In [None]:
# Columns entering the correlation heatmap. The order is significant: the
# colour strip built further down assigns colours by column position.
select_cols = [
    'hMono3_ZI',
    'hN3_ZI',
    'Macrophages/Monocytes_SF',
    'Adenosine (Corvus)_HM',
    'ENSG00000204131.9:NHSL2',
    'EMT2 (PMID: 27321955)_HM',
    'TGF-B (Mariathasan Nature 2018)_HM',
    'ENSG00000143554.13:SLC27A3',
    'NFAT/NR4A1 family T cell dysfunction_HM',
    'ENSG00000116991.10:SIPA1L2',
    'ENSG00000158321.15:AUTS2',
    'ENSG00000152284.4:TCF7L1',
    'ENSG00000196504.15:PRPF40A',
    'Merck/Nanostring 18 gene\xa0T cell–inflamed GEP score_HM',
    'ENSG00000240065.7:PSMB9',
    'Antigen processing machinery (PMID: 27855702)_HM',
    'IFNG_HM',
    'ENSG00000092010.14:PSME1',
    'ENSG00000100911.15:PSME2',
    'PDL1_TPS',
    'T1_norm',
    'I2_norm',
    'log_APOBEC_Signature',
    'log_Aging_Signature',
    'log_TMB_indel',
    'log_Smoking_Signature',
    'log_Neoantigens',
    'log_Neoantigens_clonal',
    'log_TMB',
    'log_TMB_clonal',
    'EGFR_MUT',
    'Amp_5p15.33 ',
    'TERT_CNV',
    'KEAP1_MUT',
    'ENSG00000128564.6:VGF',
    'ATM_MUT',
    'SMARCA4_MUT',
    'ARID1A_MUT',
    'RBM10_MUT',
]

In [None]:
# Pairwise correlations between the selected columns; undefined correlations
# are shown as 0.
selected_features = su2c_rna_harm_log_merge[select_cols]
su2c_heatmap = selected_features.corr().fillna(0)

# Separate copy that receives the display names.
su2c_heatmap_renamed = su2c_heatmap.copy()

In [None]:
# Column name -> display name for the correlation heatmap.
# Values containing mathtext are raw strings: '\i' is not a valid Python escape
# and only survived through the interpreter's lenient handling of unknown
# escapes. The Merck key stays a normal string because its '\xa0' IS a real
# escape (non-breaking space) that must keep being interpreted.
# NOTE(review): the Macrophages/Monocytes label contains a literal backslash
# ('M$\phi$\Monocyte'); a '/' may have been intended -- left unchanged.
su2c_rename_dict = {
    'hMono3_ZI': 'hMono3 (Zilionas et al.)',
    'hN3_ZI': 'hN3 (Zilionas et al.)',
    'Macrophages/Monocytes_SF': r'M$\phi$\Monocyte (Sade-Feldman et al.)',
    'Adenosine (Corvus)_HM': 'A2AR (Willingham et al.)',
    'ENSG00000204131.9:NHSL2': r'$\it{NHSL2}$ Expression',
    'EMT2 (PMID: 27321955)_HM': 'EMT (Hedegaard et al.)',
    'TGF-B (Mariathasan Nature 2018)_HM': 'TGF-B (Mariathasan et al.)',
    'ENSG00000143554.13:SLC27A3': r'$\it{SLC27A3}$ Expression',
    'NFAT/NR4A1 family T cell dysfunction_HM': 'NR4A1 (Chen et al.)',
    'ENSG00000116991.10:SIPA1L2': r'$\it{SIPA1L2}$ Expression',
    'ENSG00000158321.15:AUTS2': r'$\it{AUTS2}$ Expression',
    'ENSG00000152284.4:TCF7L1': r'$\it{TCF7L1}$ Expression',
    'ENSG00000196504.15:PRPF40A': r'$\it{PRPF40A}$ Expression',
    'Merck/Nanostring 18 gene\xa0T cell–inflamed GEP score_HM': 'T cell–inflamed (Ayers et al.)',
    'ENSG00000240065.7:PSMB9': r'$\it{PSMB9}$ Expression',
    'Antigen processing machinery (PMID: 27855702)_HM': 'MHC Class I (Senbabaoglu et al.)',
    'IFNG_HM': 'IFNG Signature (Gao et al.)',
    'ENSG00000092010.14:PSME1': r'$\it{PSME1}$ Expression',
    'ENSG00000100911.15:PSME2': r'$\it{PSME2}$ Expression',
    'PDL1_TPS': 'PDL1 TPS',
    'T1_norm': 'De-differentiated (TI-1)',
    'I2_norm': 'Immune Activated (M-2)',
    'log_APOBEC_Signature': 'APOBEC burden',
    'log_Aging_Signature': 'Aging burden',
    'log_TMB_indel': 'TMB indel',
    'log_Smoking_Signature': 'Smoking burden',
    'log_Neoantigens': 'Neoantigens',
    'log_Neoantigens_clonal': 'Neoantigens clonal',
    'log_TMB': 'TMB',
    'log_TMB_clonal': 'TMB clonal',
    'EGFR_MUT': r'$\it{EGFR}$ Mutation',
    'Amp_5p15.33 ': '5p15.33 Amplification',
    'TERT_CNV': r'$\it{TERT}$ Amplification',
    'KEAP1_MUT': r'$\it{KEAP1}$ Mutation',
    'ENSG00000128564.6:VGF': r'$\it{VGF}$ Expression',
    'ATM_MUT': r'$\it{ATM}$ Mutation',
    'SMARCA4_MUT': r'$\it{SMARCA4}$ Mutation',
    'ARID1A_MUT': r'$\it{ARID1A}$ Mutation',
    'RBM10_MUT': r'$\it{RBM10}$ Mutation',
}

In [None]:
# Apply the display names to both axes of the correlation matrix.
su2c_heatmap_renamed = su2c_heatmap_renamed.rename(
    index=su2c_rename_dict,
    columns=su2c_rename_dict,
)

In [None]:
# Display names that receive the first palette_sig colour in the second colour
# strip of the clustermap ('Positive' in its legend). Mathtext entries are raw
# strings because '\i' is not a valid Python escape; the runtime values are
# unchanged and still match the renamed heatmap labels.
favorable_list = [
    r'$\it{PRPF40A}$ Expression',
    'T cell–inflamed (Ayers et al.)',
    r'$\it{PSMB9}$ Expression',
    'MHC Class I (Senbabaoglu et al.)',
    'IFNG Signature (Gao et al.)',
    r'$\it{PSME1}$ Expression',
    r'$\it{PSME2}$ Expression',
    'PDL1 TPS',
    'De-differentiated (TI-1)',
    'Immune Activated (M-2)',
    'APOBEC burden',
    'Aging burden',
    'TMB indel',
    'Smoking burden',
    'Neoantigens',
    'Neoantigens clonal',
    'TMB',
    'TMB clonal',
    r'$\it{KEAP1}$ Mutation',
    r'$\it{VGF}$ Expression',
    r'$\it{ATM}$ Mutation',
    r'$\it{SMARCA4}$ Mutation',
    r'$\it{ARID1A}$ Mutation',
    r'$\it{RBM10}$ Mutation',
]

In [None]:
palette_sig = sns.diverging_palette(150,10,n=8)

# First colour strip: four positional blocks of columns
# (0-11, 12-21, 22-29, 30 onwards), each with its own palette colour.
df_corr = su2c_heatmap_renamed
palette = sns.color_palette()
block_colors = [palette[4]]*12 + [palette[1]]*10 + [palette[0]]*8
color_dict = {
    column: (block_colors[position] if position < len(block_colors) else palette[6])
    for position, column in enumerate(df_corr.columns)
}
color_rows_heatmap = pd.Series(color_dict,name='Correlation Cluster')

# Second colour strip: one end of palette_sig for labels in favorable_list,
# the other end for everything else.
# NOTE(review): this Series carries the same name as the first strip although
# the legend calls it 'Response Association' -- possibly a copy-paste leftover.
df_corr = su2c_heatmap_renamed
color_dict = {
    label: (palette_sig[0] if label in favorable_list else palette_sig[7])
    for label in df_corr.index
}
color_sig_heatmap = pd.Series(color_dict,name='Correlation Cluster')

# Legend entries for the second strip.
lut = {'Positive': palette_sig[0], 'Negative': palette_sig[7]}

In [None]:
# Correlation heatmap with a hand-drawn column dendrogram.
#
# Step 1: run a default clustermap only to obtain the column linkage, then
# clear its figure so that it is not the one that gets styled below.
og = sns.clustermap(su2c_heatmap_renamed)
og.fig.clf()
# Step 2: draw the styled heatmap with clustering switched off on both axes,
# so rows and columns keep the order of su2c_heatmap_renamed; the two colour
# strips are placed above the columns.
g = sns.clustermap(su2c_heatmap_renamed,vmin=-1,vmax=1,cmap='coolwarm',\
              yticklabels=True,xticklabels=True,figsize=(15,15),\
                   col_colors=[color_rows_heatmap,color_sig_heatmap],row_cluster=False,col_cluster=False)
# Empty the column-dendrogram axes of the styled figure; the dendrogram is
# drawn into it manually below.
axx = g.ax_col_dendrogram.axes
axx.clear()

# Step 3: take the linkage from the first clustermap, swap rows 35 and 36
# (modifying the array in place) and draw it in black.
# NOTE(review): the row swap presumably adjusts how the last merges are drawn;
# it hard-codes the row positions, so it depends on the number of columns.
# NOTE(review): the heatmap columns are NOT reordered by this linkage; the
# dendrogram only lines up with them if select_cols is already in leaf order.
link = og.dendrogram_col.linkage
link[[36,35]] = link[[35,36]]
dendrogram(link, 
           color_threshold=0, 
           ax=axx,
           #truncate_mode='lastp',
           orientation='top',
           link_color_func=lambda x: 'k'
          );

# Move the colour bar to the top-left corner and add a legend for the second
# colour strip, anchored to the figure's top-right corner.
g.ax_cbar.set_position((0.05, .9, .025, .1))
handles = [mpatches.Patch(facecolor=lut[name]) for name in lut]
plt.legend(handles, lut, title='Response Association',
           bbox_to_anchor=(1, 1), bbox_transform=plt.gcf().transFigure, loc='upper right')
# Re-apply the existing tick labels with a fixed font size.
_ = g.ax_heatmap.set_xticklabels(g.ax_heatmap.get_xmajorticklabels(), fontsize = 12)
_ = g.ax_heatmap.set_yticklabels(g.ax_heatmap.get_ymajorticklabels(), fontsize = 12)

g.ax_heatmap.yaxis.set_label_position('left')


# Survival Curves

## Biomarker Subsets

In [None]:
# Features tested within the high-TMB subset.
interaction_list_tmb = [
    'hMono3_ZI',
    'Macrophages/Monocytes_SF',
    'T1_norm',
    'I2_norm',
    'log_TMB_indel',
    'TERT_CNV',
    'ATM_MUT',
]

# The PD-L1 subsets use the same features plus log_TMB itself, placed directly
# before log_TMB_indel.
interaction_list_pdl1 = list(interaction_list_tmb)
interaction_list_pdl1.insert(interaction_list_pdl1.index('log_TMB_indel'), 'log_TMB')

In [None]:
# Columns of the per-subset result tables built below.
LOGRANK_COLUMNS = ['feature','p_value','-log10(p_value)','test_stat','med_pfs_low','med_pfs_high']


def _median_pfs(group, duration_col, event_col):
    """Return the Kaplan-Meier median survival time of the rows in *group*."""
    kmf = KaplanMeierFitter()
    return kmf.fit(group[duration_col], group[event_col]).median_survival_time_


def _logrank_rows(df, features,
                  duration_col='Harmonized_PFS_Months',
                  event_col='Harmonized_PFS_Event',
                  median_split_features=('log_TMB','log_TMB_indel','T1_norm','I2_norm')):
    """Log-rank test of high vs. low PFS for each feature within one patient subset.

    Parameters
    ----------
    df : DataFrame
        Patient subset; must contain every feature plus the two survival columns.
    features : list of str
        Feature columns to test.
    duration_col, event_col : str
        Survival time and event-indicator columns.
    median_split_features : tuple of str
        Features dichotomised at their median within ``df``; every other
        feature is dichotomised at zero.

    Returns
    -------
    list of list
        One ``[feature, p, -log10(p), test statistic, median PFS low,
        median PFS high]`` row per feature, in the order of ``features``.
        All values are NaN when the split leaves fewer than two groups.
    """
    rows = []
    for feature in features:
        df_filt = df[[feature,duration_col,event_col]].dropna()

        # The median is taken over the whole subset, i.e. before rows lacking
        # survival data are dropped -- unchanged from the original analysis.
        threshold = df[feature].median() if feature in median_split_features else 0
        is_high = df_filt[feature] > threshold

        # A log-rank comparison needs two non-empty groups. Record NaNs rather
        # than failing; rows with a NaN p-value are dropped downstream.
        if is_high.all() or not is_high.any():
            rows.append([feature] + [np.nan]*5)
            continue

        results = multivariate_logrank_test(df_filt[duration_col],
                                            is_high.astype('category').cat.codes,
                                            df_filt[event_col])

        med_pfs_low = _median_pfs(df_filt[~is_high], duration_col, event_col)
        med_pfs_high = _median_pfs(df_filt[is_high], duration_col, event_col)

        rows.append([feature,results.p_value,-np.log10(results.p_value),
                     results.test_statistic,med_pfs_low,med_pfs_high])
    return rows


# High-TMB subset: log_TMB above log10(331/33).
ls_high_tmb = _logrank_rows(
    su2c_merge_master[su2c_merge_master['log_TMB']>np.log10(331/33)],
    interaction_list_tmb)
df_high_tmb = pd.DataFrame(ls_high_tmb,columns=LOGRANK_COLUMNS)

# PD-L1 high subset: TPS of at least 0.5.
ls_pdl1_high = _logrank_rows(
    su2c_merge_master[su2c_merge_master['PDL1_TPS']>=0.5],
    interaction_list_pdl1)
df_pdl1_high = pd.DataFrame(ls_pdl1_high,columns=LOGRANK_COLUMNS)

# PD-L1 low subset: TPS of at most 0.01.
# NOTE(review): the I2_norm PFS plot later in this notebook selects
# PDL1_TPS < 0.01 (strict); confirm which cutoff is intended.
ls_pdl1_low = _logrank_rows(
    su2c_merge_master[su2c_merge_master['PDL1_TPS']<=0.01],
    interaction_list_pdl1)
df_pdl1_low = pd.DataFrame(ls_pdl1_low,columns=LOGRANK_COLUMNS)

In [None]:
# Tag each result table with its patient subset and stack them, discarding
# tests without a p-value.
for subset_frame, subset_name in ((df_high_tmb, 'High TMB'),
                                  (df_pdl1_high, 'High PDL1'),
                                  (df_pdl1_low, 'Low PDL1')):
    subset_frame['subset'] = subset_name

df_subset = pd.concat([df_high_tmb,df_pdl1_high,df_pdl1_low]).dropna(subset=['p_value'])

# Benjamini-Hochberg q-values across all stacked tests.
df_subset['q'] = multi.multipletests(df_subset['p_value'],method='fdr_bh')[1]
df_subset['-log10(q-values)'] = -np.log10(df_subset['q'])

# 'favorable' means the high group has the longer median PFS.
df_subset['favorable'] = df_subset['med_pfs_low']<df_subset['med_pfs_high']
df_subset = df_subset.reset_index(drop=True)

# Positive for favorable features, negative otherwise.
df_subset['Signed log10(q-value)'] = np.where(df_subset['favorable'],
                                              df_subset['-log10(q-values)'],
                                              -1*df_subset['-log10(q-values)'])

# Display group per feature; anything not listed falls into group '4'.
feature_cluster = {
    'hMono3_ZI': '1', 'Macrophages/Monocytes_SF': '1',
    'PDL1_TPS': '2', 'T1_norm': '2', 'I2_norm': '2',
    'log_TMB': '3', 'log_TMB_indel': '3',
}
df_subset['cluster'] = [feature_cluster.get(feature, '4') for feature in df_subset['feature']]

df_subset = df_subset.sort_values(by=['subset','cluster'])

In [None]:
# Signed log10(q) per feature within the high-TMB subset.
# A stable sort keeps the within-cluster feature order deterministic.
df_plot=df_subset[df_subset['subset']=='High TMB'].sort_values(by='cluster',kind='mergesort')
plt.figure(figsize=(6,6))

# Display names are looked up per feature, so every bar gets its own label even
# if a row was dropped upstream or the order changes. Mathtext entries are raw
# strings because '\i' is not a valid Python escape.
ylabel_map = {'hMono3_ZI':'hMono3',
              'Macrophages/Monocytes_SF':'Macrophages/Monocytes',
              'T1_norm':'De-differentiated (TI-1)',
              'I2_norm':'Immune Activated (M-2)',
              'log_TMB_indel':'TMB indels',
              'TERT_CNV':r'$\it{TERT}$ Amplification',
              'ATM_MUT':r'$\it{ATM}$ Mutation'}
ylabels = [ylabel_map[feature] for feature in df_plot['feature']]

h = sns.barplot(y=df_plot['feature'],x=df_plot['Signed log10(q-value)'],orient='h',
                hue=df_plot['favorable'],dodge=False)
ax=plt.gca()
ymin, ymax = ax.get_ylim()

# Zero line plus mirrored reference lines at q = 0.1 (red) and q = 0.25 (purple).
plt.vlines(x=0,ymin=ymin,ymax=ymax,colors='k')
ax.axvline(linewidth=1,x=-np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axvline(linewidth=1,x=np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=np.log10(0.25),linestyle='dashed',c='purple')
plt.ylabel(None)
plt.setp(ax.get_yticklabels(),font = 'Arial',fontsize=18,weight='bold')
plt.setp(ax.get_xticklabels(), fontsize=18)
plt.xlim(-1.5,1.5)
# Inverted y-range sized to the number of bars, so no bar is clipped.
plt.ylim(len(df_plot)-0.5,-0.5)
h.set_yticklabels(ylabels)
plt.xlabel(r'$\mathregular{Signed\hspace{0.5}log_{10}(q-value)}$',weight='bold',fontsize=18,labelpad=10)
ax.set_xticklabels(ax.get_xticks(),font='Arial',weight='bold',fontsize=18)
h.legend_.remove()

In [None]:
# Signed log10(q) per feature within the PD-L1 high subset.
# A stable sort keeps the within-cluster feature order deterministic.
df_plot=df_subset[df_subset['subset']=='High PDL1'].sort_values(by='cluster',kind='mergesort')
plt.figure(figsize=(6,6))

# Display names are looked up per feature, so every bar gets its own label even
# if a row was dropped upstream or the order changes. Mathtext entries are raw
# strings because '\i' is not a valid Python escape.
ylabel_map = {'hMono3_ZI':'hMono3',
              'Macrophages/Monocytes_SF':'Macrophages/Monocytes',
              'T1_norm':'De-differentiated (TI-1)',
              'I2_norm':'Immune Activated (M-2)',
              'log_TMB':'TMB',
              'log_TMB_indel':'TMB Indel',
              'TERT_CNV':r'$\it{TERT}$ Amplification',
              'ATM_MUT':r'$\it{ATM}$ Mutation'}
ylabels = [ylabel_map[feature] for feature in df_plot['feature']]

h = sns.barplot(y=df_plot['feature'],x=df_plot['Signed log10(q-value)'],orient='h',
                hue=df_plot['favorable'],dodge=False)
ax=plt.gca()
ymin, ymax = ax.get_ylim()

# Zero line plus mirrored reference lines at q = 0.1 (red) and q = 0.25 (purple).
plt.vlines(x=0,ymin=ymin,ymax=ymax,colors='k')
ax.axvline(linewidth=1,x=-np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axvline(linewidth=1,x=np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=np.log10(0.25),linestyle='dashed',c='purple')
plt.ylabel(None)
plt.setp(ax.get_yticklabels(),font = 'Arial',fontsize=18,weight='bold')
plt.setp(ax.get_xticklabels(), fontsize=12)
plt.xlim(-1.5,1.5)
# This subset tests eight features; the fixed (6.5, -0.5) range copied from
# the seven-feature plot cut off the last bar. Size the range to the data.
plt.ylim(len(df_plot)-0.5,-0.5)
h.set_yticklabels(ylabels)
plt.xlabel(r'$\mathregular{Signed\hspace{0.5}log_{10}(q-value)}$',weight='bold',fontsize=18,labelpad=10)
ax.set_xticklabels(ax.get_xticks(),font='Arial',weight='bold',fontsize=18)
h.legend_.remove()

In [None]:
# Signed log10(q) per feature within the PD-L1 low subset.
# A stable sort keeps the within-cluster feature order deterministic.
df_plot=df_subset[df_subset['subset']=='Low PDL1'].sort_values(by='cluster',kind='mergesort')
plt.figure(figsize=(6,6))

# Display names are looked up per feature, so every bar gets its own label even
# if a row was dropped upstream or the order changes. Mathtext entries are raw
# strings because '\i' is not a valid Python escape.
ylabel_map = {'hMono3_ZI':'hMono3',
              'Macrophages/Monocytes_SF':'Macrophages/Monocytes',
              'T1_norm':'De-differentiated (TI-1)',
              'I2_norm':'Immune Activated (M-2)',
              'log_TMB':'TMB',
              'log_TMB_indel':'TMB Indel',
              'TERT_CNV':r'$\it{TERT}$ Amplification',
              'ATM_MUT':r'$\it{ATM}$ Mutation'}
ylabels = [ylabel_map[feature] for feature in df_plot['feature']]

h = sns.barplot(y=df_plot['feature'],x=df_plot['Signed log10(q-value)'],orient='h',
                hue=df_plot['favorable'],dodge=False)
ax=plt.gca()
ymin, ymax = ax.get_ylim()

# Zero line plus mirrored reference lines at q = 0.1 (red) and q = 0.25 (purple).
plt.vlines(x=0,ymin=ymin,ymax=ymax,colors='k')
ax.axvline(linewidth=1,x=-np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=-np.log10(0.25),linestyle='dashed',c='purple')
ax.axvline(linewidth=1,x=np.log10(0.1),linestyle='-',c='r')
ax.axvline(linewidth=1,x=np.log10(0.25),linestyle='dashed',c='purple')
plt.ylabel(None)
plt.setp(ax.get_yticklabels(),font = 'Arial',fontsize=18,weight='bold')
plt.setp(ax.get_xticklabels(), fontsize=12)
plt.xlim(-1.5,1.5)
# This subset tests eight features; the fixed (6.5, -0.5) range copied from
# the seven-feature plot cut off the last bar. Size the range to the data.
plt.ylim(len(df_plot)-0.5,-0.5)
h.set_yticklabels(ylabels)
plt.xlabel(r'$\mathregular{Signed\hspace{0.5}log_{10}(q-value)}$',weight='bold',fontsize=18,labelpad=10)
ax.set_xticklabels(ax.get_xticks(),font='Arial',weight='bold',fontsize=18)
h.legend_.remove()

## Biomarker PFS Plots

In [None]:
# PFS curves for Macrophages/Monocytes high vs. low within the PD-L1 high subset.
# No plt.figure() call here: survival_curve_pair opens its own figure, so an
# extra one would only leave an empty figure behind. The final size is set at
# the end of the cell.
feature_col = 'Macrophages/Monocytes_SF'
duration_col = 'Harmonized_PFS_Months'
event_col = 'Harmonized_PFS_Event'

df = su2c_merge_master[su2c_merge_master['PDL1_TPS']>=.5]
df_filt = df[[feature_col,duration_col,event_col]].dropna()
# True = score above zero (high), False = low.
df_filt[feature_col] = df_filt[feature_col].apply(lambda x: x>0)

medpfs, ax = survival_curve_pair(df_filt,feature_col,duration_col,event_col)

plt.ylim(0,1.1)
ax.set_ylabel("PFS Probability",font='Arial',weight='bold',fontsize=16,labelpad = 14)
ax.set_xlabel("Months",font='Arial',weight='bold',fontsize=16,labelpad=14)
ax.set_xticklabels(np.round(ax.get_xticks()),font='Arial',weight='bold',fontsize=14)
ax.set_yticklabels(np.round(ax.get_yticks(),1),font='Arial',weight='bold',fontsize=14)
L = ax.legend(frameon=False,prop={'size':12,'weight':'bold'})

# Name each curve by the group it was fitted on, not by legend position.
# survival_curve_pair plots the first get_dummies column first, which for a
# boolean feature is False (the low group), so positional assignment would put
# the "high" label on the low curve.
group_names = {'True': "Macrophages/Monocytes high",
               'False': "Macrophages/Monocytes low"}
legend_texts = L.get_texts()
if {text.get_text() for text in legend_texts} == set(group_names):
    for text in legend_texts:
        text.set_text(group_names[text.get_text()])
else:
    # Unexpected curve labels: keep the previous positional behaviour.
    legend_texts[0].set_text("Macrophages/Monocytes high")
    legend_texts[1].set_text("Macrophages/Monocytes low")

ax.xaxis.set_major_formatter(FormatStrFormatter('%.0f'))
fig = plt.gcf()
fig.set_size_inches(8, 4)

In [None]:
# PFS curves for I2_norm high vs. low within the PD-L1 low subset.
feature_col = 'I2_norm'
duration_col = 'Harmonized_PFS_Months'
event_col = 'Harmonized_PFS_Event'

# NOTE(review): the log-rank loop earlier in this notebook defines the PD-L1
# low subset as PDL1_TPS <= 0.01 and splits at the median of that subset; here
# the cutoff is strict (< 0.01) and the median is taken over the whole cohort.
# Confirm which definition the figure should follow.
df = su2c_merge_master[su2c_merge_master['PDL1_TPS']<0.01]
df_filt = df[[feature_col,duration_col,event_col]].dropna()
med = su2c_merge_master[feature_col].median()

# True = score above the cohort median (high), False = at or below it.
df_filt[feature_col] = (df_filt[feature_col]>med)

medpfs, ax = survival_curve_pair(df_filt,feature_col,duration_col,event_col)
plt.ylim(0,1.1)
ax.set_ylabel("PFS Probability",font='Arial',weight='bold',fontsize=16,labelpad = 14)
ax.set_xlabel("Months",font='Arial',weight='bold',fontsize=16,labelpad=14)
ax.set_xticklabels(np.round(ax.get_xticks()),font='Arial',weight='bold',fontsize=14)
ax.set_yticklabels(np.round(ax.get_yticks(),1),font='Arial',weight='bold',fontsize=14)
L = ax.legend(frameon=False,prop={'size':12,'weight':'bold'})

# Name each curve by the group it was fitted on, not by legend position.
# survival_curve_pair plots the first get_dummies column first, which for a
# boolean feature is False (the low group), so positional assignment would put
# the TME-2 label on the low-score curve.
group_names = {'True': "Immune Activation (TME-2)",
               'False': "Other TME subtype"}
legend_texts = L.get_texts()
if {text.get_text() for text in legend_texts} == set(group_names):
    for text in legend_texts:
        text.set_text(group_names[text.get_text()])
else:
    # Unexpected curve labels: keep the previous positional behaviour.
    legend_texts[0].set_text("Immune Activation (TME-2)")
    legend_texts[1].set_text("Other TME subtype")

ax.xaxis.set_major_formatter(FormatStrFormatter('%.0f'))
fig = plt.gcf()
fig.set_size_inches(8, 4)