## Immunotherapy Response Prediction

This notebook is for immunotherapy response prediction. 

The data are provided in Table S7, and the results are shown in Figures 5 and 6.

In [None]:
# Wildcard import from the project helper module. No other import appears in
# this notebook, so the pd / plt / sns names and the plotting helpers
# (HRPlot, BurdenBoxPlot, MetricPerformance, ...) used by the cells below are
# presumably all re-exported by ici_utils -- TODO confirm.
from ici_utils import *

# Figure resolution; passed as dpi= to the plt.subplots calls in later cells.
dpi = 600

In [None]:
### loading

# Name of the response-label column in ici_df; the later cells pass it to the
# plotting / performance helpers.
label_col = 'ICI response'

# Read the cohort table into ici_df, the frame every later cell works from.
ici_file = 'TableS7.csv' # fill in the path of TableS7.csv
ici_df = pd.read_csv(ici_file)

### Allele benefit scores

In [None]:
### Cox model

# arguments
cancers = ['melanoma', 'NSCLC']
confounders = ['sex', 'age']
metrics = ['benefitScore_I', 'benefitScore_II', 'benefitScore_dual', 'logTMB', 'combinedScore']
# underscore alias -> original (hyphenated) column name, also used for display
method_rename_dict = {
    'benefitScore_I': 'benefitScore-I',
    'benefitScore_II': 'benefitScore-II',
    'benefitScore_dual': 'benefitScore-dual',
}

# Copy every hyphenated score column under its underscore alias
# (because "-" is not a valid symbol in Cox model function).
for alias, source_col in method_rename_dict.items():
    ici_df[alias] = ici_df[source_col]

# plot: one hazard-ratio panel per cancer type
fig, ax = plt.subplots(1, 2, figsize=(7, 3), dpi=dpi)
for panel, cancer in zip(ax, cancers):
    # melanoma is analysed on OS, every other cancer on PFS
    if cancer == 'melanoma':
        event = 'OS'
    else:
        event = 'PFS'
    duration = f'{event}.time'

    # restrict to this cancer and drop rows missing survival data or confounders
    required_cols = [duration, event] + confounders
    cancer_df = ici_df[ici_df['cancer'] == cancer].dropna(subset=required_cols)
    print(f'#Samples in {cancer} =', cancer_df.shape[0])

    HRPlot(cancer_df, metrics, confounder_cols=confounders, duration_col=duration,
           method_rename_dict=method_rename_dict, event_col=event, ax=panel)
    panel.set_title(cancer)
    panel.set_xlabel('')

# the right panel drops its y tick labels; a single shared x label is used
ax[1].set_yticklabels('')
fig.supxlabel('Hazard Ratio (95% CI)')
fig.tight_layout()

### Neoantigen landscape w/o tumor clonality

In [None]:
### burden

x_cols = ['TMB', 'TNB', 'NP-Immuno-dual.burden']
method_rename_dict = {'TMB': 'TMB', 'TNB': 'TNB', 'NP-Immuno-dual.burden': 'NPB'}
rename_kwargs = {'method_rename_dict': method_rename_dict}

# Box plots: melanoma on the left (legend suppressed), NSCLC on the right.
fig, ax = plt.subplots(1, 2, figsize=(10, 3), dpi=dpi)
melanoma_ax, nsclc_ax = ax
BurdenBoxPlot(ici_df, x_cols, label_col, 'melanoma', ax=melanoma_ax, legend=False, **rename_kwargs)
BurdenBoxPlot(ici_df, x_cols, label_col, 'NSCLC', ax=nsclc_ax, **rename_kwargs)
fig.tight_layout()

# Bar plot for Cliff's delta, both cancers on a single axis.
fig, ax = plt.subplots(1, 1, figsize=(3, 3), dpi=dpi)
BurdenCliffPlot(ici_df, x_cols, label_col, ['melanoma', 'NSCLC'], ax=ax, **rename_kwargs)
fig.tight_layout()

In [None]:
### performance

# Compute per-cancer performance for every metric, including the integrated one.
x_cols = ['TMB', 'TNB', 'NP-LandscapeSum', 'NP-LandscapeSum(Integrated)']
perf_df = MetricPerformance(ici_df, x_cols, label_col, group_col='cancer')

# The ROC panels are drawn for a subset of the metrics only.
x_cols = ['TMB', 'TNB', 'NP-LandscapeSum']
method_rename_dict = {
    'TMB': 'TMB',
    'TNB': 'TNB',
    'NP-LandscapeSum': 'NP-Sum',
    'NP-LandscapeSum(Integrated)': 'NP-Integrated',
}

# Layout: a wide AUROC bar plot followed by one ROC panel per cancer.
fig, ax = plt.subplots(1, 3, figsize=(12, 3), dpi=dpi, gridspec_kw={'width_ratios': [2, 1, 1]})
bar_ax, melanoma_ax, nsclc_ax = ax
PerformanceBarPlot(perf_df, 'AUROC', ax=bar_ax, method_rename_dict=method_rename_dict, ncol=4)
ROCCurve(ici_df, x_cols, label_col, 'melanoma', method_rename_dict=method_rename_dict, ax=melanoma_ax)
ROCCurve(ici_df, x_cols, label_col, 'NSCLC', method_rename_dict=method_rename_dict, ax=nsclc_ax)

fig.tight_layout()

In [None]:
### comparison between I, II, and dual

# Column name -> display label for the three landscape-sum variants.
method_rename_dict = {
    'NP-LandscapeSum-I': 'MHC-I',
    'NP-LandscapeSum-II': 'MHC-II',
    'NP-LandscapeSum': 'MHC-dual',
}
x_cols = ['NP-LandscapeSum-I', 'NP-LandscapeSum-II', 'NP-LandscapeSum']

# Per-cancer performance, shown as an AUROC bar plot.
perf_df = MetricPerformance(ici_df, x_cols, label_col, group_col='cancer')
PerformanceBarPlot(perf_df, 'AUROC', method_rename_dict=method_rename_dict, figsize=(5,3))

In [None]:
### survival curve

# One panel per (cancer, event column, duration column, metric) combination.
tasks = [
    ('melanoma', 'OS', 'OS.time', 'TMB'),
    ('melanoma', 'OS', 'OS.time', 'NP-LandscapeSum'),
    ('NSCLC', 'PFS', 'PFS.time', 'TMB'),
    ('NSCLC', 'PFS', 'PFS.time', 'NP-LandscapeSum'),
]
# Display names. 'NP-LandscapeSum' is the metric actually plotted above, so it
# gets the same 'NP-Sum' label used by the performance cells; the pre-existing
# entries are kept unchanged.
method_rename_dict = {
    'TMB': 'TMB',
    'TNB': 'TNB',
    'NP-LandscapeSum': 'NP-Sum',
    'NP-LandscapeSum(Integrated)': 'NP-Integrated',
}

fig, ax = plt.subplots(1, 4, figsize=(16, 3), dpi=dpi)
for i, task in enumerate(tasks):
    cancer, event, duration, method = task
    SurvivalCurvePlot(ici_df, method, cancer, event_col=event, duration_col=duration,
                      method_rename_dict=method_rename_dict, ax=ax[i])

fig.tight_layout()

### Clonal analysis

In [None]:
### sGini vs. pGini

g = sns.JointGrid(data=ici_df, x='sGini', y='pGini', hue='cancer', height=4, ratio=4)
joint_ax = g.ax_joint

# Joint panel: colour encodes the cancer type, marker shape the response label.
sns.scatterplot(data=ici_df, x='sGini', y='pGini', hue='cancer', style=label_col,
                ax=joint_ax, s=10)

# Median guide lines per cancer, coloured by the category's position in
# order of first appearance within the default palette.
palette = sns.color_palette()
for color_idx, category in enumerate(ici_df['cancer'].unique()):
    subset = ici_df[ici_df['cancer'] == category]
    line_color = palette[color_idx]
    joint_ax.axvline(subset['sGini'].median(), color=line_color,
                     linestyle='--', alpha=0.5, lw=0.5, label='')
    joint_ax.axhline(subset['pGini'].median(), color=line_color,
                     linestyle='-.', alpha=0.5, lw=0.5, label='')

# Marginal distributions, normalised within each cancer type.
marginal_kwargs = dict(hue='cancer', stat='probability', common_norm=False,
                       bins=30, element='step', legend=False)
sns.histplot(data=ici_df, x='sGini', ax=g.ax_marg_x, **marginal_kwargs)
sns.histplot(data=ici_df, y='pGini', ax=g.ax_marg_y, **marginal_kwargs)

# Rebuild the legend from entries 1-2 and 4-5 of the auto-generated legend
# (entries 0 and 3 are skipped -- presumably the hue / style section titles),
# then spell out the 0/1 response codes.
handles, labels = joint_ax.get_legend_handles_labels()
keep = [1, 2, 4, 5]
handles = [handles[k] for k in keep if k < len(handles)]
labels = [labels[k].replace('0', 'Negative').replace('1', 'Positive')
          for k in keep if k < len(labels)]
legend = joint_ax.legend(handles, labels, title='', bbox_to_anchor=(-.2, 1.1),
                         loc='lower left', ncol=2, fontsize=8)
g.fig.add_artist(legend)

In [None]:
### smoking status

# Smoking-status comparisons are restricted to the NSCLC cohort. The subset
# was previously built but never used, so both plots read the full table.
nsclc_df = ici_df[ici_df['cancer']=='NSCLC']
hue_order = ['never', 'former', 'current']


# clonal ratio: one density per smoking group, each normalised separately
fig, ax = plt.subplots(1, 1, figsize=(5, 3), dpi=dpi)
sns.kdeplot(data=nsclc_df, x='%ClonalMuts', hue='smoking_status', hue_order=hue_order,
            common_norm=False, clip=(0, 1), fill=True, ax=ax)
sns.move_legend(ax, loc='lower left', bbox_to_anchor=(0, 1), title='', ncol=3)
fig.tight_layout()

# binding ratio: same layout for the MHC-I binding fraction
fig, ax = plt.subplots(1, 1, figsize=(5, 3), dpi=dpi)
sns.kdeplot(data=nsclc_df, x='%Binding-I', hue='smoking_status', hue_order=hue_order,
            common_norm=False, clip=(0, 1), fill=True, ax=ax)
sns.move_legend(ax, loc='upper left', bbox_to_anchor=(0, 1))
fig.tight_layout()

In [None]:
### melanoma vs. NSCLC

# One figure per ratio column (clonal ratio first, then binding ratio): a
# filled density per cancer type, each normalised separately and clipped to [0, 1].
for ratio_col in ('%ClonalMuts', '%Binding-I'):
    fig, ax = plt.subplots(1, 1, figsize=(5, 3), dpi=dpi)
    sns.kdeplot(data=ici_df, x=ratio_col, hue='cancer',
                common_norm=False, clip=(0, 1), fill=True, ax=ax)
    sns.move_legend(ax, loc='upper left', bbox_to_anchor=(0, 1))
    fig.tight_layout()

### Neoantigen landscape w/ tumor clonality

In [None]:
### performance

# Metrics compared in this section; only the NP-* columns get a display alias.
x_cols = ['TMB','TNB', 'CSiN', 'ioTNL', 'NP-LandscapeSum', 'NP-LandscapeCCF', 'NP-LandscapeClone']
method_rename_dict = {
    'NP-LandscapeSum': 'NP-Sum',
    'NP-LandscapeCCF': 'NP-CCF',
    'NP-LandscapeClone': 'NP-Clone'
}
perf_df = MetricPerformance(ici_df, x_cols, label_col, group_col='cancer')

# Layout: a wide AUROC bar plot followed by one ROC panel per cancer.
fig, ax = plt.subplots(1, 3, figsize=(16, 3), dpi=dpi, gridspec_kw={'width_ratios': [2, 1, 1]})
bar_ax, melanoma_ax, nsclc_ax = ax
PerformanceBarPlot(perf_df, 'AUROC', ax=bar_ax, method_rename_dict=method_rename_dict, ncol=4)
ROCCurve(ici_df, x_cols, label_col, 'melanoma', method_rename_dict=method_rename_dict, ax=melanoma_ax)
ROCCurve(ici_df, x_cols, label_col, 'NSCLC', method_rename_dict=method_rename_dict, ax=nsclc_ax)

fig.tight_layout()

In [None]:
### group by heterogeneity

# Samples are split on these two columns before scoring each metric.
split_col1 = 'sGini'
split_col2 = 'pGini'

# performance
x_cols = ['TMB', 'CSiN', 'ioTNL', 'NP-LandscapeSum', 'NP-LandscapeCCF', 'NP-LandscapeClone']
method_rename_dict = {}
perf_df = FourGroupsPerf(ici_df, split_col1, split_col2, x_cols, label_col)

# plot: one row of bars per cancer found in the performance table
fig, ax = plt.subplots(2, 1, figsize=(12, 6), dpi=dpi)
cancers = perf_df['cancer'].unique().tolist()
for i, cancer in enumerate(cancers):
    panel = ax[i]
    tmp_perf_df = perf_df[perf_df['cancer']==cancer]
    PerformanceBarPlot(tmp_perf_df, 'AUROC', group_col='group', ax=panel,
                       method_rename_dict=method_rename_dict, ncol=6)
    panel.set_title(cancer, loc='left')

# Only the bottom panel keeps x tick labels and only the top panel keeps its legend.
# NOTE(review): the labels are hard-coded; confirm they match the group order
# produced by FourGroupsPerf.
group_labels = [
    'low sGini - low pGini',
    'low sGini - high pGini',
    'high sGini - low pGini',
    'high sGini - high pGini',
]
ax[0].set_xticklabels('')
ax[1].set_xticklabels(group_labels)
ax[1].get_legend().remove()
fig.tight_layout()

### Smoking status

In [None]:
### NSCLC data

# Display order for the smoking categories, shared by the cells below.
smoking_order = ['never', 'former', 'current']

# NSCLC-only subset used by the remaining cells of this section.
is_nsclc = ici_df['cancer'] == 'NSCLC'
cancer_df = ici_df[is_nsclc]

In [None]:
### TMB

# Bar plot of TMB per smoking category; the x position and the colour encode
# the same variable, both in smoking_order.
fig, ax = plt.subplots(1, 1, figsize=(3, 4), dpi=dpi)
sns.barplot(
    data=cancer_df,
    x='smoking_status',
    y='TMB',
    hue='smoking_status',
    order=smoking_order,
    hue_order=smoking_order,
    palette='muted',
    ax=ax,
)
fig.tight_layout()

In [None]:
### heterogeneity

# Stacked, normalised histogram: share of each smoking category within every
# heterogeneity group.
fig, ax = plt.subplots(1, 1, figsize=(4,4), dpi=dpi)
sns.histplot(
    data=cancer_df,
    x='heterogeneity_group',
    hue='smoking_status',
    hue_order=smoking_order,
    multiple='fill',
    palette='muted',
    ax=ax,
)

# Axis cosmetics: no x label, rotated group names, legend outside on the right.
ax.set_xlabel('')
ax.set_ylabel('Proportion')
ax.tick_params(axis='x', rotation=90)
sns.move_legend(ax, loc='center left', bbox_to_anchor=(1, 0.5))
fig.tight_layout()

In [None]:
### immunotherapy prediction performance

x_cols = ['TMB', 'CSiN', 'ioTNL', 'NP-LandscapeSum', 'NP-LandscapeCCF', 'NP-LandscapeClone']

# Score every metric separately within each smoking category. The per-group
# tables are collected in a list and concatenated once, instead of growing a
# DataFrame inside the loop from an empty seed frame.
perf_frames = []
for smoke in smoking_order:
    tmp_df = cancer_df[cancer_df['smoking_status']==smoke]
    tmp_perf_df = MetricPerformance(tmp_df, x_cols, label_col)
    tmp_perf_df['smoking_status'] = smoke  # tag rows with their smoking group
    perf_frames.append(tmp_perf_df)
perf_df = pd.concat(perf_frames, axis=0)

# plot: AUROC per smoking category, one bar per method
fig, ax = plt.subplots(1, 1, figsize=(3, 4), dpi=dpi)
sns.barplot(data=perf_df, x='smoking_status', y='AUROC', hue='method', palette='pastel', ax=ax)
sns.move_legend(ax, title='', ncol=1, loc='lower center', bbox_to_anchor=(0.5, 1))
fig.tight_layout()