# Library Import

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.pyplot import FormatStrFormatter
import scipy.stats as scp
import scipy.optimize as sco

In [None]:
# Toggle as needed
# NOTE(review): "ignore" with no category silences every warning for the rest
# of the session, including deprecation warnings emitted by the libraries
# used below. Comment this out when debugging unexpected results.
import warnings
warnings.filterwarnings("ignore")

# Load Data

In [None]:
# Set this to your source_data directory
source_data_path = ".../Source Data/"

In [None]:
source_data_path_clinical = source_data_path + "Clinical/"
source_data_path_rna = source_data_path + "RNA/"
source_data_path_ref = source_data_path + "Reference/"

In [None]:
# Clinical annotation workbook. The first two rows of the sheet are skipped,
# so the column header is read from the third row.
annot_file = 'Table_S1_Clinical_Annotations.xlsx'
su2c_clinical = pd.read_excel(
    f"{source_data_path_clinical}{annot_file}",
    skiprows=2,
)

In [None]:
su2c_limma_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_Limma_All_v1.txt',sep='\t')

In [None]:
su2c_gsea_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_GSEA_Hallmark_All_v1.txt',sep='\t')

In [None]:
su2c_rna_harm = pd.read_csv(source_data_path_rna + 'SU2C-MARK_Harmonized_rnaseqc_tpm_v1.gct',skiprows=2,sep='\t')

In [None]:
ifng_file = 'HALLMARK_INTERFERON_GAMMA_RESPONSE.v7.5.1.tsv'
ipro_file = 'Immunoproteasome.tsv'
pro_file = 'GOCC_PROTEASOME_COMPLEX.v7.5.1.tsv'

In [None]:
go_ifng = pd.read_csv(source_data_path_ref + ifng_file).columns.to_list()

In [None]:
go_pro = pd.read_csv(source_data_path_ref + pro_file).columns.to_list()

In [None]:
go_ipro = pd.read_csv(source_data_path_ref + ipro_file).columns.to_list()

# Analysis

## Response and Non-Response Associated Genes

In [None]:
# ln(value + 1) of every remaining column. 'Name' and 'Description' are moved
# into the index first so that only the value columns are transformed.
su2c_rna_harm_log = np.log(su2c_rna_harm.set_index(['Name', 'Description']) + 1)

In [None]:
df_response = su2c_clinical[['Harmonized_SU2C_RNA_Tumor_Sample_ID_v2','Harmonized_Confirmed_BOR']]

In [None]:
df_response['Harmonized_Confirmed_BOR'].replace({'PR':1,'CR':1,'SD':0,'PD':0,'NE':np.nan},inplace=True)

In [None]:
su2c_limma_harm_up = su2c_limma_harm[su2c_limma_harm['logFC']>0]

In [None]:
su2c_limma_harm_dn = su2c_limma_harm[su2c_limma_harm['logFC']<0]

In [None]:
top_10_limma_up = su2c_limma_harm_up.sort_values(by='P.Value')['ensembl_gene_id_version'][0:10]

In [None]:
top_10_limma_dn = su2c_limma_harm_dn.sort_values(by='P.Value')['ensembl_gene_id_version'][0:10]

In [None]:
# Long-format log expression of the selected positive-logFC genes, with each
# sample's response attached. melt() stores the former sample column labels
# in 'variable', which is the key used to join the response table.
df_rna_response_up = (
    su2c_rna_harm_log
    .reset_index()
    .set_index('Name')
    .loc[top_10_limma_up]
    .reset_index()
    .melt(id_vars=['Name', 'Description'])
    .merge(
        df_response[['Harmonized_SU2C_RNA_Tumor_Sample_ID_v2', 'Harmonized_Confirmed_BOR']],
        left_on='variable',
        right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',
    )
)

In [None]:
# Strip plot with box-plot overlay of the positive-logFC genes, split by
# response group.
fig, ax = plt.subplots(figsize=(14, 6))

# Both layers share the same data mapping and are drawn on the same axes.
shared_kwargs = dict(data=df_rna_response_up, x='Description', y='value',
                     hue='Harmonized_Confirmed_BOR', dodge=True, ax=ax)
g_s = sns.stripplot(**shared_kwargs)
g_b = sns.boxplot(showfliers=False, **shared_kwargs)

box_line_col = sns.color_palette('tab10')[0:2]

# Recolour each box: white fill, outline in one of two colours that alternate
# box by box. The loop assumes five Line2D objects per box.
# NOTE(review): boxes are looked up through `Axes.artists`; depending on the
# matplotlib version they may be stored in `Axes.patches` instead, in which
# case this loop does nothing - confirm with the installed versions.
for i, mybox in enumerate(g_b.artists):
    box_col = box_line_col[i % 2]
    mybox.set_edgecolor(box_col)
    mybox.set_facecolor('white')

    for j in range(5 * i, 5 * i + 5):
        line = g_b.lines[j]
        line.set_color(box_col)
        line.set_markerfacecolor(box_col)
        line.set_markeredgecolor(box_col)

# Keep only the handles after the first two and relabel them.
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    handles[2:],
    ['Non-responder (SD/PD)', 'Responder (PR/CR)'],
    ncol=2,
    loc='upper center',
    bbox_to_anchor=(0.5, 1.1),
    frameon=False,
    fontsize=14,
)

_ = ax.set_xlabel("", font='Arial', weight='bold', fontsize=20, labelpad=14)
_ = ax.set_ylabel("ln(TPM + 1)", font='Arial', weight='bold', fontsize=20, labelpad=14)
_ = ax.set_xticklabels(ax.get_xticklabels(), font='Arial', weight='bold', style='italic', fontsize=18)
_ = ax.set_yticklabels(np.round(ax.get_yticks(), 0), font='Arial', weight='bold', fontsize=18)

# The formatter is installed after the tick labels were styled above.
ax.yaxis.set_major_formatter(FormatStrFormatter('%.0f'))
plt.setp(ax.get_legend().get_texts(), font='Arial', fontsize=20, weight='bold')
# Re-apply the current y-limits explicitly.
ymin, ymax = ax.get_ylim()
_ = plt.ylim(ymin, ymax)

In [None]:
# Long-format log expression of the selected negative-logFC genes, with each
# sample's response attached. melt() stores the former sample column labels
# in 'variable', which is the key used to join the response table.
df_rna_response_dn = (
    su2c_rna_harm_log
    .reset_index()
    .set_index('Name')
    .loc[top_10_limma_dn]
    .reset_index()
    .melt(id_vars=['Name', 'Description'])
    .merge(
        df_response[['Harmonized_SU2C_RNA_Tumor_Sample_ID_v2', 'Harmonized_Confirmed_BOR']],
        left_on='variable',
        right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',
    )
)

In [None]:
# Strip plot with box-plot overlay of the negative-logFC genes, split by
# response group.
fig, ax = plt.subplots(figsize=(14, 6))

# Both layers share the same data mapping and are drawn on the same axes.
shared_kwargs = dict(data=df_rna_response_dn, x='Description', y='value',
                     hue='Harmonized_Confirmed_BOR', dodge=True, ax=ax)
g_s = sns.stripplot(**shared_kwargs)
g_b = sns.boxplot(showfliers=False, **shared_kwargs)

box_line_col = sns.color_palette('tab10')[0:2]

# Recolour each box: white fill, outline in one of two colours that alternate
# box by box. The loop assumes five Line2D objects per box.
# NOTE(review): boxes are looked up through `Axes.artists`; depending on the
# matplotlib version they may be stored in `Axes.patches` instead, in which
# case this loop does nothing - confirm with the installed versions.
for i, mybox in enumerate(g_b.artists):
    box_col = box_line_col[i % 2]
    mybox.set_edgecolor(box_col)
    mybox.set_facecolor('white')

    for j in range(5 * i, 5 * i + 5):
        line = g_b.lines[j]
        line.set_color(box_col)
        line.set_markerfacecolor(box_col)
        line.set_markeredgecolor(box_col)

# Keep only the handles after the first two and relabel them.
handles, labels = ax.get_legend_handles_labels()
ax.legend(
    handles[2:],
    ['Non-responder (SD/PD)', 'Responder (PR/CR)'],
    ncol=2,
    loc='upper center',
    bbox_to_anchor=(0.5, 1.1),
    frameon=False,
    fontsize=14,
)

_ = ax.set_xlabel("", font='Arial', weight='bold', fontsize=16, labelpad=14)
_ = ax.set_ylabel("ln(TPM + 1)", font='Arial', weight='bold', fontsize=20, labelpad=14)
_ = ax.set_xticklabels(ax.get_xticklabels(), font='Arial', weight='bold', style='italic', fontsize=18)
_ = ax.set_yticklabels(np.round(ax.get_yticks(), 0), font='Arial', weight='bold', fontsize=18)

# The formatter is installed after the tick labels were styled above.
ax.yaxis.set_major_formatter(FormatStrFormatter('%.0f'))
plt.setp(ax.get_legend().get_texts(), font='Arial', fontsize=20, weight='bold')
# Re-apply the current y-limits explicitly.
ymin, ymax = ax.get_ylim()
_ = plt.ylim(ymin, ymax)

## Immunoproteasome Subunit Volcano Plot

In [None]:
# Volcano plot (logFC vs. -log10 p-value) highlighting three gene sets.
df_all = su2c_limma_harm
df = df_all
# Constant hue column so each scatter call can take its colour from a
# one-entry palette. `df` is an alias of `su2c_limma_harm`, so the column is
# added to that frame as well.
df['hue'] = 1
df_ipro = df[df['hgnc_symbol'].isin(go_ipro)]
df_pro = df[df['hgnc_symbol'].isin(go_pro)]
df_ifng = df[df['hgnc_symbol'].isin(go_ifng)]
df_ifn = df[df['hgnc_symbol'].isin(['IFNG'])]
df_list = [df_ipro, df_pro, df_ifng, df_ifn]
fig,ax = plt.subplots(figsize=(6,6))
tab10 = sns.color_palette("tab10")
palette_list = [tab10[2],tab10[1],tab10[0]]
# Raw string: `\it` is mathtext markup, not a Python escape sequence.
label_list = ['Immunoproteasome','Proteasome',r'$\it{IFNG}$ Targets','Interferon Gamma']
alpha_list = [1,1,1,1]

# Only the first three entries of df_list are drawn, in the order 2, 1, 0.
for i in [2,1,0]:
    df = df_list[i]
    x = df['logFC']
    y = df['P.Value']
    h = sns.scatterplot(x=x,y=-np.log10(y),ax=ax,s=100,alpha=alpha_list[i],hue=df['hue'],palette=[palette_list[i]],
                        legend=None,label=label_list[i])

plt.xlim(-2,2)
plt.ylim(0,7.3)
xmin, xmax = ax.get_xlim()
ymin, ymax = ax.get_ylim()
# Dashed guides at p = 0.05 and at logFC = +/-0.5.
plt.hlines(y=-np.log10(.05),xmin=xmin,xmax=xmax,linestyles='dashed',colors='k',alpha=0.3)
plt.vlines(x=0.5,ymin=ymin,ymax=ymax,linestyles='dashed',colors='k',alpha=0.3)
plt.vlines(x=-0.5,ymin=ymin,ymax=ymax,linestyles='dashed',colors='k',alpha=0.3)
# Axis labels are set once, through the axes object; raw strings keep the
# mathtext backslashes intact.
_ = ax.set_xlabel(r"$\mathregular{log_{2}(Fold Change)}$",font='Arial',weight='bold',fontsize=18,labelpad = 14)
_ = ax.set_ylabel(r"$\mathregular{-log_{10}(p-value)}$",font='Arial',weight='bold',fontsize=20,labelpad=14)
_ = ax.set_xticklabels(ax.get_xticks(),font='Arial',weight='bold',fontsize=20)
_ = ax.set_yticklabels(np.round(ax.get_yticks(),1),font='Arial',weight='bold',fontsize=20)
# The formatters are installed after the tick labels were styled above.
ax.yaxis.set_major_formatter(FormatStrFormatter('%.0f'))
ax.xaxis.set_major_formatter(FormatStrFormatter('%.1f'))
_ = plt.legend(title=None,prop={'size':18,'family':'Arial','weight':'bold'},
               markerscale=1,loc='upper left',frameon=False)

## Immunoproteasome Features

In [None]:
# Build one pooled frame of the three gene sets, each row tagged with the
# name of the set it came from.
df_all = su2c_limma_harm
df = df_all
df['hue'] = 1

gene_symbols = df['hgnc_symbol']
df_ipro = df[gene_symbols.isin(go_ipro)].assign(set='Immunoproteasome')
df_pro = df[gene_symbols.isin(go_pro)].assign(set='Proteasome')
df_ifng = df[gene_symbols.isin(go_ifng)].assign(set='Interferon Gamma Targets')
df_ifn = df[gene_symbols.isin(['IFNG'])]

# Row-wise stack; a gene that belongs to more than one set appears once per
# set.
df_pool = pd.concat([df_ifng, df_pro, df_ipro], axis=0)

In [None]:
# -log10(p-value) carrying the sign of the log fold change.
neg_log10_p = -np.log10(df_pool['P.Value'])
df_pool['signed_log10p'] = np.sign(df_pool['logFC']) * neg_log10_p

In [None]:
# Strip plot with box-plot overlay of signed log10 p-values per gene set.
fig,ax = plt.subplots(figsize=(4,5))
g_s = sns.stripplot(data=df_pool,x='set',y='signed_log10p',
                    dodge=True,ax=ax)

g_b = sns.boxplot(data=df_pool,x='set',y='signed_log10p',
                  dodge=True,showfliers=False,ax=ax)

box_line_col = sns.color_palette('tab10')[0:3]

# Recolour each box: white fill, outline cycling through three colours.
# The loop assumes five Line2D objects per box.
# NOTE(review): boxes are looked up through `Axes.artists`; depending on the
# matplotlib version they may be stored in `Axes.patches` instead, in which
# case this loop does nothing - confirm with the installed versions.
for i in range(0,len(g_b.artists)):
    mybox = g_b.artists[i]
    idx = i%3
    box_col = box_line_col[idx]
    mybox.set_edgecolor(box_col)
    mybox.set_facecolor('white')

    for j in range(i*5,i*5+5):
        line = g_b.lines[j]
        line.set_color(box_col)
        line.set_mfc(box_col)
        line.set_mec(box_col)

handles, labels = ax.get_legend_handles_labels()

_ = ax.set_xlabel("",font='Arial',weight='bold',fontsize=16,labelpad = 14)
# Raw string: `\mathregular` is mathtext markup, not a Python escape sequence.
_ = ax.set_ylabel(r"signed $\mathregular{log_{10}(p-value)}$",font='Arial',weight='bold',fontsize=14,labelpad=14)
# Labels follow the category order produced by the concat that built df_pool.
_ = ax.set_xticklabels(['Interferon\nGamma\nTargets','Proteasome\nSubunits','Immuno-\nproteasome\nSubunits'],font='Arial',weight='bold',fontsize=12)
_ = ax.set_yticklabels(np.round(ax.get_yticks(),0),font='Arial',weight='bold',fontsize=12)

# The formatter is installed after the tick labels were styled above.
ax.yaxis.set_major_formatter(FormatStrFormatter('%.0f'))
# Re-apply the current y-limits explicitly.
[ymin,ymax] = ax.get_ylim()
_=plt.ylim(ymin,ymax)

## Immunoproteasome Subunit Expression

In [None]:
# Log-expression rows for the go_ipro genes plus IFNG and TNF, indexed by the
# 'Description' column (the 'Name' column is discarded).
genes_of_interest = go_ipro + ['IFNG', 'TNF']
df_rna_ipro = (
    su2c_rna_harm_log
    .reset_index()
    .drop(columns='Name')
    .set_index('Description')
    .loc[genes_of_interest]
)

In [None]:
df_response = su2c_clinical[['Harmonized_SU2C_RNA_Tumor_Sample_ID_v2','Harmonized_Confirmed_BOR']]

In [None]:
df_response['Harmonized_Confirmed_BOR'].replace({'PR':'PR/CR','CR':'PR/CR','SD':'SD','PD':'PD','NE':np.nan},inplace=True)

In [None]:
# One row per sample: the transposed expression table joined (by its index)
# to the response table's sample-ID column.
df_rna_ipro_response = pd.merge(
    df_rna_ipro.T,
    df_response,
    left_index=True,
    right_on='Harmonized_SU2C_RNA_Tumor_Sample_ID_v2',
)

In [None]:
# Pairwise regression plots of every numeric column except TNF.
# pairplot creates its own figure, so no figure is opened beforehand.
# NOTE(review): seaborn documents that `rc` passed to plotting_context only
# updates context parameters - confirm that 'font.family' takes effect here.
with sns.plotting_context(rc={"axes.labelsize":14,"font.family":'Arial'}):
    pp = sns.pairplot(df_rna_ipro_response.drop('TNF',axis=1),kind='reg')

# Style the outer axes only. The first column and the last row are selected
# from the 2-D axes grid, so this works for any number of plotted variables.
for ax in pp.axes[:, 0]:
    ax.set_ylabel(ax.get_ylabel()+'\nln(TPM+1)',style='italic',weight='bold')
    ax.set_ylim(0,7)
    ax.set_yticks([0,2,4,6])
    ax.set_yticklabels(['0','2','4','6'],weight='bold',size=14)
for ax in pp.axes[-1, :]:
    ax.set_xlabel(ax.get_xlabel()+'\nln(TPM+1)',style='italic',weight='bold')
    ax.set_xlim(0,7)
    ax.set_xticks([0,2,4,6])
    ax.set_xticklabels(['0','2','4','6'],weight='bold',size=14)

In [None]:
# Figure width and height; Plot_Contours divides both by 100.0 and creates the
# figure at dpi=100.
plot_width = 600
plot_height = 800
# Level-count argument that Plot_Contours passes to contourf.
num_contours = 16

def Plot_Contours(fit_function, vals, params):
    """Draw a fitted surface as filled contours with the data points on top.

    Parameters
    ----------
    fit_function : callable
        Model evaluated as ``fit_function(np.array([X, Y]), *params)`` on a
        20 x 20 grid spanning the observed x and y ranges.
    vals : sequence
        ``vals[0]``, ``vals[1]`` and ``vals[2]`` are the x, y and z
        observations. x and y set the grid extent; all three are drawn as a
        scatter coloured by z.
    params : sequence
        Model parameters, unpacked into ``fit_function``.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.

    Notes
    -----
    Reads the module-level ``plot_width``, ``plot_height`` and
    ``num_contours``. The axis, colour-bar and legend texts are fixed strings
    naming IFNG, TNF and PSMB8.
    """
    f = plt.figure(figsize=(plot_width/100.0, plot_height/100.0), dpi=100)
    axes = f.add_subplot(111)

    x_vals = vals[0]
    y_vals = vals[1]
    z_vals = vals[2]

    xModel = np.linspace(min(x_vals), max(x_vals), 20)
    yModel = np.linspace(min(y_vals), max(y_vals), 20)
    X, Y = np.meshgrid(xModel, yModel)
    Z = fit_function(np.array([X, Y]), *params)

    # `\\it` keeps the mathtext backslash literal while `\n` stays a newline.
    axes.set_xlabel('$\\it{IFNG}$\nln(TPM + 1)',font='Arial',weight='bold',fontsize=14,labelpad=10) # X axis vals label
    axes.set_ylabel('$\\it{TNF}$\nln(TPM + 1)',font='Arial',weight='bold',fontsize=14,labelpad=10) # Y axis vals label
    # Draw on the axes created above instead of relying on pyplot's notion of
    # the current axes.
    CS = axes.contourf(X, Y, Z, num_contours, cmap='coolwarm')
    cbar = f.colorbar(CS, ax=axes, orientation='horizontal')
    cbar.set_label(label='$\\it{PSMB8}$ Modeled\nln(TPM + 1)',size=14)
    cbar.ax.tick_params(labelsize=13)
    sns.scatterplot(x=x_vals,y=y_vals, hue=z_vals, ax=axes)
    _ = axes.set_xticklabels(axes.get_xticks(),font='Arial',weight='bold',fontsize=14)
    _ = axes.set_yticklabels(axes.get_yticks(),font='Arial',weight='bold',fontsize=14)
    axes.legend(title='$\\it{PSMB8}$',fontsize=14,title_fontsize=14)
    plt.show()

def fit_function(vals, a, b, c):
    """Evaluate the planar model ``a * x + b * y + c``.

    ``vals[0]`` supplies x and ``vals[1]`` supplies y; scalars and arrays
    are both accepted because only ``*`` and ``+`` are applied.
    """
    return (a * vals[0]) + (vals[1] * b) + c

if __name__ == "__main__":
    # Fit PSMB8 as a linear function of IFNG and TNF (all taken as columns of
    # df_rna_ipro_response), then draw the fitted plane with the data.
    x_vals, y_vals, z_vals = (
        df_rna_ipro_response[gene] for gene in ('IFNG', 'TNF', 'PSMB8')
    )
    vals = [x_vals, y_vals, z_vals]

    # Starting guess for (a, b, c) in fit_function.
    initialParameters = [1.0, 1.0, 1.0]
    params, pcov = sco.curve_fit(
        fit_function,
        [x_vals, y_vals],
        z_vals,
        p0=initialParameters,
    )
    Plot_Contours(fit_function, vals, params)