This notebook explores the multiple NOTCH1 mutations found per patient, as well as the convergent evolution of NOTCH1 pathway mutations once FBXW7 variants are added. The plot corresponds to Figure 2d of the paper.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatch
import matplotlib.gridspec as gridspec
import seaborn as sns
from aux_data_in_pyvar import config_rcparams, PATS_DIRS
from matplotlib.lines import Line2D

# Show DataFrames in full when they are displayed in the notebook:
# no limit on the number of columns or rows...
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# ...and no truncation of cell contents. None is the supported "unlimited"
# value; the legacy -1 is deprecated since pandas 1.0 and rejected by pandas 2.x.
pd.set_option('display.max_colwidth', None)
%matplotlib inline

In [None]:
# Project helper imported from aux_data_in_pyvar; presumably applies the shared
# matplotlib rcParams used across the figures -- TODO confirm in that module.
config_rcparams()

In [None]:
# Genes of interest mapped to the colour (RGBA hex) of their lines in the plot
genes = dict(
    FBXW7="#decd87ff",
    NOTCH1='#d35f5fff',
)

# Variants removed from the primary and relapse MAFs before the analysis:
# manual inspection of consecutive mutations in the BAM revealed two NOTCH1
# misalignment errors
errors = [
    '9_139399325_G_T',
    '9_139400005_G_A',
    '9_139390864_C_G',
]

In [None]:
# Driver mutations table: driver_muts_TALL_subsets.tsv, i.e. the output of
# running ../processing/driver_mutations_TALL.ipynb
# NOTE(review): this is an absolute, machine-specific path -- adjust it to
# wherever that output lives in your environment.
driver_muts_path = "/workspace/projects/all_aecc/figures_jul2020/TALL_driver_alterations/driver_muts_TALL_subsets.tsv"
df_all = pd.read_csv(driver_muts_path, sep='\t')

In [None]:
# Clinical annotation, restricted to the adult T-ALL cohort
df_info = pd.read_csv("../ext_files/all_cohort_clinical_groups.tsv", sep='\t')
df_info = df_info[df_info['COHORT']=='ADULT TALL AECC PROJECT']

# Output directory for the figure saved further down ("" = working directory)
dire_out = ""

# File-name suffix shared by the primary and relapse MAFs with clonal information
maf_suffix = '_strelka_uniq_all_anno_vep92_categories_filt_snps_cluster.maf'
# Columns used to match driver mutations against the MAF records
merge_keys = ['#CHROM', 'POS', 'REF']

# One frame per patient, concatenated once after the loop
# (DataFrame.append in a loop is quadratic and no longer exists in pandas >= 2.0)
pat_frames = []

for pat in df_info['PATIENT'].unique():

    dire_mafs = PATS_DIRS[pat]

    # Comparison identifiers of the primary and relapse samples of this patient.
    # The first unique value is used; an IndexError here means the patient has
    # no row for that stage in the clinical table.
    pat_info = df_info[df_info['PATIENT'] == pat].reset_index(drop=True)
    com_pry = pat_info[pat_info['STAGE'] == 'primary']['COMPARISON'].unique()[0]
    com_rel = pat_info[pat_info['STAGE'] == 'relapse']['COMPARISON'].unique()[0]
    print(com_pry)

    # Read the MAF files with clonal information and give the clonality
    # columns a stage-specific name so both stages can live in one table
    df_pry = pd.read_csv(os.path.join(dire_mafs, pat, com_pry, com_pry + maf_suffix), sep='\t')
    df_pry = df_pry.rename(columns={'ccf':'ccf_pry', 'clonal_classification':'clonal_classification_pry'})
    df_rel = pd.read_csv(os.path.join(dire_mafs, pat, com_rel, com_rel + maf_suffix), sep='\t')
    df_rel = df_rel.rename(columns={'ccf':'ccf_rel', 'clonal_classification':'clonal_classification_rel'})

    # Drop the known misalignment artefacts; .copy() so that the column
    # assignment below acts on an independent frame, not on a slice
    df_pry = df_pry[~df_pry['Variant'].isin(errors)].copy()
    df_rel = df_rel[~df_rel['Variant'].isin(errors)].copy()

    df_pry['ccf_pry'] = df_pry['ccf_pry'].astype(float)
    df_rel['ccf_rel'] = df_rel['ccf_rel'].astype(float)

    # Driver mutations of this patient in the genes of interest
    df_pat = df_all[df_all['PATIENT'] == pat]
    df_pat = df_pat[df_pat['SYMBOL'].isin(genes.keys())]

    # Attach the clonal information of each stage (left joins keep every driver mutation)
    df_pat = df_pat[['SYMBOL', 'AA_change', 'PATIENT', '#CHROM', 'POS','REF', 'subset']].merge(
        df_pry[merge_keys + ['ccf_pry', 'clonal_classification_pry']], how='left', on=merge_keys)
    df_pat = df_pat.merge(
        df_rel[merge_keys + ['ccf_rel', 'clonal_classification_rel']], how='left', on=merge_keys)
    # A mutation with no match in a stage gets 0 in that stage's columns;
    # the plotting code tests for this 0 to detect stage-private mutations
    df_pat = df_pat.fillna(0)

    pat_frames.append(df_pat)

# Combined table of all patients (empty frame when the cohort has no patients)
dff_all = pd.concat(pat_frames, ignore_index=True, sort=False) if pat_frames else pd.DataFrame()

In [None]:
# One small panel per patient: two boxes on top for the primary sample
# (S = subclonal, C = clonal), two at the bottom for the relapse, and one
# coloured line per (gene, clonality transition) labelled with the number of
# mutations following that transition.
grps = dff_all.groupby('PATIENT')

# Columns that define one line in a panel; PATIENT is only used for counting
count_cols = ['subset', 'clonal_classification_pry', 'clonal_classification_rel', 'SYMBOL']

# Clonality boxes: (lower-left corner, fill colour, label, label position, extra text kwargs)
boxes = [
    ((0, 1.5), "#d1e5f0", "S", (0.5, 1.7), {}),             # primary, subclonal
    ((1, 1.5), "#2c7fb8", "C", (1.5, 1.7), {'color': "w"}),  # primary, clonal
    ((0, 0), "#fddbc7", "S", (0.5, 0.25), {}),              # relapse, subclonal
    ((1, 0), "#fd8d3c", "C", (1.5, 0.25), {'color': "w"}),   # relapse, clonal
]

# Line geometry keyed by (clonal_classification_pry, clonal_classification_rel);
# each value is (line end points, position of the count label).
# Mutations of the 'shared' subset connect a relapse box to a primary box.
shared_paths = {
    ('clonal', 'clonal'): ([(1.50, 0.25), (1.50, 1.75)], (1.5, 1)),
    ('clonal', 'subclonal'): ([(0.5, 0.25), (1.50, 1.75)], (1, 1)),
    ('subclonal', 'clonal'): ([(1.50, 0.25), (0.5, 1.75)], (1, 1)),
    ('subclonal', 'subclonal'): ([(0.50, 0.25), (0.50, 1.75)], (0.5, 1)),
}
# Any other subset: the mutation has a classification in one stage only
# (0 is the fill value for the missing stage) and its line stops mid-panel.
private_paths = {
    ('clonal', 0): ([(0.5, 1), (1.50, 1.75)], (0.5, 1)),
    ('subclonal', 0): ([(1.25, 1), (0.5, 1.75)], (1.25, 1)),
    (0, 'subclonal'): ([(0.75, 1), (0.25, 0.5)], (0.75, 1)),
    (0, 'clonal'): ([(0.75, 1), (1.5, 0.25)], (0.75, 1)),
}

fig = plt.figure(figsize=(20,2))
outer = gridspec.GridSpec(1, len(grps), wspace=0.1, hspace=0.2)

# Per-patient count tables, concatenated once after the loop
# (DataFrame.append no longer exists in pandas >= 2.0)
plot_frames = []

for i, pat in enumerate(grps.groups):
    # Number of mutations per (subset, primary clonality, relapse clonality, gene)
    df_plot = (
        grps.get_group(pat)[['PATIENT'] + count_cols]
        .groupby(count_cols)
        .count()
        .reset_index()
        .rename(columns={'PATIENT': 'num'})
    )
    df_plot['PATIENT'] = pat
    plot_frames.append(df_plot)

    ax = fig.add_subplot(outer[i])
    ax.set_xlim(0,2)
    ax.set_ylim(0,2)

    for corner, colour, label, label_xy, text_kw in boxes:
        # Patch first, label second and on a higher zorder, so the label is
        # drawn on top of its box regardless of the matplotlib version
        ax.add_patch(mpatch.Rectangle(xy=corner, width=1, height=0.5, color=colour, alpha=1., zorder=3))
        # Text passed positionally: the keyword was renamed from `s` to `text`
        ax.annotate(label, xy=label_xy, ha='center', va='center',
                    fontweight='bold', fontsize=14, zorder=4, **text_kw)

    for j, rw in df_plot.iterrows():
        is_shared = rw['subset'] == 'shared'
        paths = shared_paths if is_shared else private_paths
        entry = paths.get((rw['clonal_classification_pry'], rw['clonal_classification_rel']))
        if entry is None:
            # Combination without a defined drawing: nothing to plot
            continue
        points, label_xy = entry
        (line_xs, line_ys) = zip(*points)
        # Stage-private mutations are additionally flagged with 'X' markers
        marker_kw = {} if is_shared else {'marker': 'X', 'markersize': 8}
        ax.add_line(Line2D(line_xs, line_ys, linewidth=4, color=genes[rw['SYMBOL']], **marker_kw))
        ax.annotate(rw['num'], xy=label_xy, ha='center', va='center', fontweight='bold', fontsize=10)

    # Only the first panel carries the stage labels on the y axis
    ax.set_xticks([])
    if i == 0:
        ax.set_yticks([0.25,1.75])
        ax.set_yticklabels(['Relapse','Primary'])
    else:
        ax.set_yticks([])
    # Booleans, not 'off': the string is truthy and would switch the ticks on
    ax.tick_params(top=False, left=False, right=False)
    for side in ('top', 'right', 'left', 'bottom'):
        ax.spines[side].set_visible(False)
    ax.set_title(pat)

# Combined count table of all patients (empty frame when there are no patients)
dff_plot = pd.concat(plot_frames, ignore_index=True, sort=False) if plot_frames else pd.DataFrame()

fig.savefig(os.path.join(dire_out,
                         'clonality_change_main.svg'),
            dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Save the per-patient mutation counts behind the figure as a tab-separated
# file, without the DataFrame index
dff_plot.to_csv("../intermediate_files/notch1_pathway_muts.tsv", sep='\t', index=False)

CAREFUL! Some FBXW7 mutations, represented here as yellow bars, can be hidden behind overlapping red (NOTCH1) bars. To make them visible, we finished the final figure with SVG editing software. Likewise, for red bars whose number label is >1, we manually drew as many bars as the number indicates.