This notebook draws the NOTCH1 needle plot shown in Figure 2c of the paper. Protein domains were added later with SVG editing software. 

The input of this notebook is the set of re-annotated NOTCH1 mutations, including their exon annotation. 

In [None]:
import os
import sys

# Put the directory of the running Python interpreter at the front of PATH.
# NOTE(review): presumably so command-line tools installed next to the kernel
# (e.g. the bedtools binary used by pybedtools) are found - confirm.
os.environ["PATH"] = os.pathsep.join((os.path.dirname(sys.executable), os.environ["PATH"]))

In [None]:
import pandas as pd
import numpy as np
import seaborn
import matplotlib.pyplot as plt
import matplotlib.patches as mpatch
import pybedtools
from aux_data_in_pyvar import config_rcparams

# Show every column and every row when a DataFrame is displayed
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
# Do not truncate cell contents. `None` is the supported way to disable the
# limit; the former `-1` value is deprecated and rejected by recent pandas.
# Old pandas releases only accept integers here, hence the fallback.
try:
    pd.set_option('display.max_colwidth', None)
except ValueError:
    pd.set_option('display.max_colwidth', -1)

In [None]:
## FUNCTIONS

def plot_combination(combination_subset, coord_x, ax_grid, coord_y=1):
    """Draw the marker that encodes a combination of mutation categories.

    The marker is drawn with `drawPieMarker` at (`coord_x`, `coord_y`):
    - 'private_primary' colours the first half of the marker blue,
    - 'private_relapse' colours the second half orange,
    - 'shared' adds a thick green border (a white disc when it is alone);
      without 'shared' the border is thin and white.

    Parameters
    ----------
    combination_subset : set
        Set of category names ('shared', 'private_primary', 'private_relapse').
    coord_x, coord_y :
        Marker position, forwarded to `drawPieMarker` as `xs` and `ys`.
    ax_grid :
        Axes to draw on.

    Returns
    -------
    The value returned by `drawPieMarker` when the combination is known,
    otherwise `ax_grid` untouched (nothing is drawn).
    """
    size_standard = 100

    white = '#FFFFFF'
    primary_blue = '#2c7fb8'
    relapse_orange = '#fd8d3c'

    shared_border = {'border_colors': "#006837", 'border_width': 2}
    plain_border = {'border_colors': white, 'border_width': 0.8}

    # (categories, slice ratios, slice colours, border style)
    marker_styles = [
        ({'shared', 'private_relapse', 'private_primary'},
         [0.5, 0.5], [primary_blue, relapse_orange], shared_border),
        ({'shared', 'private_relapse'},
         [0.5, 0.5], [white, relapse_orange], shared_border),
        ({'shared', 'private_primary'},
         [0.5, 0.5], [primary_blue, white], shared_border),
        ({'private_primary', 'private_relapse'},
         [0.5, 0.5], [primary_blue, relapse_orange], plain_border),
        ({'shared'},
         [1], [white], shared_border),
        ({'private_primary'},
         [0.5, 0.5], [primary_blue, white], plain_border),
        ({'private_relapse'},
         [0.5, 0.5], [white, relapse_orange], plain_border),
    ]

    for categories, slice_ratios, slice_colors, border in marker_styles:
        if categories == combination_subset:
            # one size entry per slice, as in the original call sites
            return drawPieMarker(xs=coord_x,
                                 ys=coord_y,
                                 ratios=slice_ratios,
                                 sizes=[size_standard] * len(slice_ratios),
                                 colors=slice_colors,
                                 ax=ax_grid,
                                 **border)

    # unknown combination: draw nothing
    return ax_grid

    
def drawPieMarker(xs, ys, ratios, sizes, colors, border_colors, border_width, ax):
    """Scatter pie-chart shaped markers on `ax`.

    Adapted from https://stackoverflow.com/questions/56337732/how-to-plot-scatter-pie-chart-using-matplotlib

    Parameters
    ----------
    xs, ys :
        Marker position(s), forwarded to `ax.scatter`.
    ratios : sequence of float
        Fraction of the full circle covered by each slice; must sum to at most 1.
    sizes : sequence of number
        Marker size(s) for the plotted point(s). When its length matches
        neither 1 nor the number of points it is cycled/truncated to one
        size per point.
    colors : sequence
        Face colour of each slice (paired with `ratios`).
    border_colors, border_width :
        Edge colour and line width applied to every slice.
    ax :
        Object providing a matplotlib-like `scatter` method.

    Returns
    -------
    `ax`, to allow chaining.
    """
    assert sum(ratios) <= 1, 'sum of ratios needs to be <= 1'

    # `scatter` wants a scalar size or one size per point. Callers in this
    # notebook pass one size per *slice* for a single point, which recent
    # matplotlib releases reject; cycling the sizes over the points keeps the
    # rendering older releases produced.
    n_points = np.size(xs)
    point_sizes = np.asarray(sizes, dtype=float).ravel()
    if point_sizes.size and point_sizes.size not in (1, n_points):
        point_sizes = np.resize(point_sizes, n_points)

    markers = []
    previous = 0
    # calculate the points of the pie pieces
    for color, ratio in zip(colors, ratios):
        this = 2 * np.pi * ratio + previous
        arc = np.linspace(previous, this, 40)
        # closed polygon: centre -> arc -> centre
        x = [0] + np.cos(arc).tolist() + [0]
        y = [0] + np.sin(arc).tolist() + [0]
        xy = np.column_stack([x, y])
        previous = this
        markers.append({'marker': xy, 's': np.abs(xy).max()**2 * point_sizes, 'facecolor': color,
                        'edgecolors': border_colors, 'linewidth': border_width})

    # scatter each of the pie pieces to create pies
    if len(ratios) == 1 and markers:
        # a single slice is drawn with the built-in circle marker instead of
        # the polygon computed above
        only_slice = markers[0]
        ax.scatter(xs, ys, marker='o', s=only_slice['s'], facecolor=only_slice['facecolor'],
                   edgecolors=border_colors, linewidth=border_width)
    else:
        for marker in markers:
            ax.scatter(xs, ys, **marker)
    return ax

In [None]:
# Directory where the figure is written; with the empty default the SVG is
# saved relative to the current working directory.
out_path = "" # path for the figure

In [None]:
## READ NOTCH1 DRIVER MUTATIONS

candidates_notch1 = pd.read_csv("../intermediate_files/notch1_needle_muts/candidate_muts_notch1.tsv", sep='\t')

# Protein change label: <reference aa><protein position><alternate aa>,
# built from the "ref/alt" string in 'Amino_acids'
candidates_notch1['AA_change'] = [
    amino_acids.split("/")[0] + protein_position + amino_acids.split("/")[1]
    for amino_acids, protein_position in zip(candidates_notch1['Amino_acids'],
                                             candidates_notch1['Protein_position'])
]

# 'EXON' holds "<exon>/<something>"; keep the part before the slash as an integer
candidates_notch1['exon'] = (
    candidates_notch1['EXON']
    .map(lambda exon_field: exon_field.split("/")[0])
    .astype(int)
)
candidates_notch1.head()

In [None]:
## READ EXON COORDINATES

exons_notch1 = pd.read_csv("../ext_files/notch1_exons.txt", sep='\t')
# end minus start of every exon region (column-wise instead of row by row)
exons_notch1['length_exon'] = exons_notch1['Exon region end (bp)'] - exons_notch1['Exon region start (bp)']

# Keep only the first 34 rows of the table
# NOTE(review): presumably the 34 exons of the transcript of interest - confirm
lists = list(range(0, 34, 1))
exons_notch1 = (
    exons_notch1
    .iloc[lists]
    .sort_values('Exon region start (bp)', ascending=True)
)
exons_notch1.head()

In [None]:
## RE-SCALE EXON COORDINATES FOR THE FIGURE

# Exons are laid out one after another on a compact axis: each exon starts one
# unit after the previous one ends, so the space between exons is dropped.
prev_coord = 1

# Collect one record per exon and build the DataFrame once; growing a frame
# with DataFrame.append in a loop is quadratic and the method no longer
# exists in pandas >= 2.0.
rescaled_records = []

for i, rw in exons_notch1.iterrows():
    rescaled_records.append({'original_start_exon': rw['Exon region start (bp)'],
                             'original_end_exon': rw['Exon region end (bp)'],
                             'start_exon': prev_coord,
                             'end_exon': prev_coord + rw['length_exon'],
                             'length': rw['length_exon'],
                             'exon_num': rw['Exon rank in transcript']})
    prev_coord = prev_coord + rw['length_exon'] + 1

exons_rescale = pd.DataFrame(rescaled_records,
                             columns=['original_start_exon', 'original_end_exon',
                                      'start_exon', 'end_exon', 'length', 'exon_num']).astype(int)
# chromosome name goes first so the frame has a BED-like column order
exons_rescale.insert(0, 'chrom', '9')

In [None]:
## MERGE MUTATIONS WITH EXON COORDINATES

# 'POS' is used twice: as start and as end of each mutation interval
mutation_fields = ['#CHROM', 'POS', 'POS', 'REF', 'ALT',
                   'SYMBOL', 'Consequence', 'AA_change',
                   'PATIENT', 'COHORT',
                   'Variant', 'subset', 'exon']
exon_fields = ['chrom', 'original_start_exon', 'original_end_exon', 'start_exon',
               'end_exon', 'length', 'exon_num']

muts = pybedtools.BedTool.from_dataframe(candidates_notch1[mutation_fields])
exons = pybedtools.BedTool.from_dataframe(exons_rescale)

# left outer join of the mutations with the exon intervals
result = muts.intersect(exons, loj = True)

merged = pd.read_table(result.fn,
                       names=['#CHROM', 'POS', 'END'] + mutation_fields[3:] + exon_fields)
print(len(merged))

# offset of the mutation from the original exon start, then the same offset
# applied to the rescaled exon start
merged['length_start_pos'] = merged['POS'] - merged['original_start_exon']
merged['pos_rescaled'] = merged['start_exon'] + merged['length_start_pos']

columns_to_show = ['POS', 'original_start_exon',
                   'original_end_exon', 'start_exon', 'end_exon', 'length', 'exon_num',
                   'length_start_pos', 'pos_rescaled', 'AA_change']
merged[columns_to_show].sort_values("POS", ascending=False)

In [None]:
## SEPARATE BY AGE GROUPS

# Both groups are ordered by exon number (ascending) and rescaled position
# (descending) and re-indexed from 0. The calls are chained instead of using
# inplace=True on a filtered slice, which triggers SettingWithCopyWarning.

# ADULT MUTS ABOVE FIGURE
merged_adult = (
    merged[merged['COHORT'] == 'ADULT TALL AECC PROJECT']
    .sort_values(by=['exon_num', 'pos_rescaled'], ascending=[True, False])
    .reset_index(drop=True)
)

# PEDIATRIC MUTS BELOW FIGURE
merged_pedia = (
    merged[merged['COHORT'] != 'ADULT TALL AECC PROJECT']
    .sort_values(by=['exon_num', 'pos_rescaled'], ascending=[True, False])
    .reset_index(drop=True)
)

In [None]:
# Adult mutations listed per patient (exon descending, position ascending)
# NOTE(review): presumably shown to spot suspicious neighbouring calls - confirm
merged_adult[['PATIENT', 'POS','subset','AA_change','exon']].sort_values(by=['PATIENT','exon','POS'], ascending=[True, False,True])

A few adult NOTCH1 mutations at consecutive positions appear to be calling errors caused by a misalignment of the region. After inspection of the BAM files, these calls are manually corrected in the next cell. 

In [None]:
# Variant identifiers to discard from the adult set
errors = [
    '9_139399325_G_T',
    '9_139400005_G_A',
    '9_139390864_C_G',
]

# keep every adult mutation whose 'Variant' is not listed above
merged_adult = merged_adult.loc[~merged_adult['Variant'].isin(errors)]

In [None]:
## FIGURE

fig,ax = plt.subplots(figsize=(20,30))
# Right end of the rescaled axis, taken from the end of exon 1
# NOTE(review): assumes exon 1 is the last exon in rescaled coordinates - confirm
lenght = exons_rescale[exons_rescale['exon_num'] == 1]['end_exon'].unique()[0]
# NOTE(review): fixed vertical range; markers stacked beyond it are not visible
ax.set_ylim(-40,30)
ax.set_xlim(0,lenght+1)

# gene backbone and one labelled box per exon
ax.hlines(xmin=1.,xmax=lenght,y=-0.15,lw=2,color="black",alpha=1.)
for _, r in exons_rescale.iterrows():
    rect = mpatch.Rectangle(xy=(r["start_exon"],-0.3),width=r["length"],height=0.3,
                            color='#fcbba1',alpha=1., zorder=2, ec='black')
    # the label is passed positionally: the `s=` keyword of annotate was
    # renamed to `text=` and later removed from matplotlib
    ax.annotate(str(r["exon_num"]),xy=((r["length"]/2)+r['start_exon'],-0.3),fontsize=10, ha='center')
    ax.add_patch(rect)

#adults (above the gene)
# The vertical level is the running row number, not the index label: rows were
# dropped from merged_adult without re-indexing, so index labels have gaps
# that would leave empty levels in the stack.
for i, (_, rw) in enumerate(merged_adult.iterrows()):
    ax = plot_combination({rw['subset']}, coord_x=rw['pos_rescaled'], ax_grid=ax, coord_y=1+i) 
    ax.text(s=rw['AA_change'], x=rw['pos_rescaled'], y=1.5+i, rotation=90, ha='center',  va='bottom')
    ax.vlines(x=rw['pos_rescaled'], ymin=0, ymax=0.9+i, linestyles='dashed', color='#BFBFBF')

#pediatrics (below the gene)
for j, (_, rw_2) in enumerate(merged_pedia.iterrows()):
    ax = plot_combination({rw_2['subset']}, coord_x=rw_2['pos_rescaled'], ax_grid=ax, coord_y=-1-j)
    ax.text(s=rw_2['AA_change'], x=rw_2['pos_rescaled'], y=-1.2-j, rotation=90, ha='center',  va='top')
    ax.vlines(x=rw_2['pos_rescaled'], ymax=-0.15, ymin=-0.9-j, linestyles='dashed', color='#BFBFBF')

# no ticks and no frame: every spine is hidden (the earlier line-width tweaks
# on the bottom/left spines had no visible effect and were dropped)
ax.set_yticks([])
ax.set_xticks([])
for side in ('top', 'right', 'bottom', 'left'):
    ax.spines[side].set_visible(False)

fig.savefig(os.path.join(out_path,"notch1_python.svg"), dpi=100)
plt.show()