# Lollipop plots

This notebook generates lollipop plots for point mutations observed in PDGFRA and ATM in our cohort.

These plots correspond to Extended Data Fig. 2b,d.

I will first check if all of the variants are in the OncoPanel data; if not, I will check the exome data also.

In [1]:
import os
import numpy as np
import pandas as pd
import palettable

In [3]:
# Load the three input tables. All are parsed as tab-delimited text; note that
# the OncoPanel file is tab-separated despite its `.csv` extension.
lollipop_muts = pd.read_table('../data/icb_PDGFRA_and_ATM_mutations.tsv')
oncopanel_muts = pd.read_table('../data/oncopanel_mutations.csv')
patient_sheet = pd.read_table('../data/participant_cohort_sheet.tsv')

In [4]:
# Sanity check: (rows, columns) of the PDGFRA/ATM mutation table just loaded.
lollipop_muts.shape

(8, 25)

In [5]:
# Build a per-variant key of the form CHROMOSOME_POSITION_REF_ALT so OncoPanel
# calls can be matched against the `unique_mut_id` column of `lollipop_muts`.
# POSITION is cast to str first because it is concatenated with text columns.
oncopanel_muts['unique_mut_id'] = oncopanel_muts['CHROMOSOME'].str.cat(
    [
        oncopanel_muts['POSITION'].astype(str),
        oncopanel_muts['REF_ALLELE'],
        oncopanel_muts['ALT_ALLELE'],
    ],
    sep = '_',
)

In [12]:
# Show the variant keys on the lollipop side; they use the same
# chromosome_position_ref_alt layout as the key built for the OncoPanel table.
lollipop_muts["unique_mut_id"]

0      4_55133621_G_T
1      4_55131142_G_A
2    11_108117799_G_A
3      4_55144148_C_G
4      4_55152092_G_T
5    11_108216609_C_T
6      4_55131162_T_G
7    11_108129712_G_C
Name: unique_mut_id, dtype: object

In [9]:
# Inspect the OncoPanel table, including the `unique_mut_id` key column added above.
oncopanel_muts

Unnamed: 0,id_drspatient,id_uniquesample,id_icb,id_mj,VARIANT_CALL_ID,CANONICAL_GENE,BEST_EFF_GENE,CANONICAL_VARIANT_CLASS,BEST_EFF_VARIANT_CLASS,HARMONIZED_HUGO_GENE_NAME,...,COVERAGE,TUMOR_PURITY,PANEL_VERSION,CANONICAL_ENSEMBL_TSCP_ID,CANONICAL_REF_SEQ_TSCP_ID,BEST_EFFECT_ENSEMBL_TSCP_ID,BEST_EFFECT_REF_SEQ_TSCP_ID,HARMONIZED_TRANSCRIPT_ID,MAX_GNOMAD_FREQUENCY,unique_mut_id
0,101954791,1019543883780,,254.0,268637,,,,,PIK3CA,...,122.0,55.0,1.0,,,,NM_006218,ENST00000263967,0,3_178936092_A_C
1,101954646,1019543883873,,209.0,526203,,,,,SF3B1,...,161.0,55.0,1.0,,,,NM_012433,ENST00000335508,,2_198267698_C_T
2,101954597,1019543883876,221.0,192.0,210153,,,,,TP53,...,201.0,65.0,1.0,,,,NM_001126112,ENST00000269305,8.80300000000e-06,17_7578406_C_T
3,101954597,1019543883876,221.0,192.0,210001,,,,,RB1,...,504.0,65.0,1.0,,,,NM_000321,ENST00000267163,,13_48916769_G_GA
4,1019541715,1019543883880,,518.0,253258,,,,,SOX9,...,507.0,60.0,1.0,,,,NM_000346,ENST00000245479,,17_70120388_TC_T
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4251,1043044192,1043044005290,,722.0,6352739,,,,Nonsense,PDGFRB,...,377.0,80.0,3.0,,,,,ENST00000261799,,5_149506089_C_T
4252,1043044192,1043044005290,,722.0,6352694,,,,Frameshift,RAD54B,...,310.0,80.0,3.0,,,,,ENST00000336148,.000058173,8_95404084_CAAGT_C
4253,1043044192,1043044005290,,722.0,6352788,,,,Splice_Donor,RB1,...,172.0,80.0,3.0,,,,,ENST00000267163,,13_49030486_G_A
4254,1043044192,1043044005290,,722.0,6352802,,,,Missense,CBFA2T3,...,228.0,80.0,3.0,,,,,ENST00000268679,,16_88967983_G_C


In [13]:
# Annotate every lollipop mutation with:
#   1. its OncoPanel call, matched on `id_icb` + `unique_mut_id`. This is a left
#      join, so a mutation with no OncoPanel match is kept and its OncoPanel
#      columns are left as NaN;
#   2. the 'ICB for Newly diagnosed' status from the patient sheet, matched on
#      `Patient_ID` == `participant_id_legacy` (pandas' default inner join).
lollipop_muts_merged = (
    lollipop_muts
    .merge(oncopanel_muts, how = 'left', on = ['id_icb', 'unique_mut_id'])
    .merge(
        patient_sheet[['participant_id_legacy', 'ICB for Newly diagnosed']],
        left_on = 'Patient_ID',
        right_on = 'participant_id_legacy',
    )
)

# Guard against silent row changes: the inner join above drops mutations whose
# patient is missing from the cohort sheet, and duplicate keys on either
# right-hand table would multiply rows. Either case would corrupt the plots.
assert len(lollipop_muts_merged) == len(lollipop_muts), (
    f"Merging changed the number of mutations: {len(lollipop_muts)} in, "
    f"{len(lollipop_muts_merged)} out"
)

In [14]:
# The row count here should equal that of `lollipop_muts`; a different number
# would mean the merges dropped or duplicated mutations.
lollipop_muts_merged.shape

(8, 65)

In [15]:
# Per-mutation overview, grouped by gene: patient, diagnosis status, and every
# column whose name contains 'PROTEIN', to see which annotation is populated.
(
    lollipop_muts_merged
    .loc[:, ['Hugo_Symbol', 'Patient_ID', 'ICB for Newly diagnosed']
            + [col for col in lollipop_muts_merged.columns if 'PROTEIN' in col]]
    .sort_values(by = 'Hugo_Symbol')
)

Unnamed: 0,Hugo_Symbol,Patient_ID,ICB for Newly diagnosed,HARMONIZED_PROTEIN_CHANGE,CANONICAL_PROTEIN_CHANGE,BEST_EFF_PROTEIN_CHANGE
2,ATM,GBM.ICB-109,Recurrent,p.R337H,,
5,ATM,GBM.ICB-73,Recurrent,p.T2853M,,
7,ATM,GBM.ICB-108,Recurrent,,,p.K793_splice
0,PDGFRA,GBM.ICB-2,Recurrent,p.V309F,,
1,PDGFRA,GBM.ICB-53,Recurrent,p.E229K,,
3,PDGFRA,GBM.ICB-21,Newly-diagnosed,p.N659K,,
4,PDGFRA,GBM.ICB-31,Newly-diagnosed,p.D842Y,,
6,PDGFRA,GBM.ICB-127,Newly-diagnosed,p.C235W,,


In [16]:
# Coalesce the protein-change annotation: take HARMONIZED_PROTEIN_CHANGE and,
# where it is missing, fall back to BEST_EFF_PROTEIN_CHANGE.
lollipop_muts_merged['Protein_Change'] = (
    lollipop_muts_merged['HARMONIZED_PROTEIN_CHANGE']
    .fillna(lollipop_muts_merged['BEST_EFF_PROTEIN_CHANGE'])
)

In [17]:
# Colour each mutation by diagnosis status using the two end colours of the
# three-colour PRGn diverging palette (the middle colour is not used).
diag_colors = palettable.colorbrewer.diverging.PRGn_3.hex_colors
diag_mapping = {
    'Newly-diagnosed': diag_colors[0],
    'Recurrent': diag_colors[2],
}
lollipop_muts_merged['color'] = lollipop_muts_merged['ICB for Newly diagnosed'].map(diag_mapping)

# One argument per mutation: the protein change with everything up to the last
# '.' removed (e.g. the 'p.' prefix), immediately followed by the hex colour.
lollipop_muts_merged['arg'] = (
    lollipop_muts_merged['Protein_Change'].str.rsplit('.', n = 1).str.get(-1)
    + lollipop_muts_merged['color']
)
lollipop_muts_merged['arg']

0          V309F#7FBF7B
1          E229K#7FBF7B
2          R337H#7FBF7B
3          N659K#AF8DC3
4          D842Y#AF8DC3
5         T2853M#7FBF7B
6          C235W#AF8DC3
7    K793_splice#7FBF7B
Name: arg, dtype: object

In [18]:
def _lollipops_variant_args(merged_muts, gene):
    """Build the space-separated variant-argument string for one gene.

    Parameters
    ----------
    merged_muts : pandas.DataFrame
        Table with a 'Hugo_Symbol' column and an 'arg' column holding one
        ready-made argument string per mutation.
    gene : str
        'Hugo_Symbol' value whose mutations should be selected.

    Returns
    -------
    str
        The selected 'arg' values, in table order, joined by single spaces
        (an empty string if the gene has no rows).
    """
    # 'arg' already had its prefix stripped when it was built, so no further
    # splitting on '.' is needed here.
    gene_args = merged_muts.loc[merged_muts['Hugo_Symbol'] == gene, 'arg']
    return ' '.join(gene_args)


pdgfra_muts = _lollipops_variant_args(lollipop_muts_merged, 'PDGFRA')
atm_muts = _lollipops_variant_args(lollipop_muts_merged, 'ATM')

In [19]:
# Space-separated variant arguments for the PDGFRA plot.
pdgfra_muts

'V309F#7FBF7B E229K#7FBF7B N659K#AF8DC3 D842Y#AF8DC3 C235W#AF8DC3'

In [20]:
# Space-separated variant arguments for the ATM plot.
atm_muts

'R337H#7FBF7B T2853M#7FBF7B K793_splice#7FBF7B'

These strings can be used to generate lollipop plots via the [lollipops](https://github.com/joiningdata/lollipops) tool.

See their documentation for installation and usage: 

https://github.com/joiningdata/lollipops

In [None]:
# EXAMPLE USAGE:
# Each command runs a `lollipops` executable located in the current working
# directory. IPython replaces {pdgfra_muts} / {atm_muts} with the argument
# strings built earlier in this notebook before handing the line to the shell.
# The PNG and SVG commands for a gene differ only in the output file name and
# in that -dpi is passed for PNG only.

# PDGFRA -> PDGFRA.png
! ./lollipops\
    -legend\
    -labels\
    -o=PDGFRA.png\
    -w=4000\
    -dpi=450\
    -domain-labels=off\
    PDGFRA {pdgfra_muts}

# PDGFRA -> PDGFRA.svg
! ./lollipops\
    -legend\
    -labels\
    -o=PDGFRA.svg\
    -w=4000\
    -domain-labels=off\
    PDGFRA {pdgfra_muts}

# ATM -> ATM.png
! ./lollipops\
    -legend\
    -labels\
    -o=ATM.png\
    -w=4000\
    -dpi=450\
    -domain-labels=off\
    ATM {atm_muts}

# ATM -> ATM.svg
! ./lollipops\
    -legend\
    -labels\
    -o=ATM.svg\
    -w=4000\
    -domain-labels=off\
    ATM {atm_muts}