# Code for muts.py

## Previous code at muts.py

In [1]:
import functools
import random

ALT_COLOR_MAP = {'C>A': '#1fbdef',
                 'C>G': '#0d1011',
                 'C>T': '#e52b29',
                 'T>A': '#cac8ca',
                 'T>C': '#a0cd64',
                 'T>G': '#ecc6c3'}


PASSENGERS = ['APP', 'BUB3', 'ELN', 'GDF11', 'HDAC3', 'HSP90AA1', 'IKBKB', 'MAPK14',
              'MT-CO1', 'NFKBIA', 'POU1F1', 'PPARGC1A', 'RECQL4', 'TOP2B',
              'VEGFA', 'YWHAZ']


class Mela:
    alts = ['C>A'] * 6 + ['C>G'] * 6 + ['C>T'] * 70 + ['T>A'] * 6 + ['T>C'] * 6 + ['T>G'] * 6
    drivers = ['BRAF', 'NRAS', 'ANK3', 'MLL3', 'BAP1', 'CDKN2A', 'SVEP1', 'MECOM', 'MAP2K1', 'NF1']
    passengers = PASSENGERS
    driver_drug_map = {'BRAF': 'Sorafenib', 'NRAS': 'MEK inhibitors', 'ANK3': '',
                       'MLL3': '', 'BAP1': 'HDAC inhibitors', 'CDKN2A': 'CDK4/6 inhibitors',
                       'SVEP1': '', 'MECOM': '', 'MAP2K1': 'ERK inhibitors',
                       'NF1': 'PD1 Ab inhibitors'}


class Lung:
    alts = ['C>A'] * 50 + ['C>G'] * 11 + ['C>T'] * 11 + ['T>A'] * 11 + ['T>C'] * 11 + ['T>G'] * 6
    drivers = ['TP53', 'EGFR', 'MLL2', 'FGFR2', 'PIK3CA', 'CDKN2A', 'NF1', 'PTEN', 'NOTCH1', 'ARID1A', 'RB1']
    passengers = PASSENGERS
    driver_drug_map = {'TP53': 'HSP90 inhibitors', 'EGFR': 'Erlotinib', 'MLL2': 'Bicalutamide',
                       'FGFR2': 'FGFR inhibitors', 'PIK3CA': '', 'CDKN2A': 'Ilorasertib',
                       'NF1': '', 'PTEN': 'Sirolimus', 'NOTCH1': 'OMP-52M51',
                       'ARID1A': 'ATR inhibitors', 'RB1': '',
                       'DDR2': 'Dasatinib', 'EPHA2': 'MTOR inhibitors'}


MELA = Mela()
LUNG = Lung()


@functools.lru_cache(50)
def get(ttype, n, code=None):

    if ttype == 'skin':
        cancer = MELA
    elif ttype == 'lung':  # lung
        cancer = LUNG
    else:
        raise NotImplementedError

    result = []
    for i in range(n-1):
        d = {'id': i}

        # 30% change to be a driver except for the 1st (to ensure, at least, 1 driver)
        driver = True if i == 0 else random.random() >= 0.7

        if driver:
            d['driver'] = True
            gene = random.choice(cancer.drivers)
            d['gene'] = gene
            d['drug'] = cancer.driver_drug_map[gene]
        else:
            d['gene'] = random.choice(cancer.passengers)

        alt = random.choice(cancer.alts)
        d['alt'] = alt
        d['color'] = ALT_COLOR_MAP[alt]

        result.append(d)

    return result

## New code

In [3]:
df = pd.read_csv('../static/data/code/mutations_db.tsv.gz',sep='\t')
df

Unnamed: 0,cancer_type,mutation_id,gene,aa_change,dna_change,driver_passenger,og_tsg,targeted_therapy,targeted_therapy_approved
0,lung,FBXW7_R505P,FBXW7,R505P,4:152326136_C>G,driver,tsg,,False
1,lung,FAM135B_G186E,FAM135B,G186E,8:138243054_C>T,driver,og,,False
2,lung,ERBB3_C331R,ERBB3,C331R,12:56088750_T>C,driver,og,,False
3,lung,PTEN_R130G,PTEN,R130G,10:87933147_C>G,driver,tsg,,False
4,lung,PTEN_C136R,PTEN,C136R,10:87933165_T>C,driver,tsg,,False
...,...,...,...,...,...,...,...,...,...
1236,skin,MSMB_A103T,MSMB,A103T,chr10:46033460_C>T,passenger,unknown,,False
1237,skin,ACSM2B_D391N,ACSM2B,D391N,chr16:20546402_C>T,passenger,unknown,,False
1238,skin,TAAR5_P143S,TAAR5,P143S,chr6:132589260_G>A,passenger,unknown,,False
1239,skin,SLC17A3_G135R,SLC17A3,G135R,chr6:25861930_C>T,passenger,unknown,,False


Consensus to randomize mutations:
- First 3 mutations: 1 driver with therapy,  2 passengers
- Next 1-6 mutations: 70% passenger, 30% driver without therapy
- Keep only one mutation per gene

In [109]:
import click
import pandas as pd
import random
import os

@click.command()

@click.option('--cancer_type',
              '-c',
              required = True,
              help="Cancer type: must be 'lung' or 'skin'")
@click.option('--number_muts',
              '-n',
              required = False,
              default = 0
              help="Cancer type: must be 'lung' or 'skin'")
@click.option('--dir_path',
              '-d',
              required = True,
              default = False
              help="Cancer type: must be 'lung' or 'skin'")

def cli(cancer_type, n, dir_path):
    
    if n_muts == 0:
        n = random.choice(list(range(1,7))

    #Get database
    df = pd.read_csv('../static/data/code/mutations_db.tsv.gz',sep='\t')

    #Select cancer type alterations
    df_ct = df[df['cancer_type']==cancer_type]

    #Create final df with list of mutations
    final_df = pd.DataFrame()
    for i in range(n):
        # 30% change to be a driver except for the 1st (to ensure, at least, 1 driver)
        driver = True if i == 0 else random.random() >= 0.7
        if driver:
            if i == 0:
                #Get the first driver with therapy
                df_drivers_therapy = df_ct[(df_ct['driver_passenger']=='driver')&(df_ct['targeted_therapy']!='None')]
                driver_therapy = df_drivers_therapy.sample()
                final_df = pd.concat([final_df,driver_therapy])
            else:
                stop = False
                while stop == False:
                    #Get the other drivers
                    df_drivers = df_ct[df_ct['driver_passenger']=='driver']
                    drivers = df_drivers.sample()
                    if drivers['gene'].to_list() not in final_df['gene'].to_list():
                        stop = True
                final_df = pd.concat([final_df,drivers])
        else:
            stop = False
            while stop == False:
                #Get the other drivers
                df_passengers = df_ct[df_ct['driver_passenger']=='passenger']
                passengers = df_passengers.sample()
                if passengers['gene'].to_list() not in final_df['gene'].to_list():
                    stop = True
            final_df = pd.concat([final_df,passengers])
                          
    if dir_path == False:
        dir_path = os.getcwd()

    final_df.to_csv(dir_path+'results.tsv',sep='\t',index=False)
                          
if __name__ == '__main__':
    cli()

Unnamed: 0,cancer_type,mutation_id,gene,aa_change,dna_change,driver_passenger,og_tsg,targeted_therapy,targeted_therapy_approved
174,lung,EGFR_L858R,EGFR,L858R,7:55191822_T>G,driver,og,Erlotinib,True
496,lung,TP53_R174W,TP53,R174W,17:7675092_T>A,driver,tsg,,False
337,lung,FAT3_H2306N,FAT3,H2306N,11:92800379_C>A,driver,unknown,,False
324,lung,SMARCA4_G1162C,SMARCA4,G1162C,19:11030831_G>T,driver,tsg,,False
437,lung,FAM135B_R1289G,FAM135B,R1289G,8:138139022_G>C,driver,og,,False


In [146]:
n_muts = 6
cancer_type = 'lung'

if n_muts == False:
    n = random.choice(list(range(1,7)))

#Get database
df = pd.read_csv('../static/data/code/mutations_db.tsv.gz',sep='\t')

#Select cancer type alterations
df_ct = df[df['cancer_type']==cancer_type]

#Create final df with list of mutations
final_df = pd.DataFrame()
for i in range(n):
    # 30% change to be a driver except for the 1st (to ensure, at least, 1 driver)
    driver = True if i == 0 else random.random() >= 0.7
    if driver:
        if i == 0:
            #Get the first driver with therapy
            df_drivers_therapy = df_ct[(df_ct['driver_passenger']=='driver')&(df_ct['targeted_therapy']!='None')]
            driver_therapy = df_drivers_therapy.sample()
            final_df = pd.concat([final_df,driver_therapy])
        else:
            stop = False
            while stop == False:
                #Get the other drivers
                df_drivers = df_ct[df_ct['driver_passenger']=='driver']
                drivers = df_drivers.sample()
                if drivers['gene'].to_list() not in final_df['gene'].to_list():
                    stop = True
            final_df = pd.concat([final_df,drivers])
    else:
        stop = False
        while stop == False:
            #Get the other drivers
            df_passengers = df_ct[df_ct['driver_passenger']=='passenger']
            passengers = df_passengers.sample()
            if passengers['gene'].to_list() not in final_df['gene'].to_list():
                stop = True
        final_df = pd.concat([final_df,passengers])
final_df

Unnamed: 0,cancer_type,mutation_id,gene,aa_change,dna_change,driver_passenger,og_tsg,targeted_therapy,targeted_therapy_approved
165,lung,BRAF_V600E,BRAF,V600E,7:140753336_A>T,driver,og,Dabrafenib,False
544,lung,OR14K1_L172I,OR14K1,L172I,chr1:247739128_C>A,passenger,unknown,,False
571,lung,GIP_G82W,GIP,G82W,chr17:48964323_C>A,passenger,unknown,,False
550,lung,OR7E24_H262Q,OR7E24,H262Q,chr19:9251829_C>A,passenger,unknown,,False
314,lung,LRP1B_F4531L,LRP1B,F4531L,2:140234852_G>T,driver,tsg,,False


In [32]:
df_ct[(df_ct['driver_passenger']=='driver')&(df_ct['targeted_therapy']!='None')]

Unnamed: 0,cancer_type,mutation_id,gene,aa_change,dna_change,driver_passenger,og_tsg,targeted_therapy,targeted_therapy_approved
134,lung,EGFR_L861Q,EGFR,L861Q,7:55191831_T>A,driver,og,Afatinib,False
165,lung,BRAF_V600E,BRAF,V600E,7:140753336_A>T,driver,og,Dabrafenib,False
174,lung,EGFR_L858R,EGFR,L858R,7:55191822_T>G,driver,og,Erlotinib,True
175,lung,EGFR_T790M,EGFR,T790M,7:55181378_C>T,driver,og,"Rociletinib,HM61713",False
177,lung,EGFR_G719A,EGFR,G719A,7:55174015_G>C,driver,og,Afatinib,False
266,lung,EGFR_G719S,EGFR,G719S,7:55174014_G>A,driver,og,Afatinib,False
269,lung,EGFR_S768I,EGFR,S768I,7:55181312_G>T,driver,og,Afatinib,False
295,lung,BRAF_G466V,BRAF,G466V,7:140781611_C>A,driver,og,Dasatinib,False


In [149]:
df.groupby('targeted_therapy').count()

Unnamed: 0_level_0,cancer_type,mutation_id,gene,aa_change,dna_change,driver_passenger,og_tsg,targeted_therapy_approved
targeted_therapy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Afatinib,4,4,4,4,4,4,4,4
Dabrafenib,1,1,1,1,1,1,1,1
Dabrafenib;Trametinib,1,1,1,1,1,1,1,1
Dasatinib,1,1,1,1,1,1,1,1
Erlotinib,1,1,1,1,1,1,1,1
,1229,1229,1229,1229,1229,1229,1229,1229
"Rociletinib,HM61713",1,1,1,1,1,1,1,1
Sorafenib,1,1,1,1,1,1,1,1
Vemurafenib,2,2,2,2,2,2,2,2
