# Reformat samples for the web

Create data to replace static content of the web:

- gens_js.txt: contains mutations formatted for gens.js
- quiz_js.txt: contains variable names for quiz.js
- quiz_css.txt: contains gene/mutation display and targeted treatments for drivers (if any)

In [22]:
import pandas as pd

### Input and output files 

In [23]:
skin_input_f = 'skin_db.txt'
lung_input_f = 'lung_db.txt'

In [24]:
gens_js_f = 'gens_js.txt'
quiz_js_f = 'quiz_js.txt'
quiz_css_f = 'quiz_css.txt'

In [25]:
# Load mutations
skin_df = pd.read_csv(skin_input_f, sep='\t', header=0)
lung_df = pd.read_csv(lung_input_f, sep='\t', header=0)
mutations_df = pd.concat([skin_df, lung_df])
mutations_df.head(2)

Unnamed: 0,cancer_type,sample_type,sample,number_mutations,drivers,treatments,passengers,total
0,a,1a_2a_3a,1a_2a_3a_4a,3,BRAF_V600E,BRAF_V600E:Vemurafenib,APOC3_L7V;OR51L1_P20S,APOC3_L7V;BRAF_V600E;OR51L1_P20S
1,a,1a_2a_3a,1a_2a_3a_4b,3,BRAF_V600K,BRAF_V600K:Vemurafenib + Cobimetinib,OR13H1_L36H;OR8D2_M145I,BRAF_V600K;OR13H1_L36H;OR8D2_M145I


In [26]:
len(mutations_df)

540

## 1) Gens js

We will generate 270 * 2 (540) rows + 2 headers

In [27]:
lung_info = False
with open(gens_js_f, 'w') as ofd: 
    ofd.write('/* //// Type A (pell) //// */\n')
    for _, row in mutations_df.iterrows(): 

        # Get data
        ctype = row['cancer_type']
        sample = row['sample']
        genes_muts = row['total'].split(';')
        
        # Write lung header if needed
        if ctype == 'b' and not lung_info: 
            ofd.write('/* //// Type B (pulmo) //// */\n')
            lung_info = True
        
        # Reformat
        data = f'var muta_{ctype}_{sample} = \''
        for gene_mut in genes_muts:
            gene, mut = gene_mut.split('_')
            data += f'<ul class="{gene} {mut}"><li class="gen1"><a href="https://www.genecards.org/cgi-bin/carddisp.pl?gene={gene}" target="_blank"></a></li><li class="gen2"></li><li class="gen3"></li><li class="gen4"></li></ul>'    
        data += "';"
        ofd.write(f'{data}\n')

## 2) Quiz js

In [28]:
lung_info = False
with open(quiz_js_f, 'w') as ofd: 
    ofd.write(f'//skin\n')
    for i, row in mutations_df.iterrows(): 
        i += 1
        
        # Get data
        ctype = row['cancer_type']
        sample = row['sample'].split('_')
        ctype_sample_1 = 'gen-' + '-'.join(list(ctype) + sample)
        ctype_sample_2 = 'muta_' + '_'.join(list(ctype) + sample)
        
        # Write lung header if needed
        if ctype == 'b' and not lung_info: 
            ofd.write(f'//lung\n')
            lung_info = True
        
        # Reformat
        var_id = f'{i:03d}'
        var_type = f'type{ctype.upper()}{var_id}'
        data1 = f'var {var_type} = document.getElementById( \'{ctype_sample_1}\' ); if(typeA01) '
        data2 = f'{var_type}.innerHTML += {ctype_sample_2};'
        data = data1 + '{ ' + data2 + ' }'
        
        ofd.write(f'{data}\n')
print(data)       

var typeB270 = document.getElementById( 'gen-b-1c-2c-3c-4j' ); if(typeA01) { typeB270.innerHTML += muta_b_1c_2c_3c_4j; }


## 3) Quiz css

In [29]:
unique_muts_total = set()
for mut_str in mutations_df['total'].tolist(): 
    for mut in mut_str.split(';'): 
        unique_muts_total.add(mut)
unique_muts_total = list(sorted(unique_muts_total))
print(len(unique_muts_total))
unique_muts_total[:10]

213


['ACTRT1_D84N',
 'ADAM30_R503K',
 'AHSG_P358L',
 'AMELX_L6I',
 'APOC3_L7V',
 'AQP2_A190T',
 'ARR3_V155I',
 'ATOH1_G339R',
 'BRAF_V600E',
 'BRAF_V600K']

In [30]:
unique_muts_drivers = set()
for mut_str in mutations_df['drivers'].tolist(): 
    for mut in mut_str.split(';'): 
        unique_muts_drivers.add(mut)
unique_muts_drivers = list(sorted(unique_muts_drivers))
print(len(unique_muts_drivers))
unique_muts_drivers[:10]

20


['BRAF_V600E',
 'BRAF_V600K',
 'CDKN2A_P81L',
 'EGFR_L858R',
 'EGFR_L861Q',
 'HRAS_G12V',
 'KEAP1_C23Y',
 'KEAP1_V155F',
 'KRAS_G12C',
 'KRAS_G12D']

In [31]:
for i in unique_muts_drivers: 
    print(i.split('_')[0] + ':' + i.split('_')[1])

BRAF:V600E
BRAF:V600K
CDKN2A:P81L
EGFR:L858R
EGFR:L861Q
HRAS:G12V
KEAP1:C23Y
KEAP1:V155F
KRAS:G12C
KRAS:G12D
NF1:R1276P
NFE2L2:G31R
NRAS:Q61H
NRAS:Q61K
PTEN:D162G
TP53:L145R
TP53:P151S
TP53:R248G
TP53:R273H
TP53:Y220C


In [32]:
unique_muts_passengers = set()
for mut_str in mutations_df['passengers'].tolist(): 
    for mut in mut_str.split(';'): 
        unique_muts_passengers.add(mut)
unique_muts_passengers = list(sorted(unique_muts_passengers))
print(len(unique_muts_passengers))
unique_muts_passengers[:10]

193


['ACTRT1_D84N',
 'ADAM30_R503K',
 'AHSG_P358L',
 'AMELX_L6I',
 'APOC3_L7V',
 'AQP2_A190T',
 'ARR3_V155I',
 'ATOH1_G339R',
 'BRS3_Y54N',
 'C10orf120_R326L']

In [33]:
mutations_df.head()

Unnamed: 0,cancer_type,sample_type,sample,number_mutations,drivers,treatments,passengers,total
0,a,1a_2a_3a,1a_2a_3a_4a,3,BRAF_V600E,BRAF_V600E:Vemurafenib,APOC3_L7V;OR51L1_P20S,APOC3_L7V;BRAF_V600E;OR51L1_P20S
1,a,1a_2a_3a,1a_2a_3a_4b,3,BRAF_V600K,BRAF_V600K:Vemurafenib + Cobimetinib,OR13H1_L36H;OR8D2_M145I,BRAF_V600K;OR13H1_L36H;OR8D2_M145I
2,a,1a_2a_3a,1a_2a_3a_4c,3,BRAF_V600E;NF1_R1276P;TP53_R248G,BRAF_V600E:Vemurafenib,GAST_D41N,BRAF_V600E;GAST_D41N;NF1_R1276P;TP53_R248G
3,a,1a_2a_3a,1a_2a_3a_4d,3,BRAF_V600E;CDKN2A_P81L,BRAF_V600E:Vemurafenib,SPZ1_E62K,BRAF_V600E;CDKN2A_P81L;SPZ1_E62K
4,a,1a_2a_3a,1a_2a_3a_4e,3,BRAF_V600E;NF1_R1276P,BRAF_V600E:Vemurafenib,OR5J2_S67L,BRAF_V600E;NF1_R1276P;OR5J2_S67L


In [34]:
genes_with_treatments = dict(list([(i.split(':')[0], i.split(':')[1]) for i in mutations_df['treatments'] if type(i) == str]))
genes_with_treatments

{'BRAF_V600E': 'Vemurafenib',
 'BRAF_V600K': 'Vemurafenib + Cobimetinib',
 'EGFR_L858R': 'Erlotinib',
 'EGFR_L861Q': 'Erlotinib'}

In [19]:
len(unique_muts_total)

213

In [36]:
with open(quiz_css_f, 'w') as ofd: 
    
    ofd.write('/* Gen 1 i 2 */\n\n')
    string_1 = ''
    for gene_mut in unique_muts_total: 
        gene, mut = gene_mut.split('_')
        data1 = f'.{gene}.{mut} .gen1 a:after '
        data2 = f'content:"{gene}";'
        data3 = f' .{gene}.{mut} .gen2.active:after '
        data4 = f'content:"{mut}";'
        data = data1 + '{ ' + data2 + ' }' + data3 + '{ ' + data4 + ' }'
        string_1 += data
    ofd.write(string_1)
    
    ofd.write('\n\n/* Gen 3*/\n\n')
    string_3a = ''
    for gene_mut in unique_muts_drivers: 
        gene, mut = gene_mut.split('_')
        string_3a += f'.{gene}.{mut} .gen3.active:after, '
    string_3a += '{ content:"Impulsora"; color:var(--red); }'
    ofd.write(string_3a)
    ofd.write('\n')
    string_3b = ''
    for gene_mut in unique_muts_passengers: 
        gene, mut = gene_mut.split('_')
        string_3b += f'.{gene}.{mut} .gen3.active:after, '
    string_3b += '{ content:"No impulsora"; }'
    ofd.write(string_3b)
    
    ofd.write('\n\n/* Gen 4 no*/\n\n')
    string_4a = ''
    for gene_mut in unique_muts_total: 
        if gene not in genes_with_treatments.keys():
            gene, mut = gene_mut.split('_')
            string_4a += f'.{gene}.{mut} .gen4.active:after, '
    string_4a += '{ content:"Cap"; display:inline-block; background:url("../grafs/gral/supr.svg") no-repeat left center; background-size:20px; padding-left:25px; line-height:30px; color:var(--red); color:rgba(255,255,255,.4); }'
    ofd.write(string_4a)
    ofd.write('\n')
    
    ofd.write('\n\n/* Gen 4 si*/\n\n')
    for gene_mut in genes_with_treatments: 
        gene, mut = gene_mut.split('_')
        treatment = genes_with_treatments[gene_mut]
        string_4b = f'.{gene}.{mut} .gen4.active:after' + '{ content:"' + f'{treatment}' + '"; }'
        ofd.write(string_4b)
    ofd.write('\n')
    