## Analysis of chronic infection from a single patient
(https://doi.org/10.1038/s41467-021-26602-3)

In [1]:
import pandas as pd
import altair as alt

In [2]:
# Lineage table, produced beforehand by the ucsc_voc.ipynb notebook.
v = pd.read_csv(
    'constellations.csv.gz',
    compression='infer',  # pandas default, spelled out: codec chosen from the file suffix
)

In [3]:
# Column names applied to the variant-call table when it is read in.
cols_d = [
    "Sample",
    "POS",
    "FILTER",
    "REF",
    "ALT",
    "DP",
    "AF",
    "SB",
    "DP4",
    "IMPACT",
    "FUNCLASS",
    "EFFECT",
    "GENE",
    "CODON",
    "AA",
    "TRID",
    "min_AF",
    "max_AF",
    "countunique_change",
    "countunique_FUNCLASS",
    "change",
]

In [4]:
# Column names applied to the run-metadata table when it is read in.
# Kept as one comma-separated string (wrapped for readability) and split once.
cols_m = (
    "accession,base_count,cell_line,cell_type,center_name,checklist,"
    "collected_by,collection_date,country,cram_index_ftp,description,"
    "experiment_accession,experiment_alias,experiment_title,"
    "fastq_aspera,fastq_bytes,fastq_ftp,fastq_md5,"
    "first_created,first_public,"
    "host,host_sex,host_tax_id,"
    "instrument_model,instrument_platform,investigation_type,"
    "isolate,isolation_source,last_updated,lat,"
    "library_layout,library_name,library_selection,library_source,"
    "library_strategy,library_construction_protocol,"
    "location,lon,read_count,run_accession,run_alias,"
    "sample_accession,sample_alias,sample_description,sample_material,"
    "sample_title,sampling_campaign,sampling_platform,sampling_site,"
    "scientific_name,sra_aspera,sra_bytes,sra_ftp,sra_md5,strain,"
    "study_accession,study_alias,study_title,sub_species,sub_strain,"
    "submitted_bytes,submitted_format,submitted_host_sex,submitted_sex,"
    "tax_id"
).split(",")

In [5]:
# Input tables for the study at https://doi.org/10.1038/s41467-021-26602-3
# Both files are tab-separated; header=0 together with names= discards the
# header row found in each file and applies our own column names instead.
d = pd.read_table('chronic/PRJEB47786.tsv', header=0, names=cols_d)
m = pd.read_table('chronic/metadata_PRJEB47786_cleaned.tsv', header=0, names=cols_m)

In [6]:
# One record per sequencing run: (run accession, day, prep label).
_timepoints = [
    ("ERR6863927", 0, "swab"),
    ("ERR6863928", 7, "swab"),
    ("ERR6863929", 14, "swab"),
    ("ERR6863930", 14, "isolate"),
    ("ERR6863931", 42, "swab"),
    ("ERR6863932", 56, "swab"),
    ("ERR6863933", 59, "swab"),
    ("ERR6863934", 71, "swab"),
    ("ERR6863935", 105, "swab"),
    ("ERR6863936", 105, "isolate"),
    ("ERR6863937", 105, "isolate_(del)"),
    ("ERR6863938", 140, "swab"),
]

# Column-oriented view of the records above, ready for pd.DataFrame.from_dict.
sample_info = {
    "acc": [acc for acc, _, _ in _timepoints],
    "day": [day for _, day, _ in _timepoints],
    "prep": [prep for _, _, prep in _timepoints],
}

In [7]:
# Attach day/prep to every variant row by matching Sample to the run accession.
# A left join keeps variant rows even when their Sample is not in sample_info.
sample_table = pd.DataFrame.from_dict(sample_info)
d = pd.merge(d, sample_table, how='left', left_on='Sample', right_on='acc')

In [8]:
# Shift every position down by one.
# NOTE(review): presumably this aligns POS with the coordinates used in the
# lineage table merged later; confirm. Re-running this cell shifts POS again.
d['POS'] = d['POS'].sub(1)

In [9]:
# Bring in collection_date from the run metadata, matching Sample to
# run_accession. The join type is pandas' default (inner), so variant rows
# without a metadata match are dropped.
run_dates = m.loc[:, ['collection_date', 'run_accession']]
d = pd.merge(d, run_dates, how='inner', left_on='Sample', right_on='run_accession')

In [10]:
# Annotate variants with the lineage table. No join keys are given, so pandas
# joins on every column name the two frames share; the left join keeps all
# variant rows whether or not they match.
d = pd.merge(d, v, how='left')

In [11]:
# Mutation label: the position rendered as text, followed by the ALT allele.
position_text = d['POS'].astype('str')
d['mut'] = position_text + d['ALT']

In [12]:
# Parse the collection dates into pandas datetimes.
d['collection_date'] = d['collection_date'].pipe(pd.to_datetime)

In [13]:
# Row label combining day and prep, formatted as "Day: <day> | <prep>".
day_text = d['day'].astype('str')
d['y_lab'] = (
    'Day: '
    + day_text
    + " | "
    + d['prep']
)

In [14]:
# Distinct row labels in (day, prep) order; used to sort the chart's y axis.
rows_by_timepoint = d.sort_values(by=['day', 'prep'])
y_order = rows_by_timepoint['y_lab'].unique()

In [15]:
# Narrow d to the ten columns carried forward from here on.
kept_columns = [
    'Sample',
    'POS',
    'collection_date',
    'mut',
    'lineage',
    'AA',
    'TRID',
    'EFFECT',
    'AF',
    'y_lab',
]
d = d[kept_columns]

In [16]:
# Serialise the trimmed table to a JSON string; with no target path given,
# to_json returns the text instead of writing a file.
js = d.to_json(path_or_buf=None)

In [17]:
# Enable the embed action menu on rendered charts.
alt.renderers.set_embed_options(actions=True)

source = js  # not used again in this cell; both charts are built from `d`

# Interactions shared by both panels: a drag brush and a slider holding an
# allele-frequency threshold (starts at 0.01).
interval = alt.selection_interval()
slider = alt.binding_range(min=.01, max=1, step=.001, name='Allele Frequency')
selector = alt.selection_single(fields=['AF'], bind=slider, init={'AF': .01})


def _fade_below_threshold():
    """Return an opacity rule: 0.9 when AF exceeds the slider value, else 0.1."""
    return alt.condition(alt.datum.AF > selector.AF, alt.value(.9), alt.value(.1))


# --- Top panel: one point per variant, genome position against timepoint ---
position_axis = alt.X(
    "POS:Q",
    title='Position',
    axis=alt.Axis(grid=False),
    scale=alt.Scale(domain=[0, 30000]),
)
timepoint_axis = alt.Y(
    "y_lab:N",
    title='Timepoint',
    axis=alt.Axis(grid=False),
    sort=y_order,
)
hover_fields = [
    alt.Tooltip("POS", title="Genome position"),
    alt.Tooltip("AF", title="AF"),
    alt.Tooltip("TRID", title="TRID"),
    alt.Tooltip("AA", title="AA"),
]

var = (
    alt.Chart(d)
    .mark_point(opacity=.6, color='red')
    .encode(
        x=position_axis,
        y=timepoint_axis,
        size='AF',
        opacity=_fade_below_threshold(),
        color='EFFECT:N',
        tooltip=hover_fields,
    )
    .properties(width=800, height=300)
    .transform_filter(interval)
    .interactive(bind_y=False)  # zoom/pan along x only
)

# NOTE: this rebinds `v`, which earlier in the notebook held the lineage
# DataFrame; cells that merge with `v` must not be re-run after this one.
v = var.add_selection(selector).add_selection(interval)

# --- Bottom panel: variants that carry a lineage annotation ---
voc_rows = d[d['lineage'].notna()]
aa_by_position = voc_rows.sort_values(by='POS')['AA'].unique()

voc = (
    alt.Chart(voc_rows)
    .mark_text(opacity=.5)
    .encode(
        x=alt.X("AA:N", title='Mutations overlapping with VOCs', sort=aa_by_position),
        y=alt.Y("lineage:N", title="Variants of concern", axis=alt.Axis(grid=True)),
        text='TRID:N',
        opacity=_fade_below_threshold(),
    )
    .properties(width=800, height=250)
    .add_selection(interval)
)

# Last expression of the cell: display the two panels stacked vertically.
v & voc

In [18]:
# Write the stacked two-panel chart to disk as a JSON spec.
# NOTE(review): the target is an absolute, user-specific path.
stacked_chart = v & voc
stacked_chart.save('/Users/anton/git/SARS-CoV-2/data/ipynb/graphs/freiburg_chronic.json')