In [457]:
import altair as alt
import pandas as pd
import numpy as np
import json

In [483]:
#load the results table that every later cell plots from; the cells below read its
#legible_name, color, virus, gene, adaptive_subs_per_codon_per_year, lower_95ci and upper_95ci columns
results_csv = 'bhatt_results_nextstrain/concatenated_results_df/results_for_altair.csv'
df = pd.read_csv(results_csv)

In [484]:
#make color mapping lists: `dom` holds each legible virus name once and `rng` holds,
#position-for-position, the color taken from the first row carrying that name
#(the two lists are passed to alt.Scale(domain=..., range=...) in the plotting cells)
#
#drop_duplicates keeps the first row per name, so the colors are the same as scanning
#the frame once per name, but the order now follows the file instead of the arbitrary
#iteration order of a set -- the emitted chart spec is identical from run to run
color_key = df.drop_duplicates(subset='legible_name')
dom = color_key['legible_name'].tolist()
rng = color_key['color'].tolist()

In [485]:
#left-to-right order of the viruses on the x-axis (passed as `sort=` to alt.X)
virus_order = [
    'Measles',
    'Mumps',
    #influenza
    'Influenza A/H3N2',
    'Influenza A/H1N1pdm',
    'Influenza B/Vic',
    'Influenza B/Yam',
    #seasonal coronaviruses
    'OC43-A',
    'OC43-B',
    '229E',
    'NL63',
    #RSV
    'RSV-A',
    'RSV-B',
    #dengue 1
    'Dengue 1-I',
    'Dengue 1-III',
    'Dengue 1-IV',
    'Dengue 1-V',
    #dengue 2
    'Dengue 2-AA',
    'Dengue 2-AI',
    'Dengue 2-AM',
    'Dengue 2-C',
    #dengue 3
    'Dengue 3-I',
    'Dengue 3-II',
    'Dengue 3-III',
    #dengue 4
    'Dengue 4-I',
    'Dengue 4-II',
    'Zika',
]

In [486]:
def readin_virus_config(virus):
    """Load the adaptive-evolution config for one virus.

    Opens `config/adaptive_evo_config_<virus>.json`, resolved relative to the
    current working directory, and returns the parsed JSON content.
    """
    config_path = f'config/adaptive_evo_config_{virus}.json'
    with open(config_path) as config_file:
        return json.load(config_file)

In [487]:
#get name of each viral gene and make it legible, then add it to df
#as the `legible_gene_name` column (shown as 'Protein/Subunit' in the tooltips)

#how each functional category is spelled inside the legible name
legible_genes = {'polymerase': 'Polymerase', 'membrane_fusion': 'Membrane Fusion', 
                 'receptor_binding':'Receptor Binding'}

#one record per virus per functional category; the gene symbol comes from
#that virus's config file under configs[<category>]['virus_gene']
virus_gene_names = []
all_viruses = list(df['virus'].unique())
for v in all_viruses:
    configs = readin_virus_config(v)
    for x, legible_gene in legible_genes.items():
        gene_name = configs[x]['virus_gene']
        gene_name_legible = f'{gene_name.upper()} ({legible_gene})'
        virus_gene_names.append({'virus':v, 'gene': x, 'legible_gene_name': gene_name_legible})

virus_gene_name_mapper = pd.DataFrame(virus_gene_names)

#join on the two key columns explicitly and without reset_index(): the previous
#merge(mapper.reset_index()) joined on whatever columns the two frames happened to
#share and left a meaningless 'index' column in df
#dropping any existing legible_gene_name first keeps the cell safe to re-run
df = df.drop(columns=['legible_gene_name'], errors='ignore').merge(
    virus_gene_name_mapper, on=['virus', 'gene']
)

In [488]:
#add a column for the url address
#map_to_url: legible virus name -> last path segment of that virus's page
#Dengue entries follow denv<serotype>_<genotype>; 'Dengue 2-AM' previously pointed
#at 'denv1_am', which broke that pattern and sent the link to the wrong address
map_to_url = {'Measles': 'measles', 'Mumps':'mumps', 'Influenza A/H3N2':'h3n2', 
                  'Influenza A/H1N1pdm': 'h1n1pdm', 'Influenza B/Vic': 'vic', 'Influenza B/Yam': 'yam', 
                  'OC43-A': 'oc43_a', 'OC43-B': 'oc43_b', '229E': '229e', 'NL63':'nl63', 'RSV-A':'rsv_a', 'RSV-B':'rsv_b', 
                  'Dengue 1-I':'denv1_i', 'Dengue 1-III':'denv1_iii', 'Dengue 1-IV':'denv1_iv', 'Dengue 1-V':'denv1_v', 
                  'Dengue 2-AA':'denv2_aa', 'Dengue 2-AI':'denv2_ai', 'Dengue 2-AM':'denv2_am', 'Dengue 2-C':'denv2_c', 
                  'Dengue 3-I':'denv3_i', 'Dengue 3-II':'denv3_ii', 'Dengue 3-III':'denv3_iii', 'Dengue 4-I':'denv4_i', 
                  'Dengue 4-II':'denv4_ii', 'Zika':'zika'}

#look up each row's page slug from its legible virus name; the plotting cell appends
#this column to the site's base address to build the link behind each point
#(Series.map leaves NaN for any name that has no entry in map_to_url)
df['url_ending'] = df['legible_name'].map(map_to_url)


In [489]:
#add column for legible rate: the rate rounded to 2 decimals, as text, followed by ×10⁻³
#(shown as 'Rate' in the tooltips)
rounded_rate = df['adaptive_subs_per_codon_per_year'].round(2)
df['legible_rate'] = rounded_rate.astype(str) + '×10⁻³'


#subset dataframes to each gene, selecting on the `gene` column
gene_column = df['gene']
df_polymerase = df.loc[gene_column == 'polymerase']
df_fusion = df.loc[gene_column == 'membrane_fusion']
df_receptorbinding = df.loc[gene_column == 'receptor_binding']


In [490]:
#get relative sizes of plot for each gene in order to make y-axis scale the same, 
#but truncate axes of polymerase and fusion
#
#each floor/ceiling is the lowest lower-CI bound / highest upper-CI bound in that gene's frame
#Series.min()/.max() skip missing values; the builtin min()/max() give an
#order-dependent answer (possibly NaN) as soon as one bound is NaN, which would
#turn the panel heights computed from these numbers into NaN

p_floor = df_polymerase['lower_95ci'].min()
p_ceiling = df_polymerase['upper_95ci'].max()

f_floor = df_fusion['lower_95ci'].min()
f_ceiling = df_fusion['upper_95ci'].max()

r_floor = df_receptorbinding['lower_95ci'].min()
r_ceiling = df_receptorbinding['upper_95ci'].max()

#relative extent of y-axes (compared to receptor-binding, which is expected to have
#the largest range); the plotting cells multiply the standard height by these factors
r_range = r_ceiling-r_floor
p_relative_range = (p_ceiling-p_floor)/r_range
f_relative_range = (f_ceiling-f_floor)/r_range


In [516]:
#two-panel overview (polymerase on top, receptor-binding below), saved as html

#set height and width of plots
standard_width = 600
standard_height = 250


def _overview_points(data, height, title, y_axis=None):
    """Point layer for one gene panel: a filled dot per virus at its adaptation rate.

    data   -- frame holding the rows of one gene category
    height -- panel height in pixels
    title  -- panel title
    y_axis -- optional alt.Axis for the y channel; left unset when None

    Each dot links (href) to the atlas page built from the row's `url_ending`
    and shows virus, protein/subunit and rate in its tooltip.
    """
    y_options = {'title': ''}
    if y_axis is not None:
        y_options['axis'] = y_axis

    return alt.Chart(data).transform_calculate(
        url='https://blab.github.io/atlas-of-viral-adaptation/' + alt.datum.url_ending
    ).mark_point(
        filled=True,
        size=200,
        opacity=1
    ).encode(
        x=alt.X('legible_name:N', sort=virus_order, title=''),
        y=alt.Y('adaptive_subs_per_codon_per_year:Q', **y_options),
        color=alt.Color('legible_name', scale=alt.Scale(domain=dom, range=rng), legend=None),
        href='url:N',
        tooltip=[alt.Tooltip('legible_name:N', title='Virus'),
                 alt.Tooltip('legible_gene_name:N', title='Protein/Subunit'),
                 alt.Tooltip('legible_rate', title='Rate')],
    ).properties(
        width=standard_width,
        height=height,
        title=title
    )


def _overview_errorbars(data, height, x_axis=None):
    """Error-bar layer for one gene panel, spanning lower_95ci to upper_95ci.

    data   -- frame holding the rows of one gene category
    height -- panel height in pixels
    x_axis -- optional alt.Axis for the x channel; left unset when None

    The tooltip is switched off so only the points respond to hovering.
    """
    x_options = {'sort': virus_order, 'title': ''}
    if x_axis is not None:
        x_options['axis'] = x_axis

    return alt.Chart(data).mark_errorbar().encode(
        x=alt.X('legible_name:N', **x_options),
        y=alt.Y("lower_95ci", title=''),
        y2="upper_95ci",
        color=alt.Color('legible_name', scale=alt.Scale(domain=dom, range=rng), legend=None),
        tooltip=alt.value(None)
    ).properties(
        width=standard_width,
        height=height
    )


def _overview_axis_label(label, font_size, x_offset):
    """Rotated text mark used as a hand-placed y-axis label.

    label     -- the text to draw
    font_size -- font size of the text
    x_offset  -- pixels from the left edge of the label's own view
    """
    return alt.Chart().mark_text(
        align="center",
        baseline="bottom",
        fontSize=font_size,
        fontWeight=400,
        angle=270,
        color='black'
    ).encode(
        x=alt.value(x_offset),  # pixels from left
        y=alt.value(170),  # pixels from top
        text=alt.value([label])
    )


# generate the points
#polymerase panel is scaled down by its relative y-range and gets a y-axis tick step of at least 1
points_p = _overview_points(df_polymerase, standard_height*p_relative_range, 'Polymerase',
                            y_axis=alt.Axis(tickMinStep=1))
points_r = _overview_points(df_receptorbinding, standard_height, 'Receptor-Binding')

#generate the error bars
#x tick labels are hidden on the top panel; the bottom panel keeps them
errorbars_p = _overview_errorbars(df_polymerase, standard_height*p_relative_range,
                                  x_axis=alt.Axis(labels=False))
errorbars_r = _overview_errorbars(df_receptorbinding, standard_height)

#y-axis label for both plots: main label, then the units line next to it
text = _overview_axis_label('Rate of Adaptation', font_size=16, x_offset=10)
text2 = _overview_axis_label('Adaptive Mutations per Codon per Year (× 10⁻³)', font_size=12, x_offset=0)

#layer the points and error bars
top = alt.layer(points_p, errorbars_p, data=df_polymerase)
bottom = alt.layer(points_r, errorbars_r, data=df_receptorbinding)

#stack the genes
plot_layout = alt.vconcat(top, bottom, spacing=10)

#add the yaxis label
chart = alt.hconcat(text, text2, plot_layout, spacing=0).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=16,
    anchor='start',
    color='#c6c6c6'
).resolve_scale(
    y='independent',
    x='independent',
)

#written into the sibling atlas-of-viral-adaptation directory, relative to the working directory
chart.save('../atlas-of-viral-adaptation/adaptation_overview_plot.html')

chart


In [447]:
#WITH FUSION PROTEIN
#three-panel variant: polymerase, membrane fusion, receptor binding (no links on the points)
#NOTE(review): this cell rebinds standard_width, standard_height, points_p, points_r,
#errorbars_p, errorbars_r, text, top, bottom, plot_layout and chart from the two-panel
#cell above, and its execution count (447) is lower than that cell's (516)

#set height and width of plots
standard_width = 600
standard_height = 250


def _three_gene_points(data, height, title):
    """Point layer for one gene panel: a filled dot per virus at its adaptation rate.

    data   -- frame holding the rows of one gene category
    height -- panel height in pixels
    title  -- panel title

    The tooltip shows virus, protein/subunit and rate.
    """
    return alt.Chart(data).mark_point(
        filled=True,
        size=200,
        opacity=1
    ).encode(
        x=alt.X('legible_name:N', sort=virus_order, title=''),
        y=alt.Y('adaptive_subs_per_codon_per_year:Q', title=''),
        color=alt.Color('legible_name', scale=alt.Scale(domain=dom, range=rng), legend=None),
        tooltip=[alt.Tooltip('legible_name:N', title='Virus'),
                 alt.Tooltip('legible_gene_name:N', title='Protein/Subunit'),
                 alt.Tooltip('legible_rate', title='Rate')],
    ).properties(
        width=standard_width,
        height=height,
        title=title
    )


def _three_gene_errorbars(data, height, hide_x_labels=False):
    """Error-bar layer for one gene panel, spanning lower_95ci to upper_95ci.

    data          -- frame holding the rows of one gene category
    height        -- panel height in pixels
    hide_x_labels -- when True, the x axis is given alt.Axis(labels=False);
                     when False, no axis argument is passed at all

    The tooltip is switched off so only the points respond to hovering.
    """
    x_options = {'sort': virus_order, 'title': ''}
    if hide_x_labels:
        x_options['axis'] = alt.Axis(labels=False)

    return alt.Chart(data).mark_errorbar().encode(
        x=alt.X('legible_name:N', **x_options),
        y=alt.Y("lower_95ci", title=''),
        y2="upper_95ci",
        color=alt.Color('legible_name', scale=alt.Scale(domain=dom, range=rng), legend=None),
        tooltip=alt.value(None)
    ).properties(
        width=standard_width,
        height=height
    )


# generate the points
#polymerase and fusion panels are scaled down by their y-range relative to receptor binding
points_p = _three_gene_points(df_polymerase, standard_height*p_relative_range, 'Polymerase')
points_f = _three_gene_points(df_fusion, standard_height*f_relative_range, 'Membrane Fusion')
points_r = _three_gene_points(df_receptorbinding, standard_height, 'Receptor Binding')

#generate the error bars
#x tick labels are hidden on the two upper panels; the bottom panel keeps them
errorbars_p = _three_gene_errorbars(df_polymerase, standard_height*p_relative_range, hide_x_labels=True)
errorbars_f = _three_gene_errorbars(df_fusion, standard_height*f_relative_range, hide_x_labels=True)
errorbars_r = _three_gene_errorbars(df_receptorbinding, standard_height)

#y-axis label for all 3 plots (a two-line rotated text mark placed by pixel offsets)
text = alt.Chart().mark_text(
    align="center",
    baseline="bottom",
    fontSize=18,
    fontWeight=400,
    angle=270,
    color='black'
).encode(
    x=alt.value(20),  # pixels from left
    y=alt.value(250),  # pixels from top
    text=alt.value(['Rate of Adaptation', 'Adaptive Mutations per Codon per Year (× 10⁻³)'])
)

#layer the points and error bars
top = alt.layer(points_p, errorbars_p, data=df_polymerase)
middle = alt.layer(points_f, errorbars_f, data=df_fusion)
bottom = alt.layer(points_r, errorbars_r, data=df_receptorbinding)

#stack the genes
plot_layout = alt.vconcat(top, middle, bottom, spacing=10)

#add the yaxis label
chart = alt.hconcat(text, plot_layout).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0
).configure_title(
    fontSize=16,
    anchor='start',
    color='#c6c6c6'
).resolve_scale(
    y='independent',
    x='independent',
)

# chart.save('overview_plot.html')

chart