In [125]:
import altair as alt
import pandas as pd
import numpy as np
import json

In [126]:
#load the concatenated results table that every plot below is built from
results_csv_path = 'bhatt_results_nextstrain/concatenated_results_df/results_for_altair.csv'
df = pd.read_csv(results_csv_path)

In [127]:
#order to plot on the x-axis (tick positions are handed out in exactly this order)
#by default, only plot one point for each Dengue subtype (choose the one with the most samples)
virus_order = [
    'Measles',
    'Mumps',
    'Parainfluenza-1',
    'Parainfluenza-3',
    'Influenza A/H3N2',
    'Influenza A/H1N1pdm',
    'Influenza B/Vic',
    'Influenza B/Yam',
    'Influenza C/Yamagata',
    'OC43-A',
    '229E',
    'NL63',
    'RSV-A',
    'RSV-B',
    'Dengue 1-I',
    'Dengue 2-AA',
    'Dengue 3-III',
    'Dengue 4-II',
    'Rotavirus A/P[8]',
    'Rotavirus A/P[4]',
    'Norovirus GII.4',
    'Enterovirus D68',
    'Hepatitis A-IA',
    'Parvovirus B19-1A',
    'Adenovirus B-7',
    'Adenovirus B-3',
    'Hepatitis B-A2',
    'Hepatitis B-D3',
]

In [128]:
def readin_virus_config(virus):
    """Load the adaptive-evolution config for one virus.

    Reads `config/adaptive_evo_config_<virus>.json` (path relative to the
    current working directory) and returns the parsed JSON content.
    """
    config_path = 'config/adaptive_evo_config_{}.json'.format(virus)
    with open(config_path) as config_file:
        return json.load(config_file)

In [129]:
#build a human-readable label for every (virus, gene role) pair and attach it to df

#display label for each functional role; also fixes the order the roles are processed in
legible_genes = {'polymerase': 'Polymerase', 'membrane_fusion': 'Membrane Fusion', 
                 'receptor_binding':'Receptor Binding'}

#rotavirus configs name a segment letter; translate it to the protein name
rota_segment_to_gene = {'A':'NSP1', 'C':'VP2', 'E':'NSP4', 'G':'VP7', 'I':'VP6', 
                        'M': 'VP3', 'N': 'NSP2', 'P':'VP4', 'R':'VP1', 'T':'NSP3'}

#gene names whose proper spelling needs more than a capitalized first letter
proper_capitalization_genes = {'pb1':'PB1', 'ha2':'HA2', 'ha1':'HA1', 'rdrp':'RdRp'}

all_viruses = list(set(df['virus']))

virus_gene_names = []
for v in all_viruses:
    configs = readin_virus_config(v)
    is_rotavirus = 'rotavirus' in v
    for role, role_label in legible_genes.items():
        gene_name = configs[role]['virus_gene']
        #the string 'None' marks a role with no gene; it has no segment to translate
        if is_rotavirus and gene_name != 'None':
            gene_name = rota_segment_to_gene[gene_name.upper()]
        gene_name = proper_capitalization_genes.get(gene_name, gene_name)
        capitalized = gene_name[0].upper() + gene_name[1:]
        virus_gene_names.append({'virus': v,
                                 'gene': role,
                                 'legible_gene_name': f'{capitalized} ({role_label})'})

virus_gene_name_mapper = pd.DataFrame(virus_gene_names)

#merge on the columns the two frames share; reset_index() also contributes an
#'index' column, which a later cell drops again
df = df.merge(virus_gene_name_mapper.reset_index())

#capitalize virus family
df['virus_family'] = df['virus_family'].str.capitalize()

In [130]:
#add genome type, envelope status and transmission route (read from each virus config)
config_fields = ('genome_type', 'enveloped', 'transmission')

virus_genome_types = []
for v in all_viruses:
    configs = readin_virus_config(v)
    record = {'virus': v}
    for field in config_fields:
        record[field] = configs[field]
    virus_genome_types.append(record)

genome_type_mapper = pd.DataFrame(virus_genome_types)

df = df.merge(genome_type_mapper, on='virus')

In [131]:
#map each legible virus name to the page slug appended to the atlas base URL
#(fix: 'Dengue 2-AM' previously pointed at 'denv1_am', unlike every other Dengue 2 entry)
map_to_url = {
    'Measles': 'measles',
    'Mumps': 'mumps',
    'Parainfluenza-1': 'parainfluenza_1',
    'Parainfluenza-3': 'parainfluenza_3',
    'Influenza A/H3N2': 'h3n2',
    'Influenza A/H1N1pdm': 'h1n1pdm',
    'Influenza B/Vic': 'vic',
    'Influenza B/Yam': 'yam',
    'Influenza C/Yamagata': 'influenzaC_yamagata',
    'OC43-A': 'oc43_a',
    'OC43-B': 'oc43_b',
    '229E': '229e',
    'NL63': 'nl63',
    'Lassa-A': 'lassa_a',
    'Lassa-B': 'lassa_b',
    'RSV-A': 'rsv_a',
    'RSV-B': 'rsv_b',
    'HepatitisC-1a1': 'hepC_1a1',
    'HepatitisC-1a2': 'hepC_1a2',
    'HepatitisC-1b': 'hepC_1b',
    'HepatitisC-3a': 'hepC_3a',
    'Dengue 1-I': 'denv1_i',
    'Dengue 1-III': 'denv1_iii',
    'Dengue 1-IV': 'denv1_iv',
    'Dengue 1-V': 'denv1_v',
    'Dengue 2-AA': 'denv2_aa',
    'Dengue 2-AI': 'denv2_ai',
    'Dengue 2-AM': 'denv2_am',
    'Dengue 2-C': 'denv2_c',
    'Dengue 3-I': 'denv3_i',
    'Dengue 3-II': 'denv3_ii',
    'Dengue 3-III': 'denv3_iii',
    'Dengue 4-I': 'denv4_i',
    'Dengue 4-II': 'denv4_ii',
    'Rotavirus A/P[8]': 'rotavirusA_g1p8',
    'Rotavirus A/P[4]': 'rotavirusA_g1p4',
    'Enterovirus D68': 'enterovirusd68',
    'Hepatitis A-IA': 'hepatitisA_IA',
    'Norovirus GII.4': 'norovirus_gii4',
    'Parvovirus B19-1A': 'parvovirusB19',
    'Adenovirus B-7': 'adenovirusB7',
    'Adenovirus B-3': 'adenovirusB3',
    'Hepatitis B-A2': 'hepatitisB_A2',
    'Hepatitis B-D3': 'hepatitisB_D3',
}

#attach the page slug for each row; a legible_name that is not a key of map_to_url becomes NaN
df['url_ending'] = df['legible_name'].map(map_to_url)


In [132]:
#make x-axis values (to allow squishing viruses closer together or spreading them apart)
regular_spacing = 3
squished_spacing = 1  #not used by the loop below

#separate enveloped and non-enveloped viruses by a bit more
#keep track of where this is to add a vertical line
first_nonenveloped = 'Rotavirus A/P[8]'  #alternative kept for reference: 'Enterovirus D68'

#first DNA virus (also gets a wider gap and a divider)
first_DNA = 'Parvovirus B19-1A'

x_tick_pos = []   #one {'legible_name', 'xtick_pos'} record per virus in virus_order
all_xticks = []   #the bare tick coordinates, in the same order
last_coord = 0
dengue_serotypes_already_seen = []  #not updated by the loop below

for virus_name in virus_order:
    next_slot = last_coord + regular_spacing
    if virus_name == first_nonenveloped:
        #the divider takes the next regular slot; the virus itself moves one slot further
        enveloped_vert_divider = next_slot
        tick_pos = next_slot + regular_spacing
    elif virus_name == first_DNA:
        dna_vert_divider = next_slot
        tick_pos = next_slot + regular_spacing
    else:
        tick_pos = next_slot

    last_coord = tick_pos
    all_xticks.append(tick_pos)
    x_tick_pos.append({'legible_name': virus_name, 'xtick_pos': tick_pos})

xtick_df = pd.DataFrame(x_tick_pos)

#default (inner) merge: rows whose legible_name is not in virus_order are dropped here
df = df.merge(xtick_df, on='legible_name')

df = df.drop(columns=['index'])

In [133]:
#add filler row for viruses that don't have polymerase
# filler_df_row = {'virus': 'parvovirusB19', 'subtype':'NaN', 'virus_and_subtype': 
#                               'parvovirusB19', 'gene': 'polymerase', 'adaptive_subs_per_codon_per_year': 0.2, 
#                               'lower_95ci':0.0, 'upper_95ci':0.0, 'ci':[0.0,0.0], 'len_in_codons':0, 
#                               'adaptive_muts_per_year':0.2, 'lower_95ci_mutspergene':0.0,
#                               'upper_95ci_mutspergene':0.0, 'legible_name':'Parvovirus B19-1A', 
#                               'virus_family': 'parvovirus', 'color': '#FFFFFF', 
#                               'legible_gene_name':'None', 'genome_type':'Single-stranded DNA', 'enveloped':'False',
#                               'transmission':'Respiratory', 'url_ending':'parvovirusB19', 'xtick_pos':72, 
#                               'legible_rate_percodon':0, 'legible_rate_pergene': 0}

# df  = df.append(filler_df_row, ignore_index=True)

In [134]:
#make color mapping lists
def _color_scale_for_gene(frame, gene):
    """Return (domain, range) lists pairing each virus name with its color for one gene.

    The first color listed for a virus is used. Names come out in order of first
    appearance in `frame`, so the result is the same on every run (a set-based
    domain can change order between kernel sessions).
    """
    per_virus = (
        frame.loc[frame['gene'] == gene, ['legible_name', 'color']]
        .drop_duplicates(subset='legible_name')
    )
    return per_virus['legible_name'].tolist(), per_virus['color'].tolist()


#for polymerase plot
dom_p, rng_p = _color_scale_for_gene(df, 'polymerase')

#for receptor-binding plot
dom_r, rng_r = _color_scale_for_gene(df, 'receptor_binding')
    

In [135]:
#make labelExpr: a chain of "datum.label == <pos> ? '<name>' : " tests, one per tick,
#ending with the last virus name as the fall-through value
label_pieces = [
    f"datum.label == {entry['xtick_pos']} ? '{entry['legible_name']}' : "
    for entry in x_tick_pos[:-1]
]
label_pieces.extend(f"'{entry['legible_name']}'" for entry in x_tick_pos[-1:])
label_expr_str = "".join(label_pieces)

In [136]:
#add tooltip-ready text columns: value rounded to 2 decimals followed by its units
rate_columns = (
    ('legible_rate_percodon', 'adaptive_subs_per_codon_per_year', '×10⁻³ muts per codon per year'),
    ('legible_rate_pergene', 'adaptive_muts_per_year', ' muts per year'),
)
for label_col, value_col, units in rate_columns:
    df[label_col] = df[value_col].round(2).astype(str) + units


#subset dataframes to each gene
df_polymerase = df.loc[df['gene'] == 'polymerase']
df_fusion = df.loc[df['gene'] == 'membrane_fusion']
df_receptorbinding = df.loc[df['gene'] == 'receptor_binding']


In [137]:
#get relative sizes of plot for each gene in order to make y-axis scale the same, 
#but truncate axes of polymerase and fusion

#extent of the 95% CI columns (lowest lower bound, highest upper bound) per gene
p_floor, p_ceiling = min(df_polymerase['lower_95ci']), max(df_polymerase['upper_95ci'])
f_floor, f_ceiling = min(df_fusion['lower_95ci']), max(df_fusion['upper_95ci'])
r_floor, r_ceiling = min(df_receptorbinding['lower_95ci']), max(df_receptorbinding['upper_95ci'])

#relative extent of y-axes (compared to receptor-binding, which has largest range)
r_range = r_ceiling - r_floor
p_span = p_ceiling - p_floor
f_span = f_ceiling - f_floor
p_relative_range = p_span / r_range
f_relative_range = f_span / r_range


In [138]:
#set height and width of plots
standard_width = 850
standard_height = 250


def _percodon_points(frame, color_domain, color_range, y_encoding, plot_height, plot_title):
    """Filled-point layer for one gene, with the per-codon rate on the y-axis.

    Every point links (href) to its virus page and carries the hover tooltip.
    """
    linked = alt.Chart(frame).transform_calculate(
        url='https://blab.github.io/atlas-of-viral-adaptation/' + alt.datum.url_ending
    )
    #ticks sit at the precomputed positions and are relabeled with virus names
    x_encoding = alt.X(
        'xtick_pos:Q',
        title='',
        axis=alt.Axis(values=all_xticks, labelExpr=label_expr_str, tickCount=len(all_xticks)),
    )
    hover = [
        alt.Tooltip('legible_name:N', title='Virus'),
        alt.Tooltip('virus_family:N', title='Family'),
        alt.Tooltip('transmission:N', title='Transmission'),
        alt.Tooltip('genome_type:N', title='Genome Type'),
        alt.Tooltip('enveloped:N', title='Enveloped'),
        alt.Tooltip('legible_gene_name:N', title='Protein/Subunit'),
        alt.Tooltip('legible_rate_percodon', title='Rate'),
    ]
    encoded = linked.mark_point(filled=True, size=200, opacity=1).encode(
        x=x_encoding,
        y=y_encoding,
        color=alt.Color('legible_name:N',
                        scale=alt.Scale(domain=color_domain, range=color_range),
                        legend=None),
        href='url:N',
        tooltip=hover,
    )
    return encoded.properties(width=standard_width, height=plot_height, title=plot_title)


# generate the points
#polymerase panel: height shrunk in proportion to its CI range, y ticks at least 0.5 apart
points_p = _percodon_points(
    df_polymerase, dom_p, rng_p,
    alt.Y('adaptive_subs_per_codon_per_year:Q', title='', axis=alt.Axis(format=".1f", tickMinStep=0.5)),
    standard_height*p_relative_range,
    'Polymerase',
)

#receptor-binding panel: full standard height
points_r = _percodon_points(
    df_receptorbinding, dom_r, rng_r,
    alt.Y('adaptive_subs_per_codon_per_year:Q', title='', axis=alt.Axis(format=".1f")),
    standard_height,
    'Receptor-Binding',
)

#generate the error bars
def _percodon_errorbars(frame, color_domain, color_range, plot_height, hide_x_labels=False):
    """Error-bar layer spanning lower_95ci..upper_95ci for one gene (no tooltip)."""
    x_options = {'sort': virus_order, 'title': ''}
    if hide_x_labels:
        x_options['axis'] = alt.Axis(labels=False)
    bars = alt.Chart(frame).mark_errorbar().encode(
        x=alt.X('xtick_pos:Q', **x_options),
        y=alt.Y("lower_95ci", title=''),
        y2="upper_95ci",
        color=alt.Color('legible_name:N',
                        scale=alt.Scale(domain=color_domain, range=color_range),
                        legend=None),
        tooltip=alt.value(None)
    )
    return bars.properties(width=standard_width, height=plot_height)


#polymerase bars: x labels switched off on this layer
errorbars_p = _percodon_errorbars(df_polymerase, dom_p, rng_p,
                                  standard_height*p_relative_range, hide_x_labels=True)

errorbars_r = _percodon_errorbars(df_receptorbinding, dom_r, rng_r, standard_height)

def _percodon_divider_caption(divider, lines, left_of_rule, font_size, dy):
    """Grey multi-line caption derived from a divider chart.

    The caption is shifted 10 px away from the rule (to the left when
    `left_of_rule` is true, otherwise to the right) so it does not sit on top of it.
    """
    align, dx = ('right', -10) if left_of_rule else ('left', 10)
    caption = divider.mark_text(align=align, color='#6e6e6e', fontSize=font_size, dy=dy, dx=dx)
    return caption.encode(text=alt.value(lines))


#vertical line to divide enveloped from non-enveloped viruses (polymerase panel)
enveloped_rule_data_p = pd.DataFrame({'x': [enveloped_vert_divider]})
vert_divider_p = alt.Chart(enveloped_rule_data_p).mark_rule(strokeDash=[3,5]).encode(x='x')

#same divider for the receptor-binding panel, with explicit y extent
enveloped_rule_data_r = pd.DataFrame({'x': [enveloped_vert_divider], 'y': [-0.5], 'y2': [4.5]})
vert_divider_r = alt.Chart(enveloped_rule_data_r).mark_rule(strokeDash=[3,5]).encode(x='x', y='y', y2='y2')
enveloped_text = _percodon_divider_caption(vert_divider_r, ['enveloped', 'viruses'], True, 12, -200)
nonenveloped_text = _percodon_divider_caption(vert_divider_r, ['non-enveloped', 'viruses'], False, 12, -200)

#vertical line to divide RNA from DNA viruses
dna_rule_data = pd.DataFrame({'x': [dna_vert_divider]})
vert_divider2 = alt.Chart(dna_rule_data).mark_rule(strokeDash=[3,5]).encode(x='x')
rna_text = _percodon_divider_caption(vert_divider2, ['RNA', 'viruses'], True, 14, -120)
dna_text = _percodon_divider_caption(vert_divider2, ['DNA', 'viruses'], False, 14, -120)


def _percodon_axis_caption(caption, font_size, x_px, y_px):
    """Rotated (270 degree) black text placed at a fixed pixel position, used as a y-axis label."""
    mark = alt.Chart().mark_text(
        align="center",
        baseline="bottom",
        fontSize=font_size,
        fontWeight=400,
        angle=270,
        color='black'
    )
    return mark.encode(
        x=alt.value(x_px),  # pixels from left
        y=alt.value(y_px),  # pixels from top
        text=alt.value([caption])
    )


#y-axis label for both plots: main heading
text = _percodon_axis_caption('Rate of Adaptation', 18, 10, 160)

#y-axis label for both plots: units line
text2 = _percodon_axis_caption('Adaptive Mutations per Codon per Year (× 10⁻³)', 14, 0, 160)

#layer the points and error bars (each panel resolves its own color scale)
top_layers = [points_p, errorbars_p, vert_divider_p, vert_divider2]
top = alt.layer(*top_layers, data=df_polymerase).resolve_scale(color='independent')

bottom_layers = [points_r, errorbars_r, vert_divider_r, enveloped_text,
                 nonenveloped_text, vert_divider2, rna_text, dna_text]
bottom = alt.layer(*bottom_layers, data=df_receptorbinding).resolve_scale(color='independent')

#stack the genes
plot_layout = alt.vconcat(top, bottom, spacing=10)

#add the yaxis label, then apply chart-wide styling one step at a time
chart = alt.hconcat(text, text2, plot_layout, spacing=0)
chart = chart.configure_axis(grid=False, labelFontSize=12)
chart = chart.configure_view(strokeWidth=0)
chart = chart.configure_title(fontSize=18, anchor='start', color='#c6c6c6')
chart = chart.resolve_scale(y='independent', x='independent')
chart = chart.configure_axisX(labelAngle=270)

#NOTE(review): Altair documents scale_factor for PNG/SVG export; it may be a no-op for .html -- confirm
chart.save('../atlas-of-viral-adaptation/assets/adaptation_percodon_overview_plot.html', scale_factor=10.0)

chart


In [139]:
#set height and width of plots
standard_width = 850
standard_height = 250


def _pergene_points(frame, color_domain, color_range, y_encoding, plot_height, plot_title):
    """Filled-point layer for one gene, with adaptive mutations per year on the y-axis.

    Every point links (href) to its virus page and carries the hover tooltip.
    """
    linked = alt.Chart(frame).transform_calculate(
        url='https://blab.github.io/atlas-of-viral-adaptation/' + alt.datum.url_ending
    )
    #ticks sit at the precomputed positions and are relabeled with virus names
    x_encoding = alt.X(
        'xtick_pos:Q',
        title='',
        axis=alt.Axis(values=all_xticks, labelExpr=label_expr_str, tickCount=len(all_xticks)),
    )
    hover = [
        alt.Tooltip('legible_name:N', title='Virus'),
        alt.Tooltip('virus_family:N', title='Family'),
        alt.Tooltip('genome_type:N', title='Genome Type'),
        alt.Tooltip('enveloped:N', title='Enveloped'),
        alt.Tooltip('legible_gene_name:N', title='Protein/Subunit'),
        alt.Tooltip('legible_rate_pergene', title='Rate'),
    ]
    encoded = linked.mark_point(filled=True, size=200, opacity=1).encode(
        x=x_encoding,
        y=y_encoding,
        color=alt.Color('legible_name',
                        scale=alt.Scale(domain=color_domain, range=color_range),
                        legend=None),
        href='url:N',
        tooltip=hover,
    )
    return encoded.properties(width=standard_width, height=plot_height, title=plot_title)


# generate the points
#NOTE(review): p_relative_range is derived from the per-codon CI columns; confirm it is
#also the intended height ratio for this per-gene plot
points_p = _pergene_points(
    df_polymerase, dom_p, rng_p,
    alt.Y('adaptive_muts_per_year:Q', title='', axis=alt.Axis(format=".1f")),
    standard_height*p_relative_range,
    'Polymerase',
)

#receptor-binding panel: y domain pinned to [-0.1, 2.5]
points_r = _pergene_points(
    df_receptorbinding, dom_r, rng_r,
    alt.Y('adaptive_muts_per_year:Q', title='', axis=alt.Axis(format=".1f"), 
          scale=alt.Scale(domain=[-0.1, 2.5])),
    standard_height,
    'Receptor-Binding',
)

#generate the error bars
def _pergene_errorbars(frame, color_domain, color_range, plot_height, hide_x_labels=False):
    """Error-bar layer spanning lower_95ci_mutspergene..upper_95ci_mutspergene (no tooltip)."""
    x_options = {'sort': virus_order, 'title': ''}
    if hide_x_labels:
        x_options['axis'] = alt.Axis(labels=False)
    bars = alt.Chart(frame).mark_errorbar().encode(
        x=alt.X('xtick_pos:Q', **x_options),
        y=alt.Y("lower_95ci_mutspergene", title=''),
        y2="upper_95ci_mutspergene",
        color=alt.Color('legible_name',
                        scale=alt.Scale(domain=color_domain, range=color_range),
                        legend=None),
        tooltip=alt.value(None)
    )
    return bars.properties(width=standard_width, height=plot_height)


#polymerase bars: x labels switched off on this layer
errorbars_p = _pergene_errorbars(df_polymerase, dom_p, rng_p,
                                 standard_height*p_relative_range, hide_x_labels=True)

errorbars_r = _pergene_errorbars(df_receptorbinding, dom_r, rng_r, standard_height)

def _pergene_divider_caption(divider, lines, left_of_rule, font_size, dy):
    """Grey multi-line caption derived from a divider chart.

    The caption is shifted 10 px away from the rule (to the left when
    `left_of_rule` is true, otherwise to the right) so it does not sit on top of it.
    """
    align, dx = ('right', -10) if left_of_rule else ('left', 10)
    caption = divider.mark_text(align=align, color='#6e6e6e', fontSize=font_size, dy=dy, dx=dx)
    return caption.encode(text=alt.value(lines))


#vertical line to divide enveloped from non-enveloped viruses (polymerase panel)
enveloped_rule_data_p = pd.DataFrame({'x': [enveloped_vert_divider]})
vert_divider_p = alt.Chart(enveloped_rule_data_p).mark_rule(strokeDash=[3,5]).encode(x='x')

#same divider for the receptor-binding panel, with explicit y extent
enveloped_rule_data_r = pd.DataFrame({'x': [enveloped_vert_divider], 'y': [-0.2], 'y2': [2.0]})
vert_divider_r = alt.Chart(enveloped_rule_data_r).mark_rule(strokeDash=[3,5]).encode(x='x', y='y', y2='y2')
enveloped_text = _pergene_divider_caption(vert_divider_r, ['enveloped', 'viruses'], True, 12, -200)
nonenveloped_text = _pergene_divider_caption(vert_divider_r, ['non-enveloped', 'viruses'], False, 12, -200)

#vertical line to divide RNA from DNA viruses
dna_rule_data = pd.DataFrame({'x': [dna_vert_divider]})
vert_divider2 = alt.Chart(dna_rule_data).mark_rule(strokeDash=[3,5]).encode(x='x')
rna_text = _pergene_divider_caption(vert_divider2, ['RNA', 'viruses'], True, 14, -120)
dna_text = _pergene_divider_caption(vert_divider2, ['DNA', 'viruses'], False, 14, -120)



def _pergene_axis_caption(caption, font_size, x_px, y_px):
    """Rotated (270 degree) black text placed at a fixed pixel position, used as a y-axis label."""
    mark = alt.Chart().mark_text(
        align="center",
        baseline="bottom",
        fontSize=font_size,
        fontWeight=400,
        angle=270,
        color='black'
    )
    return mark.encode(
        x=alt.value(x_px),  # pixels from left
        y=alt.value(y_px),  # pixels from top
        text=alt.value([caption])
    )


#y-axis label for both plots: main heading
text = _pergene_axis_caption('Rate of Adaptation', 18, 10, 140)

#y-axis label for both plots: units line
text2 = _pergene_axis_caption('Adaptive Mutations per Year', 14, 0, 140)

#layer the points and error bars (each panel resolves its own color scale)
top_layers = [points_p, errorbars_p, vert_divider_p, vert_divider2]
top = alt.layer(*top_layers, data=df_polymerase).resolve_scale(color='independent')

bottom_layers = [points_r, errorbars_r, vert_divider_r, enveloped_text,
                 nonenveloped_text, vert_divider2, rna_text, dna_text]
bottom = alt.layer(*bottom_layers, data=df_receptorbinding).resolve_scale(color='independent')

#stack the genes
plot_layout = alt.vconcat(top, bottom, spacing=10)

#add the yaxis label, then apply chart-wide styling one step at a time
chart = alt.hconcat(text, text2, plot_layout, spacing=0)
chart = chart.configure_axis(grid=False, labelFontSize=12)
chart = chart.configure_view(strokeWidth=0)
chart = chart.configure_title(fontSize=18, anchor='start', color='#c6c6c6')
chart = chart.resolve_scale(y='independent', x='independent')
chart = chart.configure_axisX(labelAngle=270)

#scale_factor was added to avoid blurry output
#NOTE(review): Altair documents scale_factor for PNG/SVG export; it may be a no-op for .html -- confirm
chart.save('../atlas-of-viral-adaptation/assets/adaptation_pergene_overview_plot.html', scale_factor=10.0)

chart


In [None]:
#old stuff below

In [None]:
#order to plot on the x-axis
# virus_order = ['Measles', 'Mumps', 'Parainfluenza-1', 'Parainfluenza-3', 'Influenza A/H3N2', 
#                   'Influenza A/H1N1pdm', 'Influenza B/Vic', 'Influenza B/Yam', 'Influenza C/Yamagata', 
#                   'OC43-A', '229E', 'NL63', 'RSV-A', 'RSV-B', 
#                   'Dengue 1-I', 'Dengue 1-III', 'Dengue 1-IV', 'Dengue 1-V', 
#                   'Dengue 2-AA', 'Dengue 2-AI', 'Dengue 2-C', 
#                'Dengue 3-II', 'Dengue 3-III', 'Dengue 4-I', 
#                   'Dengue 4-II', 'Enterovirus D68', 'Norovirus GII.4']

In [None]:
    #regular spacing between dengue serotypes, but squished spacing between genotypes of same serotype
#     if 'Dengue' in x:
#         if x[0:8] in dengue_serotypes_already_seen:
#             tick_pos = last_coord+squished_spacing
#             last_coord = tick_pos
#         else:
#             tick_pos = last_coord+regular_spacing
#             last_coord = tick_pos
#             dengue_serotypes_already_seen.append(x[0:8])