In [10]:
import altair as alt
import pandas as pd
import numpy as np

# generate some data points with uncertainties
np.random.seed(0)
x = ['zika', 'oc43', 'h3n2']
y = np.random.normal(10, 0.5, size=len(x))
species = ['a', 'b', 'c']
yerr_u = 0.2
yerr_l = 0.1

# set up data frame
source = pd.DataFrame({"x": x, "y": y, "yerr_upper": yerr_u, "yerr_lower":yerr_l, "species":species})

# the base chart
base = alt.Chart(source).transform_calculate(
    ymin="datum.y-datum.yerr_lower",
    ymax="datum.y+datum.yerr_upper",
    color='species:N'
).facet(
    facet='species:N',
    columns=1
)

# generate the points
points = base.mark_point(
    filled=True,
    size=50,
    color='black'
).encode(
    x=alt.X('x'),
    y=alt.Y('y', scale=alt.Scale(zero=False))
)

# generate the error bars
errorbars = base.mark_errorbar().encode(
    x="x",
    y="ymin:Q",
    y2="ymax:Q"
)

points + errorbars


NameError: name 'clf' is not defined

In [24]:
df = pd.read_csv('bhatt_results_nextstrain/concatenated_results_df/results_for_altair.csv')

In [25]:
df

Unnamed: 0,virus,subtype,virus_and_subtype,gene,adaptive_subs_per_codon_per_year,lower_95ci,upper_95ci,ci,legible_name,virus_family,color
0,measles,,measles,polymerase,-0.003625,-0.060572,0.072271,"[-0.06057151085178205, 0.0722712037218351]",Measles,paramyxovirus,#F5AD52
1,measles,,measles,membrane_fusion,-0.023358,-0.072270,0.002027,"[-0.07226998180645815, 0.0020271132896373225]",Measles,paramyxovirus,#F5AD52
2,measles,,measles,receptor_binding,-0.008966,-0.047425,0.033683,"[-0.04742524560275517, 0.03368317403865831]",Measles,paramyxovirus,#F5AD52
3,mumps,,mumps,polymerase,-0.001213,-0.005555,0.004104,"[-0.005555407961188948, 0.004103814448642033]",Mumps,paramyxovirus,#f58452
4,mumps,,mumps,membrane_fusion,0.061843,0.000000,0.185529,"[0.0, 0.1855287569573284]",Mumps,paramyxovirus,#f58452
...,...,...,...,...,...,...,...,...,...,...,...
73,dengue,denv4_II,dengue_denv4_II,membrane_fusion,0.055152,0.000000,0.529178,"[0.0, 0.5291777368780051]",Dengue 4-II,flavivirus,#adebfa
74,dengue,denv4_II,dengue_denv4_II,receptor_binding,0.082180,-0.041498,0.157342,"[-0.04149831034680393, 0.15734153342122373]",Dengue 4-II,flavivirus,#adebfa
75,zika,,zika,polymerase,0.000000,0.000000,0.000000,"[0.0, 0.0]",Zika,flavivirus,#0a829f
76,zika,,zika,membrane_fusion,0.000000,0.000000,0.000000,"[0.0, 0.0]",Zika,flavivirus,#0a829f


In [42]:
#make color mapping lists
dom = list(set(df['legible_name']))
rng = []
for d in dom:
    rng.append(list(df[df['legible_name']==d]['color'])[0])

In [57]:
alt.Chart(df).mark_point(filled=True, size=200).encode(
    x=alt.X('legible_name:N', sort=['Measles', 'Mumps', 'Influenza A/H3N2', 
                  'Influenza A/H1N1pdm', 'Influenza B/Vic', 'Influenza B/Yam', 
                  'OC43-A', 'OC43-B', '229E', 'NL63', 'RSV-A', 'RSV-B', 
                  'Dengue 1-I', 'Dengue 1-III', 'Dengue 1-IV', 'Dengue 1-V', 
                  'Dengue 2-AA', 'Dengue 2-AI', 'Dengue 2-AM', 'Dengue 2-C', 
                  'Dengue 3-I', 'Dengue 3-II', 'Dengue 3-III', 'Dengue 4-I', 
                  'Dengue 4-II', 'Zika'], title=None),
    y=alt.Y('adaptive_subs_per_codon_per_year:Q', title='Adaptive Subs per Codon per Year (x 10 0xb3)'), 
    color=alt.Color('legible_name', scale=alt.Scale(domain=dom, range=rng), legend=None),
    facet=alt.Facet('gene:N', columns=1)
).configure_axis(
    grid=False
).configure_view(
    strokeWidth=0
)