In [5]:
import pandas as pd 
import altair as alt
import numpy as np
import theme 

# Register the project's theme callable with Altair under one name, then
# make that theme the active one for every chart rendered afterwards.
THEME_NAME = 'main_theme'
alt.themes.register(THEME_NAME, theme.main_theme)
alt.themes.enable(THEME_NAME)

ThemeRegistry.enable('main_theme')

In [58]:
# Load the `60y` frequency table and parse its `date` column into pandas
# datetimes in a single chained expression.
frequencies_60y_df = pd.read_csv('results/h3n2_ha_60y_frequencies_df.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [86]:
# Categorical color palettes, stored as lists of hex strings.

# 12-color palette.
antique = [
    '#855C75', '#D9AF6B', '#AF6458', '#736F4C',
    '#526A83', '#625377', '#68855C', '#9C9C5E',
    '#A06177', '#8C785D', '#467378', '#7C7C7C',
]

# 12-color palette.
pastel = [
    '#66C5CC', '#F6CF71', '#F89C74', '#DCB0F2',
    '#87C55F', '#9EB9F3', '#FE88B1', '#C9DB74',
    '#8BE0A4', '#B497E7', '#D3B484', '#B3B3B3',
]

# 8-color palette; plot_site_frequencies uses it as its allele color range.
custom = [
    '#B3E2CD', '#F4CAE4', '#CBD5E8', '#FDCDAC',
    '#E6F5C9', '#FFF2AE', '#F1E2CC', '#CCCCCC',
]

def plot_site_frequencies(data, site, width=220, height=60, palette=None):
    """Plot the allele frequencies at one site as a normalized stacked area chart.

    Parameters
    ----------
    data : pandas.DataFrame
        Long-format frequency table. The function reads the columns
        ``site``, ``allele``, ``frequency`` and ``date``; ``HA_region`` is
        additionally shown in the tooltip.
    site
        Value of the ``site`` column selecting the rows to plot.
    width, height : int
        Chart width and height passed to Altair's ``properties``.
    palette : list of str, optional
        Colors used as the range of the allele color scale. Colors are
        assigned to alleles in order of decreasing mean frequency.
        Defaults to the module-level ``custom`` palette.

    Returns
    -------
    alt.Chart
        Area chart with ``date`` on x, stacked normalized ``frequency`` on y
        and one colored area per allele.

    Raises
    ------
    ValueError
        If ``data`` contains no rows for ``site``. Without this check the
        result would be a blank chart with an empty color domain.
    """
    site_data = data.query('site == @site')
    if site_data.empty:
        raise ValueError(f"No frequency data found for site {site!r}")

    if palette is None:
        palette = custom

    # Order alleles by mean frequency (most frequent first). This fixes the
    # color-scale domain, i.e. which allele gets which palette color and the
    # order of the legend entries.
    allele_order = (
        site_data.groupby('allele')['frequency']
        .mean()
        .sort_values(ascending=False)
        .index.tolist()
    )

    chart = alt.Chart(site_data).mark_area(stroke='black', strokeWidth=0.5).encode(
        x=alt.X(
            "date:T",
            title="",
            axis=alt.Axis(
                grid=False,
                tickCount=3,
            ),
        ),
        # "normalize" rescales the stacked areas so they fill the y range
        # at every date.
        y=alt.Y(
            "frequency:Q",
            title="Frequency",
            axis=alt.Axis(
                grid=False,
                tickCount=3,
            ),
        ).stack("normalize"),
        color=alt.Color(
            "allele:N",
            title="Allele",
            scale=alt.Scale(range=palette, domain=allele_order),
            legend=alt.Legend(
                columns=2,
            ),
        ),
        # The stacking order follows the allele name, independently of the
        # frequency-based color assignment above.
        order=alt.Order('allele:N'),
        tooltip=['HA_region', 'site', 'allele', 'frequency', 'date']
    ).properties(
        width=width,
        height=height,
        title=alt.TitleParams(
            text=f"Site {site}",
            anchor='middle',
            fontSize=16,
            fontWeight='normal',
        )
    )
    return chart

In [87]:
# Allele-frequency chart for site 220, drawn from frequencies_60y_df.
plot_site_frequencies(data=frequencies_60y_df, site=220)

In [88]:
# Allele-frequency chart for site 229, drawn from frequencies_60y_df.
plot_site_frequencies(data=frequencies_60y_df, site=229)

In [89]:
# Allele-frequency chart for site 165, drawn from frequencies_60y_df.
plot_site_frequencies(data=frequencies_60y_df, site=165)

In [90]:
# Allele-frequency chart for site 205, drawn from frequencies_60y_df.
plot_site_frequencies(data=frequencies_60y_df, site=205)

In [83]:
# Load the `12y` frequency table and parse its `date` column into pandas
# datetimes in a single chained expression.
frequencies_12y_df = pd.read_csv('results/h3n2_ha_12y_frequencies_df.csv').assign(
    date=lambda frame: pd.to_datetime(frame['date'])
)

In [84]:
# Allele-frequency chart for site 140 from frequencies_12y_df, drawn larger
# than the function's default size.
plot_site_frequencies(data=frequencies_12y_df, site=140, width=300, height=80)

In [85]:
# Allele-frequency chart for site 145 from frequencies_12y_df, drawn larger
# than the function's default size.
plot_site_frequencies(data=frequencies_12y_df, site=145, width=300, height=80)