In [2]:
import pandas as pd 
import altair as alt
import numpy as np
import theme 

# Register theme.main_theme in Altair's theme registry under the name
# 'main_theme', then make it the active theme. enable() is the last expression
# of the cell, so its return value is what the cell displays.
# NOTE(review): recent Altair releases appear to expose this registry as
# `alt.theme` and deprecate `alt.themes` — confirm against the installed version.
alt.themes.register('main_theme', theme.main_theme)
alt.themes.enable('main_theme')

ThemeRegistry.enable('main_theme')

In [3]:
# Read the 60y frequency table and convert its `date` column to datetimes in a
# single chained expression, so re-running the cell always starts from the CSV.
frequencies_60y_df = (
    pd.read_csv('results/h3n2_ha_60y_frequencies_df.csv')
    .assign(date=lambda table: pd.to_datetime(table['date']))
)

In [5]:
# Categorical colour palettes, stored as lists of hex strings.
# NOTE(review): the source of each palette is not recorded in this notebook; the
# names suggest CARTO "Antique" / "Pastel" — TODO confirm.

# 12 colours.
antique = [
    '#855C75', '#D9AF6B', '#AF6458', '#736F4C',
    '#526A83', '#625377', '#68855C', '#9C9C5E',
    '#A06177', '#8C785D', '#467378', '#7C7C7C',
]

# 12 colours.
pastel = [
    '#66C5CC', '#F6CF71', '#F89C74', '#DCB0F2',
    '#87C55F', '#9EB9F3', '#FE88B1', '#C9DB74',
    '#8BE0A4', '#B497E7', '#D3B484', '#B3B3B3',
]

# 8 colours.
custom = [
    '#B3E2CD', '#F4CAE4', '#CBD5E8', '#FDCDAC',
    '#E6F5C9', '#FFF2AE', '#F1E2CC', '#CCCCCC',
]

# The same 20 one-letter codes in two orderings.
# `biochem_order_aas`: presumably grouped by biochemical property, going by the
# name — TODO confirm the intended grouping.
biochem_order_aas = list('RKHDEQNSTYWFAILMVGPC')
# `aas`: alphabetical; used as the colour-scale domain in plot_site_frequencies.
aas = list('ACDEFGHIKLMNPQRSTVWY')

# 20 colours; plot_site_frequencies uses this list reversed as its colour range.
# NOTE(review): name suggests the ggsci package's 20-colour palette — TODO confirm.
ggsci_cat20 = [
    "#393B79", "#637939", "#8C6D31", "#843C39", "#7B4173",
    "#5254A3", "#8CA252", "#BD9E39", "#AD494A", "#A55194",
    "#6B6ECF", "#B5CF6B", "#E7BA52", "#D6616B", "#CE6DBD",
    "#9C9EDE", "#CEDB9C", "#E7CB94", "#E7969C", "#DE9ED6",
]


def plot_site_frequencies(data, site, width=220, height=60):
    """Build a normalized stacked-area chart of allele frequencies at one site.

    Parameters
    ----------
    data : DataFrame
        Table filtered with ``data.query``. The chart reads the columns
        ``site``, ``date``, ``frequency``, ``allele`` and ``HA_region``.
    site :
        Value matched against the ``site`` column; also shown in the title.
    width, height :
        Forwarded to the chart's ``properties`` as its width and height.

    Returns
    -------
    alt.Chart
        Area chart with ``date`` on x, ``frequency`` stacked with
        ``"normalize"`` on y, and one coloured band per ``allele``.

    Notes
    -----
    The colour scale uses the full ``aas`` list as its domain and the reversed
    ``ggsci_cat20`` list as its range, so the letter-to-colour mapping does not
    depend on which alleles occur at the requested site. Bands are ordered by
    the ``allele`` field, not by mean frequency.
    """
    rows_for_site = data.query('site == @site')

    # Both axes share the same minimal look: no grid lines, about three ticks.
    axis_options = dict(grid=False, tickCount=3)

    date_channel = alt.X(
        "date:T",
        title="",
        axis=alt.Axis(**axis_options),
    )
    frequency_channel = alt.Y(
        "frequency:Q",
        title="Frequency",
        axis=alt.Axis(**axis_options),
        stack="normalize",
    )
    allele_channel = alt.Color(
        "allele:N",
        title="Allele",
        scale=alt.Scale(range=ggsci_cat20[::-1], domain=aas),
        legend=alt.Legend(columns=2),
    )
    heading = alt.TitleParams(
        text=f"Site {site}",
        anchor='middle',
        fontSize=16,
        fontWeight='normal',
    )

    areas = alt.Chart(rows_for_site).mark_area(
        stroke='black',
        strokeWidth=0.5,
        opacity=0.6,
    )
    return areas.encode(
        x=date_channel,
        y=frequency_channel,
        color=allele_channel,
        order=alt.Order('allele:N'),
        tooltip=['HA_region', 'site', 'allele', 'frequency', 'date'],
    ).properties(
        width=width,
        height=height,
        title=heading,
    )

In [97]:
# Allele frequencies at site 220, from the 60y table.
plot_site_frequencies(data=frequencies_60y_df, site=220)

In [98]:
# Allele frequencies at site 229, from the 60y table.
plot_site_frequencies(data=frequencies_60y_df, site=229)

In [99]:
# Allele frequencies at site 165, from the 60y table.
plot_site_frequencies(data=frequencies_60y_df, site=165)

In [100]:
# Allele frequencies at site 205, from the 60y table.
plot_site_frequencies(data=frequencies_60y_df, site=205)

In [101]:
# Read the 12y frequency table and convert its `date` column to datetimes in a
# single chained expression, so re-running the cell always starts from the CSV.
frequencies_12y_df = (
    pd.read_csv('results/h3n2_ha_12y_frequencies_df.csv')
    .assign(date=lambda table: pd.to_datetime(table['date']))
)

In [102]:
# Allele frequencies at site 140, from the 12y table.
plot_site_frequencies(data=frequencies_12y_df, site=140)

In [103]:
# Site 145, using only rows of the 12y table dated 2023 or later.
plot_site_frequencies(
    data=frequencies_12y_df.query('date.dt.year >= 2023'),
    site=145,
)

In [104]:
# Site 189, using only rows of the 12y table dated 2023 or later.
plot_site_frequencies(
    data=frequencies_12y_df.query('date.dt.year >= 2023'),
    site=189,
)

In [10]:
# Build one chart per site from the 60y table, in the listed order, keeping the
# per-site variable names.
p196, p202, p219, p223, p227, p450, p452 = (
    plot_site_frequencies(frequencies_60y_df, site_number)
    for site_number in (196, 202, 219, 223, 227, 450, 452)
)

# `&` concatenates charts vertically and `|` horizontally: four charts in the
# left column, three in the right.
(p196 & p202 & p219 & p223) | (p227 & p450 & p452)