Generate interactive maps displaying information for each state (on a per-capita basis) across the timespans.
The main information is general statistics about the number of reports, unique sequences, and clades.

In [1]:
# set up
import pandas as pd
from plotly.offline import plot, iplot
import os

# root of the project, two directory levels above this notebook's working directory
project_folder = os.path.join(os.pardir, os.pardir)

In [2]:
# read in population dataframe (one row per state; preview the first row)
population_csv = os.path.join(project_folder, "data", "population", "clean_state_population.csv")
population_df = pd.read_csv(population_csv)
population_df.head(1)

Unnamed: 0,GEO_ID,Population,Name,Postal Code,FIPS
0,0400000US01,5024279,Alabama,AL,1


In [3]:
# map each state's postal code (key) to its population (value)
population_dictionary = dict(zip(population_df['Postal Code'], population_df['Population']))

In [4]:
# read in our timespan dataframes: files T1.csv through T11.csv, kept in order
final_data_folder = os.path.join(project_folder, 'data', 'final')
dfs = [
    pd.read_csv(os.path.join(final_data_folder, "T{}.csv".format(number)))
    for number in range(1, 12)
]

dfs[4].head(1)

Unnamed: 0,state,p_sequence,p_accession,date,count,n_accession,n_sequence,clade
0,GA,"""MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRS...",QVJ86123,2021,1,MZ217195,ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGA...,20B


In [38]:
def generate_map(dfs, title, colorbar_title, transform_function, zmax, fill_column, save = False):
    """Show a US-state choropleth with a slider that steps through the timespans.

    One choropleth trace is built per dataframe in ``dfs``; each slider step
    makes exactly one of those traces visible.

    Parameters
    ----------
    dfs : sequence of pandas.DataFrame
        One dataframe per timespan, in the order the slider should show them.
    title : str
        Figure title. When ``save`` is true it is also used, with spaces
        replaced by underscores, as the name of the HTML file.
    colorbar_title : str
        Title displayed on the colorbar.
    transform_function : callable
        Called on each dataframe; its result must provide a ``'state'`` column
        and the column named by ``fill_column``.
    zmax : number
        Upper bound of the color scale (the lower bound is fixed at 0).
    fill_column : str
        Column of the transformed dataframe used to color the states.
    save : bool, default False
        If true, also write the figure as an HTML file in the project's
        ``visualizations`` folder (created if it does not exist yet).

    Returns
    -------
    None
        The figure is displayed inline with ``iplot``.
    """
    traces = []
    for index, timespan_df in enumerate(dfs):
        transformed = transform_function(timespan_df)
        traces.append(dict(
            type='choropleth',
            locations=transformed['state'],
            locationmode="USA-states",
            z=transformed[fill_column],
            zmax=zmax,
            zmin=0,
            colorbar={'title': colorbar_title},
            # Traces are visible by default; without this every timespan (and
            # its colorbar) would be drawn on top of each other on first
            # render, even though the slider starts on step 0.
            visible=(index == 0),
        ))

    steps = []
    for index in range(len(traces)):
        # Each step shows exactly one trace and hides all the others.
        visibility = [False] * len(traces)
        visibility[index] = True
        steps.append(dict(method='restyle',
                          args=['visible', visibility],
                          label='Timespan {}'.format(index + 1)))

    sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

    layout = dict(title=title,
                  geo=dict(scope='usa', projection={'type': 'albers usa'}),
                  sliders=sliders)

    fig = dict(data=traces, layout=layout)

    if save:
        output_folder = os.path.join(project_folder, 'visualizations')
        # Writing the HTML file fails if the target folder is missing.
        os.makedirs(output_folder, exist_ok=True)
        name = os.path.join(output_folder, title.replace(" ", "_") + '.html')
        plot(fig, image_width=2000, image_height=1000, filename=name, validate=True)
    iplot(fig)

In [39]:
def transform_to_total_reports_per_capita(timeperiod_df):
    """Sum ``count`` per state and express it per 100,000 residents.

    Returns a dataframe with one row per state and the columns ``state``,
    ``count`` (sum of the input ``count`` column) and ``per_capita``.
    Populations are looked up in the module-level ``population_dictionary``;
    a state missing from it gets NaN in ``per_capita``.
    """
    state_totals = timeperiod_df.groupby(by='state', as_index=False).agg({'count': 'sum'})
    state_population = state_totals['state'].map(population_dictionary)
    state_totals['per_capita'] = (state_totals['count'] / state_population) * 100000
    return state_totals

def transform_to_unique_reports_per_capita(timeperiod_df):
    """Count rows per state and express that count per 100,000 residents.

    Returns a dataframe with one row per state and the columns ``state``,
    ``count`` (number of input rows with a non-null ``count`` value) and
    ``per_capita``. Populations are looked up in the module-level
    ``population_dictionary``; a state missing from it gets NaN.

    NOTE(review): presumably each input row is one distinct sequence, which
    is what makes this a "unique" count -- confirm against the data pipeline.
    """
    per_state = timeperiod_df.groupby(by='state', as_index=False).agg({'count': 'count'})
    residents = per_state['state'].map(population_dictionary)
    rate = per_state['count'] / residents * 100000
    return per_state.assign(per_capita=rate)

def transform_to_unique_clades(timeperiod_df):
    """Count how many distinct clades appear in each state.

    Returns a dataframe with one row per state and the columns ``state`` and
    ``clade``, where ``clade`` holds the number of distinct clades.
    """
    # First collapse to one row per (state, clade) pair ...
    state_clade_pairs = (
        timeperiod_df
        .groupby(by=['state', 'clade'], as_index=False)
        .agg({"count": "sum"})
    )
    # ... then count how many such pairs each state has.
    clades_per_state = (
        state_clade_pairs
        .groupby(by='state', as_index=False)
        .agg({"clade": "count"})
    )
    return clades_per_state

In [40]:
# total reports per 100,000 residents, one slider step per timespan
generate_map(
    dfs,
    title="Reports per 100,000 people by state and timeperiod",
    colorbar_title="Reports per 100,000",
    transform_function=transform_to_total_reports_per_capita,
    zmax=50,
    fill_column="per_capita",
    save=True,
)

In [41]:
# unique sequence reports per 100,000 residents, one slider step per timespan
generate_map(
    dfs,
    title="Unique sequences reported per 100,000 people by state and timeperiod",
    colorbar_title="Unique sequence reports per 100,000",
    transform_function=transform_to_unique_reports_per_capita,
    zmax=4,
    fill_column="per_capita",
    save=True,
)

In [42]:
# number of distinct clades per state, one slider step per timespan
generate_map(
    dfs,
    title="Number of clades in state and timeperiod",
    colorbar_title="Number of clades",
    transform_function=transform_to_unique_clades,
    zmax=20,
    fill_column="clade",
    save=True,
)