Generate interactive maps displaying info for each state (on a per capita basis) and over the timespans.
The main info is general statistics about the number of reports, unique sequences, and clades.

In [1]:
# set up
import pandas as pd
from plotly.offline import plot, iplot
import os

project_folder = os.path.join("..", "..")

In [2]:
# read in the per-state population table
population_csv = os.path.join(project_folder, "data", "population", "clean_state_population.csv")
population_df = pd.read_csv(population_csv)
population_df.head(1)

Unnamed: 0,GEO_ID,Population,Name,Postal Code,FIPS
0,0400000US01,5024279,Alabama,AL,1


In [3]:
# lookup table: postal code -> population
population_dictionary = dict(zip(population_df['Postal Code'], population_df['Population']))

In [4]:
# read in our timespan dataframes (files T1.csv through T11.csv, in order)
final_data_dir = os.path.join(project_folder, 'data', 'final')
dfs = [
    pd.read_csv(os.path.join(final_data_dir, "T{}.csv".format(timespan_number)))
    for timespan_number in range(1, 12)
]

dfs[4].head(1)

Unnamed: 0,state,p_sequence,p_accession,date,count,n_accession,n_sequence,clade
0,GA,"""MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRS...",QVJ86123,2021,1,MZ217195,ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGA...,20B


In [26]:
def generate_map(dfs, title, colorbar_title, transform_function, zmax, fill_column, save = False):
    """Draw a US choropleth with a slider that steps through the timespans.

    Parameters
    ----------
    dfs : list of DataFrame
        One frame per timespan, in slider order.
    title : str
        Figure title. When ``save`` is true it is also used as the HTML file
        name (spaces replaced by underscores).
    colorbar_title : str
        Title shown next to the color bar.
    transform_function : callable
        Applied to each frame before plotting; the result must provide a
        ``state`` column and the ``fill_column`` column.
    zmax : number
        Upper bound of the color scale (the lower bound is fixed at 0).
    fill_column : str
        Column of the transformed frame that drives the fill color.
    save : bool, default False
        When true, also write the figure to an HTML file under
        ``visualizations/Overview of reports/trial`` in the project folder.
    """
    data_slider = []
    for position, timespan_df in enumerate(dfs):
        summary = transform_function(timespan_df)
        data_slider.append(dict(
            type='choropleth',
            locations=summary['state'],
            locationmode="USA-states",
            z=summary[fill_column],
            zmax=zmax,
            zmin=0,
            colorbar={'title': colorbar_title},
            # Only the first timespan starts visible so the initial view agrees
            # with the slider, which starts on its first step (active=0).
            visible=(position == 0),
        ))

    n_traces = len(data_slider)
    steps = []
    for position in range(n_traces):
        # each step shows exactly one trace and hides all the others
        visibility = [False] * n_traces
        visibility[position] = True
        steps.append(dict(method='restyle',
                          args=['visible', visibility],
                          label='Timespan {}'.format(position + 1)))

    sliders = [dict(active=0, pad={"t": 1}, steps=steps)]

    layout = dict(title=title,
                  geo=dict(scope='usa', projection={'type': 'albers usa'}),
                  sliders=sliders)

    fig = dict(data=data_slider, layout=layout)

    if save:
        out_dir = os.path.join(project_folder, 'visualizations', 'Overview of reports', 'trial')
        # make sure the target folder exists so writing the file cannot fail on a fresh checkout
        os.makedirs(out_dir, exist_ok=True)
        name = os.path.join(out_dir, title.replace(" ", "_")+'.html')
        plot(fig, image_width=2000, image_height=1000, filename=name, validate=True)
    iplot(fig)

In [27]:
def transform_to_total_reports_per_capita(timeperiod_df):
    """Sum the ``count`` column per state and scale it to reports per 100,000 people.

    Returns a frame with ``state``, ``count`` and ``per_capita`` columns; each
    state's population is looked up in ``population_dictionary``.
    """
    state_totals = timeperiod_df.groupby(by='state', as_index=False).agg({'count': 'sum'})
    populations = state_totals['state'].map(population_dictionary)
    state_totals['per_capita'] = (state_totals['count'] / populations) * 100000
    return state_totals

def transform_to_unique_reports_per_capita(timeperiod_df):
    """Count the report rows per state and scale the result to rows per 100,000 people.

    Returns a frame with ``state``, ``count`` and ``per_capita`` columns, where
    ``count`` is the number of non-null ``count`` entries for the state; each
    state's population is looked up in ``population_dictionary``.
    """
    rows_per_state = timeperiod_df.groupby(by='state', as_index=False).agg({'count': 'count'})
    populations = rows_per_state['state'].map(population_dictionary)
    rows_per_state['per_capita'] = rows_per_state['count'] / populations * 100000
    return rows_per_state

def transform_to_unique_clades(timeperiod_df):
    """Count how many distinct clades appear in each state.

    Returns a frame with ``state`` and ``clade`` columns, where ``clade`` holds
    the number of distinct clade values found for that state.
    """
    # one row per (state, clade) pair; the summed count is only a by-product
    state_clade_pairs = timeperiod_df.groupby(by=['state', 'clade'], as_index=False).agg({"count": "sum"})
    # the number of rows left per state is the number of distinct clades
    clades_per_state = state_clade_pairs.groupby(by='state', as_index=False).agg({"clade": "count"})
    return clades_per_state

In [28]:
generate_map(
    dfs,
    title="Reports per 100,000 people by state and timeperiod",
    colorbar_title="Reports per 100,000",
    transform_function=transform_to_total_reports_per_capita,
    zmax=50,
    fill_column="per_capita",
    save=True,
)

In [29]:
generate_map(
    dfs,
    title="Unique sequences reported per 100,000 people by state and timeperiod",
    colorbar_title="Unique sequence reports per 100,000",
    transform_function=transform_to_unique_reports_per_capita,
    zmax=4,
    fill_column="per_capita",
    save=True,
)

In [30]:
generate_map(
    dfs,
    title="Number of clades in state and timeperiod",
    colorbar_title="Number of clades",
    transform_function=transform_to_unique_clades,
    zmax=20,
    fill_column="clade",
    save=True,
)

Next, we want a single plot that can toggle between per capita and total (not per capita) reports by state and timeperiod.

In [10]:
# read in timespan labels: one "<start> to <end>" label per timespan number
timespan_labels_path = os.path.join(project_folder, 'data', 'origonal', 'timespan_dates.txt')
timespan_labels_df = pd.read_csv(timespan_labels_path, sep='\t')
timestamp_dictionary = {
    timespan: start + " to " + end
    for timespan, start, end in zip(
        timespan_labels_df['timespan'],
        timespan_labels_df['start'],
        timespan_labels_df['end'],
    )
}
# timespans 5 through 11 get month labels, replacing whatever the file supplied for them
timestamp_dictionary.update({
    5: "Jan 2021",
    6: "Feb 2021",
    7: "Mar 2021",
    8: "Apr 2021",
    9: "May 2021",
    10: "Jun 2021",
    11: "July 2021",
})

In [11]:
from copy import deepcopy

In [14]:
import plotly.graph_objects as go

def generate_map(dfs, timestamp_dictionary, save = False):
    """Draw one US choropleth that switches between total and per-capita reports.

    A slider steps through the timespans and a dropdown selects which measure
    is shown. Choosing a measure installs that measure's slider and shows the
    first timespan's map for it, so the map is never left blank.

    NOTE(review): this assumes that replacing the ``sliders`` layout entry puts
    the slider handle back on its first step -- confirm in the rendered figure.

    Parameters
    ----------
    dfs : list of DataFrame
        One frame per timespan, in slider order; each needs ``state`` and
        ``count`` columns.
    timestamp_dictionary : dict
        Maps the 1-based timespan number to the label shown on the slider.
        Timespans without an entry are labelled ``Timespan <n>``.
    save : bool, default False
        When true, also write the figure to ``visualizations/reports_map.html``
        in the project folder.
    """
    def transform(timeperiod_df):
        # find total count for state and make per capita column
        df = timeperiod_df.groupby(by = 'state', as_index=False).agg({'count': 'sum'})
        df['per_capita'] = (df['count'] / df['state'].map(population_dictionary)) * 100000
        return df

    # Traces are interleaved per timespan: index 2*i holds the per-capita map
    # and index 2*i + 1 holds the raw-count map for timespan i.
    trace_offset = {'per_capita': 0, 'count': 1}

    traces = []
    for df in dfs:
        df = transform(df)
        traces.append(
            go.Choropleth( # per capita choropleth
                visible=False,
                locations=df['state'],
                z = df['per_capita'],
                locationmode = 'USA-states',
                colorscale = 'Reds',
                colorbar_title = "Sequence reports per 100,000",
                zmax = 50,
                zmin = 0
            )
        )
        traces.append(
            go.Choropleth( # non per capita choropleth
                visible=False,
                locations=df['state'],
                z = df['count'],
                locationmode = 'USA-states',
                colorscale = 'Reds',
                colorbar_title = "Total Sequence Reports"
            )
        )

    fig = go.Figure(data = traces)
    n_traces = len(fig.data)
    n_timespans = n_traces // 2

    def visibility(kind, timespan_index=0):
        # Visibility list with only the requested trace switched on; stays
        # all-False when there are no traces at all.
        mask = [False] * n_traces
        trace_index = 2 * timespan_index + trace_offset[kind]
        if trace_index < n_traces:
            mask[trace_index] = True
        return mask

    def build_slider(kind):
        # One slider step per timespan; each step shows that timespan's trace
        # for the given measure and hides every other trace.
        steps = [
            dict(
                label='{}'.format(timestamp_dictionary.get(i + 1, 'Timespan {}'.format(i + 1))),
                method="restyle",
                args=["visible", visibility(kind, i)],
            )
            for i in range(n_timespans)
        ]
        return [dict(
            active=0,
            currentvalue={"prefix": "Timespan: "},
            pad={"t": 50},
            steps=steps,
            transition={"duration": 300, "easing": "cubic-in-out"},
        )]

    # start on the total-count map of the first timespan, matching the initial slider
    if n_timespans:
        fig.data[trace_offset['count']].visible = True

    fig.update_layout(
        width = 1250,
        height = 600,
        sliders=build_slider('count'),
        geo=dict(scope='usa',
                 projection={'type': 'albers usa'}),
        title = "Number of Reports by Timespan and State",
        updatemenus=[
            dict(# switch between total and per capita, restarts slider on timespan 1
                buttons=[
                    dict(label="Total",
                         method="update",
                         args=[{'visible': visibility('count')},
                               {"sliders": build_slider('count')}]),
                    dict(label="Per Capita",
                         method="update",
                         args=[{'visible': visibility('per_capita')},
                               {"sliders": build_slider('per_capita')}])
                ],
                type = "dropdown",
                direction = 'down',
                pad={"r": 10, "t": 10},
                showactive=True,
                x=.85,
                xanchor="left",
                y=1,
                yanchor="top"
            )
        ]
    )

    fig.show()
    if save:
        out_dir = os.path.join(project_folder, 'visualizations')
        # make sure the target folder exists before writing
        os.makedirs(out_dir, exist_ok=True)
        fig.write_html(os.path.join(out_dir, 'reports_map.html'))


In [15]:
# render the total / per-capita toggle map; nothing is saved because `save` keeps its default of False
generate_map(dfs, timestamp_dictionary)

# problem with above approach:
The slider remembers its location when I toggle, but the button has no way to know at toggle time which trace to display, since its arguments are static (determined when the figure is built). Therefore, when you switch, the map goes blank (and loses its scale bar), and you have to move the slider to see data.
# try px

In [16]:
# load the combined table of all reports (it carries a `timespan` column)
df = pd.read_csv(os.path.join(project_folder, 'data', 'final', "all.csv"))
# bare expression instead of print(): the notebook renders the frame as a table
# rather than a line-wrapped text dump
df.head(5)

  state                                         p_sequence p_accession  \
0    MA  "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRS...    QTP71261   
1    WA  "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRS...    QLJ57227   
2    WA  "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRS...    QLJ57383   
3    CA  "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRS...    QHW06059   
4    WA  "MFVFLVLLPLVSSQCVNLTTRTQLPPAYTNSFTRGVYYPDKVFRS...    QKS89879   

         date  count n_accession  \
0        2020      2    MW885877   
1        2020   1367    MT252714   
2        2020      1    MT252753   
3  2020-01-29      2    MT027064   
4  2020-02-29     26    MT627216   

                                          n_sequence clade  timespan  
0  GGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCTGTTCTCTA...   20A         1  
1  CTTGTAGATCTGTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGT...   19B         1  
2  CCCTTHAACTTTCGATCTCTTGTAGATCTGTTCTCTAAACGAACTT...   19A         1  
3  ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGA...  

In [17]:
# collapse to one row per (timespan, state): summed report counts and number of accession rows
# note: this rebinds `df`, so the cell cannot be re-run without reloading the data first
reports_by_timespan_state = df.groupby(by=['timespan', 'state'], as_index=False)
df = reports_by_timespan_state.agg({'count': 'sum', 'p_accession': 'count'})
state_population = df['state'].map(population_dictionary)
df['per_capita_total'] = (df['count'] / state_population) * 100000
df['per_capita_unique'] = (df['p_accession'] / state_population) * 100000

In [18]:
df.head(1)

Unnamed: 0,timespan,state,count,p_accession,per_capita_total,per_capita_unique
0,1,AZ,3,2,0.041949,0.027966


In [19]:
# long format: one row per (timespan, state, measure), so the measure name can drive a facet
per_capita_columns = ['timespan', 'state', 'per_capita_total', 'per_capita_unique']
df2 = df[per_capita_columns].melt(id_vars=['timespan', 'state'])
df2

Unnamed: 0,timespan,state,variable,value
0,1,AZ,per_capita_total,0.041949
1,1,CA,per_capita_total,0.078405
2,1,CO,per_capita_total,0.017320
3,1,CT,per_capita_total,0.055464
4,1,FL,per_capita_total,0.013929
...,...,...,...,...
931,11,UT,per_capita_unique,0.183396
932,11,VA,per_capita_unique,0.069514
933,11,WA,per_capita_unique,0.298497
934,11,WI,per_capita_unique,0.067869


In [20]:
import plotly.express as px

    
# one animated map per measure, side by side; both facets share a single color range
px.choropleth(
    df2,
    locationmode='USA-states',
    locations="state",
    color="value",
    hover_name="state",
    animation_frame="timespan",
    color_continuous_scale='Reds',
    scope='usa',
    facet_col="variable",
    range_color=(0, 65),
)

Now I know to use the pandas melt function to take advantage of faceting in plotly express, but the facets share the same color scale, which is no good for this use. It also does not support toggling between maps like I want. Px cannot be used in traces.

In the end, we just use separate graphs.

In [21]:
# add a readable label for each timespan number (looked up in timestamp_dictionary);
# the maps in the following cells animate over this `Timespan` column
df['Timespan'] = df['timespan'].map(timestamp_dictionary)

In [31]:
# animated map of total report counts per state, one frame per timespan label
fig = px.choropleth(
    df,
    locationmode='USA-states',
    locations="state",
    color="count",
    hover_name="state",
    animation_frame="Timespan",
    color_continuous_scale='Reds',
    scope='usa',
    range_color=(0, 5500),
    title="Number of Reports by Timespan and State",
    # labels are keyed by column name; the colored column here is "count",
    # so that is the key that must be relabelled
    labels={'count': "Reports"},
)
fig.update_layout(height = 500, width = 800)
fig.show()
fig.write_html(os.path.join(project_folder, 'visualizations', 'Overview of reports','Number_of_reports_by_timespan_and_state.html'))

In [32]:

# animated map of reports per 100,000 people per state, one frame per timespan label
per_capita_html = os.path.join(
    project_folder,
    'visualizations',
    'Overview of reports',
    "Number_of_reports_per_capita_by_timespan_and_state.html",
)
fig = px.choropleth(
    df,
    locationmode='USA-states',
    locations="state",
    color="per_capita_total",
    hover_name="state",
    animation_frame="Timespan",
    color_continuous_scale='Reds',
    scope='usa',
    range_color=(0, 65),
    title="Per Capita Reports by Timespan and State",
    labels={'per_capita_total': "Reports  per 100,000"},
)
fig.update_layout(height=500, width=800)
fig.show()
fig.write_html(per_capita_html)