In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import numpy as np
from urllib.request import urlopen
import json
import plotly
# Download Plotly's US county GeoJSON once; plotByCounty passes it to px.choropleth as `geojson`.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as geojson_stream:
    counties = json.loads(geojson_stream.read())
import statsmodels.api as sm
import statsmodels.formula.api as smf

# Load the state lookup and invert it (file values become keys) so that it can
# be used with Series.map on the 'state' column to obtain 'state_code'.
# The file is only read, so it is opened read-only; 'r+' would additionally
# require write permission on the file for no benefit.
with open('data/google_mobility/EDA_and_Viz/states_hash.json', 'r') as file:
    states_hash = {value: key for key, value in json.load(file).items()}

In [2]:
def plotByState(dataset, valueColName, colorbarLabel, saveFilePath, stateColName = 'state_code'):
    """Draw a US state choropleth of one column and write it to an HTML file.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing ``valueColName`` and ``stateColName``.
    valueColName : str
        Column whose values drive the color of each state.
    colorbarLabel : str
        Display label substituted for ``valueColName`` in the figure.
    saveFilePath : str
        Path of the HTML file to write; the file is opened after writing
        (``auto_open=True``).
    stateColName : str, default 'state_code'
        Column holding the locations, interpreted by plotly with
        ``locationmode="USA-states"``; also used as the hover title.
    """
    # Series.min()/max() skip missing values. The builtin min()/max() compare
    # element by element, so a NaN in the column makes the result depend on
    # row order (and it can come out as NaN), which breaks the color range.
    values = dataset[valueColName]
    valueRange = (values.min(), values.max())

    fig = px.choropleth(
        dataset,
        locations=stateColName,
        locationmode="USA-states",
        color=valueColName,
        color_continuous_scale="Viridis",
        range_color=valueRange,
        scope="usa",
        labels={valueColName: colorbarLabel},
        hover_name=stateColName,
    )

    fig.update_layout(
        margin={"r":0,"t":0,"l":0,"b":0},
        coloraxis_colorbar=dict(lenmode="pixels", len=600),
    )
    fig.write_html(saveFilePath, auto_open=True)
    
def plotByDay(dataset, valueColName, stateColName = 'state_code'):
    """Plot a US state choropleth with a slider that steps through the dates.

    One choropleth trace is built for every distinct value of the ``date``
    column (in order of first appearance) and each slider step makes exactly
    one trace visible. The figure is rendered with ``plotly.offline.plot``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing the columns ``date``, ``state``, ``valueColName`` and
        ``stateColName``. It is not modified.
    valueColName : str
        Column with the values to color by; must be convertible to float.
    stateColName : str, default 'state_code'
        Column holding the locations, interpreted by plotly with
        ``locationmode='USA-states'``.
    """
    # Work on a private copy: converting columns on the caller's frame would
    # leave it full of strings and break later numeric aggregations on it.
    frame = dataset.copy()
    frame['date'] = frame['date'].astype(str)

    # A single color range for all dates keeps the colors comparable while the
    # slider moves; without it every trace autoscales to its own day.
    z_all = frame[valueColName].astype(float)
    valueRange = (z_all.min(), z_all.max())
    if pd.isna(valueRange[0]) or pd.isna(valueRange[1]):
        # No usable values (empty or all-missing column): let plotly autoscale.
        shared_scale = {}
    else:
        shared_scale = dict(zmin=float(valueRange[0]), zmax=float(valueRange[1]))

    scl = [[0.0, '#440154'],[0.2, '#404788'],[0.4, '#2d708e'],
           [0.6, '#20a387'],[0.8, '#73d055'],[1.0, '#fde725']] # viridis

    plotly.offline.init_notebook_mode()

    data_slider = []
    slider_labels = []
    # sort=False keeps the dates in order of first appearance, and one grouping
    # pass replaces a full boolean scan of the frame per date.
    for date, day_rows in frame.groupby('date', sort=False):
        hover_text = (day_rows['state'].astype(str) + '<br>'
                      + 'Non-residential mobility change: '
                      + day_rows[valueColName].astype(str))
        data_slider.append(dict(
            type='choropleth',
            locations=day_rows[stateColName].astype(str),
            locationmode='USA-states',
            z=day_rows[valueColName].astype(float),
            colorscale=scl,
            text=hover_text,
            colorbar=dict(title='Non-residential mobility % change'),
            # Only the first trace starts visible so the initial map matches
            # the slider's initial position (active=0).
            visible=not data_slider,
            **shared_scale,
        ))
        slider_labels.append('Date {}'.format(date))

    steps = []
    for position, label in enumerate(slider_labels):
        visibility = [False] * len(data_slider)
        visibility[position] = True
        steps.append(dict(method='restyle',
                          args=['visible', visibility],
                          label=label))

    sliders = [dict(active=0,
                    pad={"t": 1},
                    steps=steps)]
    layout = dict(geo=dict(scope='usa',
                           projection={'type': 'albers usa'}),
                  sliders=sliders)

    fig = dict(data=data_slider, layout=layout)
    plotly.offline.plot(fig)


def plotByCounty(dataset, valueColName, countyColName = "fips_code", colorbarLabel = "Value",
                 saveFilePath = 'data/google_mobility/mobilityLevelsCounty.html',
                 rangeColor = (-61, 35)):
    """Draw a US county choropleth of one column and write it to an HTML file.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame containing ``valueColName`` and ``countyColName``. It is not
        modified.
    valueColName : str
        Column whose values drive the color of each county.
    countyColName : str, default "fips_code"
        Column with the county FIPS codes, numeric or string.
    colorbarLabel : str, default "Value"
        Display label substituted for ``valueColName`` in the figure.
    saveFilePath : str
        Path of the HTML file to write; the file is opened after writing
        (``auto_open=True``).
    rangeColor : tuple of (low, high), default (-61, 35)
        Value range mapped onto the color scale.
    """
    # Plotly's county GeoJSON identifies features by 5-character, zero-padded
    # FIPS strings (e.g. "01001"). Numeric codes lose the leading zero, so
    # they are re-padded here; values that are not numeric are passed through
    # unchanged as strings.
    fips_numeric = pd.to_numeric(dataset[countyColName], errors='coerce')
    fips_text = dataset[countyColName].astype(str)
    is_numeric = fips_numeric.notna()
    fips_text[is_numeric] = fips_numeric[is_numeric].astype(int).astype(str).str.zfill(5)
    frame = dataset.assign(**{countyColName: fips_text})

    fig = px.choropleth(
        frame,
        geojson=counties,
        locations=countyColName,
        color=valueColName,
        color_continuous_scale="Viridis",
        range_color=rangeColor,
        scope="usa",
        labels={valueColName: colorbarLabel},
        hover_name=countyColName,
    )

    fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
    fig.write_html(saveFilePath, auto_open=True)

In [3]:
# Demographic prediction maps: add the state code column, then draw one state
# choropleth per metric.
demographic_predictions = pd.read_csv('data/results/demographicPredictions.csv').assign(
    state_code=lambda predictions: predictions['state'].map(states_hash)
)

# (value column, colorbar label, output HTML path)
for metric_column, colorbar_title, html_path in (
    ('Positivity Rate', 'Positive COVID test rate', 'data/results/plots/state_pos_rate.html'),
    ('Deaths/Population', 'Deaths/Total Population', 'data/results/plots/state_death_per_pop.html'),
):
    plotByState(demographic_predictions, metric_column, colorbar_title, html_path)

In [4]:
# State mobility maps: one choropleth of the per-state mean over the whole
# period, and one choropleth with a date slider.
state_mobility = pd.read_csv('data/cleaned/US_mobility_state_cleaned.csv')

state_mobility['state_code'] = state_mobility['state'].map(states_hash)
# Drops every row that has a missing value in any column, including rows whose
# state name has no entry in states_hash.
state_mobility = state_mobility.dropna()

state_avg = state_mobility.groupby('state_code', as_index=False)['non_residential_percent_change_from_baseline'].mean()

plotByState(state_avg, 'non_residential_percent_change_from_baseline', "Non-Residential Mobility % Change", 'data/google_mobility/mobilityYearLevelsState.html')
# Pass a copy so the plotting helper cannot alter the frame that later cells
# aggregate (the moving-average cell needs the numeric columns intact).
plotByDay(state_mobility.copy(), 'non_residential_percent_change_from_baseline')

In [14]:
# County map: average the non-residential mobility change per FIPS code,
# print the extremes of those averages, then draw the county choropleth.
county_metric = 'non_residential_percent_change_from_baseline'
county_mobility = pd.read_csv('data/cleaned/US_mobility_county_cleaned.csv')
county_avg = county_mobility.groupby('fips_code', as_index=False)[county_metric].mean()
for extreme in (np.min, np.max):
    print(extreme(county_avg[county_metric]))
plotByCounty(county_avg, county_metric)

In [5]:
# Display the state-level mobility frame (after the state_code mapping and
# dropna above) for visual inspection.
state_mobility
# for state_code in state_mobility['state_code'].unique():
#     print(state_code)

Unnamed: 0.1,Unnamed: 0,date,retail_and_recreation_percent_change_from_baseline,grocery_and_pharmacy_percent_change_from_baseline,parks_percent_change_from_baseline,transit_stations_percent_change_from_baseline,workplaces_percent_change_from_baseline,residential_percent_change_from_baseline,state,non_residential_percent_change_from_baseline,state_code
364,364,2020-02-15,5.0,2.0,39.0,7.0,2.0,-1.0,Alabama,11.0,AL
365,365,2020-02-16,0.0,-2.0,-7.0,3.0,-1.0,1.0,Alabama,-1.4,AL
366,366,2020-02-17,3.0,0.0,17.0,7.0,-17.0,4.0,Alabama,2.0,AL
367,367,2020-02-18,-4.0,-3.0,-11.0,-1.0,1.0,2.0,Alabama,-3.6,AL
368,368,2020-02-19,4.0,1.0,6.0,4.0,1.0,0.0,Alabama,3.2,AL
...,...,...,...,...,...,...,...,...,...,...,...
18923,918187,2021-02-08,-19.0,-9.0,-9.0,-7.0,-18.0,6.0,Wyoming,-12.4,WY
18924,918188,2021-02-09,-18.0,-8.0,-12.0,-11.0,-15.0,6.0,Wyoming,-12.8,WY
18925,918189,2021-02-10,-18.0,-8.0,-14.0,-11.0,-18.0,6.0,Wyoming,-13.8,WY
18926,918190,2021-02-11,-21.0,-7.0,-23.0,-13.0,-17.0,8.0,Wyoming,-16.2,WY


In [14]:
# 15-row rolling mean of non-residential mobility, computed separately for
# each state. Rows are put in date order first so every window covers
# chronologically adjacent rows even if the frame is not already sorted, and
# transform() returns a result indexed like its input, so the assignment
# aligns each value with its original row.
state_mobility['non_res_moving_avg'] = (
    state_mobility.sort_values('date')
    .groupby('state_code')['non_residential_percent_change_from_baseline']
    .transform(lambda series: series.rolling(15).mean())
)

# One line per state. The y axis is numeric, so no category ordering applies.
fig = px.line(state_mobility, x="date", y="non_res_moving_avg", color='state', line_group="state")
fig.update_layout(
    title="Non-Residential Mobility Across All States",
    xaxis_title="Date",
    yaxis_title="Non-Residential Mobility % Change from Baseline",
    legend_title="States",
)
fig.write_html('data/google_mobility/state_mobility_lines.html', auto_open=True)
fig.show()

In [15]:
# Same line chart as for all states, restricted to a hand-picked set of state
# codes. Relies on the 'non_res_moving_avg' column already being present on
# state_mobility.
states_of_interest = ['SD', 'WY', 'MT', 'ME', 'CA', 'TX', 'HI', 'NY', 'ID', 'FL', 'NV', 'AZ']

soi_mobility = state_mobility[state_mobility['state_code'].isin(states_of_interest)]

# One line per state. The y axis is numeric, so no category ordering applies.
fig = px.line(soi_mobility, x="date", y="non_res_moving_avg", color='state', line_group="state")
fig.update_layout(
    title="Non-Residential Mobility In Contrasting States",
    xaxis_title="Date",
    yaxis_title="Non-Residential Mobility % Change from Baseline",
    legend_title="States",
)
fig.write_html('data/google_mobility/soi_mobility_lines.html', auto_open=True)
fig.show()
    
    

In [22]:
# Grouped bar chart of the regression coefficients quoted in the report.
# Each row: (metric, Misconceptions Index value, Misinformation Index value).
coefficient_rows = [
    ('Median Age', -3.8078, 1.0737),
    ('Urbanization %', -2.2237, 3.3105),
    ('% > Bachelors Degree', 0.0027, 6.4662),
    ('% < High School Diploma', 4.3312, 8.0138),
    ('% Only High School Diploma', -0.0397, -0.6586),
]
metric_names, misconception_values, misinformation_values = zip(*coefficient_rows)

mis_regression = {
    'Metric': list(metric_names),
    'Misconceptions Index': list(misconception_values),
    'Misinformation Index': list(misinformation_values),
}
mis_reg_df = pd.DataFrame(mis_regression)

index_columns = ['Misconceptions Index', 'Misinformation Index']
fig = px.bar(mis_reg_df, x='Metric', y=index_columns, barmode='group')
fig.update_layout(
    title="Regression coefficients for demographic factors against COVID misconception/misinformation",
    xaxis_title="Metric",
    yaxis_title="Value",
    legend_title="Variable",
    # Anchor the legend inside the plot area, at its top-left corner.
    legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01),
)
fig.write_html('data/models/demographic_regression_bar.html', auto_open=True)
fig.show()