# Notebook to create the visualizations


# Import modules

In [None]:
import os
import sys

import random
import numpy as np
import pandas as pd

import plotly
import colorlover as cl
import matplotlib.pyplot as plt
import plotly.figure_factory as ff
import plotly.graph_objects as go


from IPython.display import display

In [None]:
# ---------------------------------------------------------------------------
# Load the NYT county/state counts, the county population estimates and the
# county FIPS lookup, then build:
#   df        - one row per county (latest counts, population, per-100k rates)
#   df_state  - one row per state for the latest date
#   us_ts_df  - national totals per date
# Two HTML tables (state-level and US-level) are written to ../_includes.
# ---------------------------------------------------------------------------

# County-level time series; keep only the rows of the most recent date.
nyt_county_data_path = os.path.join('..','..','covid-19-data','us-counties.csv')
nyt_df = pd.read_csv(nyt_county_data_path)
# Missing values (including missing FIPS codes) become 0 so the FIPS column
# can be cast to int.
nyt_df = nyt_df.fillna(0)
nyt_df['fips'] = nyt_df['fips'].astype(int)
# NOTE(review): max() on the raw 'date' column assumes the strings sort
# chronologically (e.g. ISO YYYY-MM-DD) - confirm against the CSV.
latest_nyt_df = nyt_df.loc[nyt_df['date'] == nyt_df['date'].max(),:]
display(latest_nyt_df.head(10))

# State-level time series; again keep the most recent date.
nyt_state_data_path = os.path.join('..','..','covid-19-data','us-states.csv')
nyt_state_df = pd.read_csv(nyt_state_data_path)
latest_nyt_state_df = nyt_state_df.loc[nyt_state_df['date'] == nyt_state_df['date'].max(),:]
display(latest_nyt_state_df.head(10))

# County populations are stored with thousands separators ("1,234");
# strip the commas with a vectorised string op before converting to float.
population_path = os.path.join('..','data','county_population_2019_estimate.csv')
pop_df = pd.read_csv(population_path)
pop_df['Population'] = pop_df['Population'].str.replace(',', '', regex=False).astype(float)
display(pop_df.head(10))

# County FIPS lookup (fips, county_name, state_name, state_abbr are used below).
county_path = os.path.join('..','data','county_fips.csv')
county_df = pd.read_csv(county_path)
county_df['fips'] = county_df['fips'].astype(int)

# Left join so every county in the lookup is kept even without reported cases.
df = pd.merge(county_df,latest_nyt_df[['fips','cases','deaths']],on='fips',how='left')
# Join key for the population table.
# NOTE(review): the leading '.' presumably mirrors how county names are
# written in the population CSV - confirm against that file.
df['County'] = '.' + df['county_name'] + ', ' + df['state_name']

display(df)
df = pd.merge(df,pop_df,on='County',how='left')
# Counties absent from the NYT data have no reported cases/deaths.
df['cases'] = df['cases'].fillna(0)
df['deaths'] = df['deaths'].fillna(0)
# log10(0) is -inf; silence the divide warning and map infinities to 0.
# A single .loc assignment is used instead of chained indexing
# (df[col].loc[mask] = ...), which may write to a temporary copy.
with np.errstate(divide='ignore'):
    df['cases_log10'] = np.log10(df['cases'])
df.loc[np.isinf(df['cases_log10']), 'cases_log10'] = 0
df['cases_per_100k'] = df['cases']/df['Population']*1e5
df['deaths_per_100k'] = df['deaths']/df['Population']*1e5

display(df.head(10))


display(county_df.head(10))


# State populations are the sum of their counties' populations. Only the
# Population column is aggregated: summing the string columns is meaningless.
df_state = df.groupby(['state_name','state_abbr'])['Population'].sum().reset_index()
df_state = pd.merge(latest_nyt_state_df,df_state[['state_name','state_abbr','Population']],left_on='state',right_on='state_name')
with np.errstate(divide='ignore'):
    df_state['cases_log10'] = np.log10(df_state['cases'])
df_state.loc[np.isinf(df_state['cases_log10']), 'cases_log10'] = 0
df_state['cases_per_100k'] = df_state['cases']/df_state['Population']*1e5
df_state['deaths_per_100k'] = df_state['deaths']/df_state['Population']*1e5

df_state[['state','cases','cases_per_100k','deaths','deaths_per_100k']].to_html(os.path.join('..','_includes','current_state_table.html'),table_id='myTable')
# National totals: sum the counts and the population, then derive the rates
# from those totals. Summing the per-state rates would not give the US rate.
tmp = df_state.groupby('date')[['cases','deaths','Population']].sum().reset_index()
tmp['cases_per_100k'] = tmp['cases']/tmp['Population']*1e5
tmp['deaths_per_100k'] = tmp['deaths']/tmp['Population']*1e5
tmp[['cases','cases_per_100k','deaths','deaths_per_100k']].to_html(os.path.join('..','_includes','current_us_table.html'),table_id='myTable')
display(df_state.head(10))

# National cumulative counts per date (numeric columns only, so the state
# names are not concatenated by the aggregation).
us_ts_df = nyt_state_df.groupby('date').sum(numeric_only=True).reset_index()
display(us_ts_df.head(10))

In [None]:
# Yellow-to-red palette: six colours for the five binning endpoints below.
colors = [
    '#ffffb2', '#fed976', '#feb24c',
    '#fd8d3c', '#f03b20', '#bd0026',
]

# County choropleth of absolute case counts, binned on powers of ten.
fig = ff.create_choropleth(
    fips=df['fips'],
    values=df['cases'],
    colorscale=colors,
    binning_endpoints=[1e0, 1e1, 1e2, 1e3, 1e4],
    title='Number of cases by county',
    legend_title='Number of COVID-19 cases',
)

# Drop the default template and make both backgrounds transparent;
# restrict the geo layout to the USA with white lakes.
fig.layout.template = None
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','cases_by_county.html'),
    auto_open=False,
)
fig.show()

In [None]:
# Yellow-to-red palette: four colours for the three binning endpoints below.
colors = ['#ffffb2', '#fecc5c', '#fd8d3c', '#e31a1c']

# County choropleth of cases per 100,000 residents.
fig = ff.create_choropleth(
    fips=df['fips'],
    values=df['cases_per_100k'],
    colorscale=colors,
    binning_endpoints=[1e0, 1e1, 1e2],
    title='Number of cases per 100,000 people by county',
    legend_title='Cases per 100,000 people',
)

# Drop the default template and make both backgrounds transparent;
# restrict the geo layout to the USA with white lakes.
fig.layout.template = None
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','cases_per_100k_by_county.html'),
    auto_open=False,
)
fig.show()

In [None]:
# Yellow-to-red palette: four colours for the three binning endpoints below.
colors = ['#ffffb2', '#fecc5c', '#fd8d3c', '#e31a1c']

# County choropleth of deaths per 100,000 residents.
fig = ff.create_choropleth(
    fips=df['fips'],
    values=df['deaths_per_100k'],
    colorscale=colors,
    binning_endpoints=[1e0, 1e1, 1e2],
    title='Number of deaths per 100,000 people by county',
    legend_title='Deaths per 100,000 people',
)

# Drop the default template and make both backgrounds transparent.
fig.layout.template = None
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','deaths_per_100k_by_county.html'),
    auto_open=False,
)
fig.show()

In [None]:
# Yellow-to-red palette: four colours for the three binning endpoints below.
colors = ['#ffffb2', '#fecc5c', '#fd8d3c', '#e31a1c']

# County choropleth of absolute death counts.
fig = ff.create_choropleth(
    fips=df['fips'],
    values=df['deaths'],
    colorscale=colors,
    binning_endpoints=[1e0, 1e1, 1e2],
    title='Number of deaths by county',
    legend_title='Number of COVID-19 related deaths',
)

# Drop the default template and make both backgrounds transparent;
# restrict the geo layout to the USA with white lakes.
fig.layout.template = None
fig.update_layout(
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','deaths_by_county.html'),
    auto_open=False,
)
fig.show()

In [None]:
# State choropleth coloured by log10 of the cumulative case count.
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locationmode='USA-states',
        locations=df_state['state_abbr'],
        z=df_state['cases_log10'],
        colorscale='Reds',
        autocolorscale=False,
        marker={'line': {'color': 'white'}},  # white borders between states
        colorbar_title="Log10 COVID-19 cases",
    )
)

# USA-only Albers projection with white lakes.
fig.update_layout(
    title={'text': 'Log10 number of COVID-19 cases by state'},
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','cases_by_state.html'),
    auto_open=False,
)

fig.show()

In [None]:
# State choropleth coloured by the cumulative death count; the same values
# are attached as the trace's text.
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locationmode='USA-states',
        locations=df_state['state_abbr'],
        z=df_state['deaths'],
        text=df_state['deaths'],
        colorscale='Reds',
        autocolorscale=False,
        marker={'line': {'color': 'white'}},  # white borders between states
        colorbar_title="COVID-19 related deaths",
    )
)

# USA-only Albers projection with white lakes.
fig.update_layout(
    title={'text': 'Number of COVID-19 deaths by state'},
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','deaths_by_state.html'),
    auto_open=False,
)

fig.show()

In [None]:
# State choropleth coloured by cases per 100,000 residents; the same values
# are attached as the trace's text.
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locationmode='USA-states',
        locations=df_state['state_abbr'],
        z=df_state['cases_per_100k'],
        text=df_state['cases_per_100k'],
        colorscale='Reds',
        autocolorscale=False,
        marker={'line': {'color': 'white'}},  # white borders between states
        colorbar_title="COVID-19 cases per 100,000",
    )
)

# USA-only Albers projection with white lakes.
fig.update_layout(
    title={'text': 'Number of COVID-19 cases per 100,000 people by state'},
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','cases_per_100k_by_state.html'),
    auto_open=False,
)

fig.show()

In [None]:
# State choropleth coloured by deaths per 100,000 residents; the same values
# are attached as the trace's text.
fig = go.Figure()
fig.add_trace(
    go.Choropleth(
        locationmode='USA-states',
        locations=df_state['state_abbr'],
        z=df_state['deaths_per_100k'],
        text=df_state['deaths_per_100k'],
        colorscale='Reds',
        autocolorscale=False,
        marker={'line': {'color': 'white'}},  # white borders between states
        colorbar_title="deaths per 100,000",
    )
)

# USA-only Albers projection with white lakes.
fig.update_layout(
    title={'text': 'Number of COVID-19 deaths per 100,000 people by state'},
    geo={
        'scope': 'usa',
        'projection': go.layout.geo.Projection(type='albers usa'),
        'showlakes': True,
        'lakecolor': 'rgb(255, 255, 255)',
    },
)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','deaths_per_100k_by_state.html'),
    auto_open=False,
)

fig.show()

In [None]:
from plotly.subplots import make_subplots

# National cumulative cases (left axis) and deaths (right axis) over time.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (source column, legend label, plotted on the secondary axis?)
for column, label, on_secondary in (
    ('cases', 'Total Cases', False),
    ('deaths', 'Total Deaths', True),
):
    fig.add_trace(
        go.Scatter(
            x=us_ts_df['date'],
            y=us_ts_df[column],
            mode='lines+markers',
            name=label,
        ),
        secondary_y=on_secondary,
    )

fig.update_layout(
    title="Total Confirmed COVID-19 Cases in the US",
    font={'size': 18, 'color': "#7f7f7f"},
)
fig.update_yaxes(title_text="Cases", secondary_y=False)
fig.update_yaxes(title_text="Deaths", secondary_y=True)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','usa_cumulative_cases_deaths.html'),
    auto_open=False,
)

fig.show()

In [None]:
# National cumulative cases (left axis) and deaths (right axis) over time,
# both plotted as log10 of the counts.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# (source column, legend label, plotted on the secondary axis?)
for column, label, on_secondary in (
    ('cases', 'Total Cases', False),
    ('deaths', 'Total Deaths', True),
):
    fig.add_trace(
        go.Scatter(
            x=us_ts_df['date'],
            y=np.log10(us_ts_df[column]),
            mode='lines+markers',
            name=label,
        ),
        secondary_y=on_secondary,
    )

fig.update_layout(
    title="Log10 cases and deaths in the US",
    font={'size': 18, 'color': "#7f7f7f"},
)
fig.update_yaxes(title_text="Log10 cases", secondary_y=False)
fig.update_yaxes(title_text="Log10 deaths", secondary_y=True)

# Write the standalone HTML include, then render inline.
plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','log10_usa_cumulative_cases_deaths.html'),
    auto_open=False,
)

fig.show()

In [None]:
# Rebuild df_state as a per-state TIME SERIES: the full NYT state history
# joined with each state's population (sum of its counties' populations).
# Only the Population column is aggregated; summing the string columns of df
# is meaningless.
df_state = df.groupby(['state_name','state_abbr'])['Population'].sum().reset_index()
df_state = pd.merge(nyt_state_df,df_state[['state_name','state_abbr','Population']],left_on='state',right_on='state_name')
# log10(0) is -inf; silence the divide warning and map infinities to 0.
# A single .loc assignment is used instead of chained indexing
# (df_state[col].loc[mask] = ...), which may write to a temporary copy.
with np.errstate(divide='ignore'):
    df_state['cases_log10'] = np.log10(df_state['cases'])
df_state.loc[np.isinf(df_state['cases_log10']), 'cases_log10'] = 0
df_state['cases_per_100k'] = df_state['cases']/df_state['Population']*1e5
df_state['deaths_per_100k'] = df_state['deaths']/df_state['Population']*1e5

# --- Log10 cumulative cases, one line per state ----------------------------
fig = go.Figure()

# groupby yields the states in sorted order and keeps each state's rows in
# their original order, without re-filtering the whole frame per state.
for state, state_rows in df_state.groupby('state'):
    with np.errstate(divide='ignore'):
        log_cases = np.log10(state_rows['cases'])
    fig.add_trace(go.Scatter(x=state_rows['date'], y=log_cases,
                        mode='lines+markers',
                        name=state))

fig.update_layout(
    title="Log10 number of cases by state",
    yaxis_title="Log10 Number of Cases",
    font=dict(
        size=18,
        color="#7f7f7f"
    )
)

plotly.offline.plot(fig, filename=os.path.join('..','_includes','log10_states_cumulative_cases.html'),auto_open=False)

fig.show()

# --- Cumulative cases on a linear scale, one line per state ----------------
fig = go.Figure()

for state, state_rows in df_state.groupby('state'):
    fig.add_trace(go.Scatter(x=state_rows['date'], y=state_rows['cases'],
                        mode='lines+markers',
                        name=state))

fig.update_layout(
    title="Number of cases by state",
    yaxis_title="Number of Cases",
    font=dict(
        size=18,
        color="#7f7f7f"
    )
)

plotly.offline.plot(fig, filename=os.path.join('..','_includes','states_cumulative_cases.html'),auto_open=False)

fig.show()

In [None]:
# --- Log10 cases per 100,000 residents, one line per state -----------------
fig = go.Figure()

# groupby yields the states in sorted order, each with its own rows.
for state, state_rows in df_state.groupby('state'):
    fig.add_trace(
        go.Scatter(
            x=state_rows['date'],
            y=np.log10(state_rows['cases_per_100k']),
            mode='lines+markers',
            name=state,
        )
    )

fig.update_layout(
    title="Log10 number of cases per 100,000 people",
    yaxis_title="Log10 cases per 100,000 people",
    font={'size': 18, 'color': "#7f7f7f"},
)

plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','log10_states_cumulative_cases_per_100k.html'),
    auto_open=False,
)

fig.show()

# --- Cases per 100,000 residents on a linear scale -------------------------
fig = go.Figure()

for state, state_rows in df_state.groupby('state'):
    fig.add_trace(
        go.Scatter(
            x=state_rows['date'],
            y=state_rows['cases_per_100k'],
            mode='lines+markers',
            name=state,
        )
    )

fig.update_layout(
    title="Number of cases per 100,000 people",
    yaxis_title="Cases per 100,000 people",
    font={'size': 18, 'color': "#7f7f7f"},
)

plotly.offline.plot(
    fig,
    filename=os.path.join('..','_includes','states_cumulative_cases_per_100k.html'),
    auto_open=False,
)

fig.show()

In [None]:
# --- Log10 deaths per 100,000 residents, one line per state ----------------
# These two figures are titled and saved as *deaths* per 100k, so they must
# plot the 'deaths_per_100k' column (not 'cases_per_100k').
fig = go.Figure()

# groupby yields the states in sorted order, each with its own rows.
for state, state_rows in df_state.groupby('state'):
    # Dates before a state's first death have a rate of 0; log10(0) is -inf,
    # so silence the divide warning for this transform.
    with np.errstate(divide='ignore'):
        log_deaths_per_100k = np.log10(state_rows['deaths_per_100k'])
    fig.add_trace(go.Scatter(x=state_rows['date'], y=log_deaths_per_100k,
                        mode='lines+markers',
                        name=state))

fig.update_layout(
    title="Log10 number of deaths per 100,000 people",
    yaxis_title="Log10 deaths per 100,000 people",
    font=dict(
        size=18,
        color="#7f7f7f"
    )
)

plotly.offline.plot(fig, filename=os.path.join('..','_includes','log10_states_cumulative_deaths_per_100k.html'),auto_open=False)

fig.show()

# --- Deaths per 100,000 residents on a linear scale ------------------------
fig = go.Figure()

for state, state_rows in df_state.groupby('state'):
    fig.add_trace(go.Scatter(x=state_rows['date'], y=state_rows['deaths_per_100k'],
                        mode='lines+markers',
                        name=state))

fig.update_layout(
    title="Number of deaths per 100,000 people",
    yaxis_title="Deaths per 100,000 people",
    font=dict(
        size=18,
        color="#7f7f7f"
    )
)

plotly.offline.plot(fig, filename=os.path.join('..','_includes','states_cumulative_deaths_per_100k.html'),auto_open=False)

fig.show()