In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [None]:
from plotly import __version__
import cufflinks as cf
from plotly.offline import download_plotlyjs,init_notebook_mode,plot,iplot
import plotly.graph_objects as go
import plotly.figure_factory as ff
import plotly.express as px
from plotly.subplots import make_subplots
from urllib.request import urlopen
import json
# Prepare plotly for rendering figures inside the notebook.
# NOTE(review): connected=True presumably loads plotly.js from the CDN rather than
# embedding it in the notebook — confirm against the plotly.offline documentation.
init_notebook_mode(connected=True)
# Switch cufflinks to offline mode; the DataFrame.iplot calls later in this notebook rely on cufflinks.
cf.go_offline()

### NYTimes dataframe

In [None]:
# County-level COVID-19 counts published by the New York Times on GitHub.
NYT_COUNTIES_CSV = 'https://raw.githubusercontent.com/nytimes/covid-19-data/master/us-counties.csv'

# Read FIPS codes as strings so that leading zeros survive (the notebook later
# matches on the literal '09001').
df = pd.read_csv(NYT_COUNTIES_CSV, dtype={'fips': str})

#### NYC dataframe

In [None]:
# Take an explicit copy of the NYC rows so the derived columns below are added to
# an independent frame instead of to a filtered slice of `df` (which triggers
# pandas' SettingWithCopyWarning).
df_ny = df[df['county'] == 'New York City'].copy()

# 'cases' and 'deaths' are running totals, so the row-to-row difference is the
# daily count. The first row has no predecessor and is NaN.
df_ny['new_cases'] = df_ny['cases'].diff()
# Ratio of each row's new cases to the previous row's new cases
# (inf/NaN where the previous value is zero or missing).
df_ny['growth_factor'] = df_ny['new_cases'] / df_ny['new_cases'].shift(1)
df_ny['new_deaths'] = df_ny['deaths'].diff()

#### Fairfield dataframe

In [None]:
# Take an explicit copy of the rows for FIPS 09001 so the derived columns below
# are added to an independent frame instead of to a filtered slice of `df`
# (which triggers pandas' SettingWithCopyWarning).
df_ff = df[df['fips'] == '09001'].copy()

# 'cases' and 'deaths' are running totals, so the row-to-row difference is the
# daily count. The first row has no predecessor and is NaN.
df_ff['new_cases'] = df_ff['cases'].diff()
# Ratio of each row's new cases to the previous row's new cases
# (inf/NaN where the previous value is zero or missing).
df_ff['growth_factor'] = df_ff['new_cases'] / df_ff['new_cases'].shift(1)
df_ff['new_deaths'] = df_ff['deaths'].diff()

### Preparing line and bar plots

In [None]:
# Dual-axis figure: daily counts on the primary y-axis, running total on the secondary one.
fig_cases_nyc = make_subplots(
    specs=[[{"secondary_y": True}]],
    subplot_titles=['Coronavirus cases in NYC'],
)

# Dates shown on the x-axis start after 2020-03-15. The averaged series are built
# from rows after 2020-03-08: shift() followed by a 7-row rolling mean leaves NaN
# in the leading rows, which dropna() then removes.
nyc_plot_rows = df_ny['date'] > '2020-03-15'
nyc_avg_rows = df_ny['date'] > '2020-03-08'
nyc_plot_dates = df_ny['date'][nyc_plot_rows]

nyc_total_cases_avg = (
    df_ny['cases'][nyc_avg_rows]
    .shift()
    .rolling(window=7)
    .mean()
    .dropna()
    .apply(int)
)
nyc_new_cases_avg = (
    df_ny['new_cases'][nyc_avg_rows]
    .shift()
    .rolling(window=7)
    .mean()
    .dropna()
    .apply(int)
)

# Running total (averaged) on the secondary axis.
fig_cases_nyc.add_trace(
    go.Scatter(
        x=nyc_plot_dates,
        y=nyc_total_cases_avg,
        name="Total number of cases <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=True,
)

# Raw daily counts as bars on the primary axis.
fig_cases_nyc.add_trace(
    go.Bar(
        x=nyc_plot_dates,
        y=df_ny['new_cases'][nyc_plot_rows],
        name="Daily new cases",
    ),
    secondary_y=False,
)

# Averaged daily counts on the primary axis.
fig_cases_nyc.add_trace(
    go.Scatter(
        x=nyc_plot_dates,
        y=nyc_new_cases_avg,
        name="Daily new cases <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=False,
)

fig_cases_nyc.update_xaxes(title_text="Date");
fig_cases_nyc.update_layout(margin=dict(r=0, t=30, l=0, b=0), font=dict(size=18))

In [None]:
# Dual-axis figure: daily deaths on the primary y-axis, running total on the secondary one.
fig_deaths_nyc = make_subplots(
    specs=[[{"secondary_y": True}]],
    subplot_titles=['Coronavirus deaths in NYC'],
)

# Dates shown on the x-axis start after 2020-03-15. The averaged series are built
# from rows after 2020-03-08: shift() followed by a 7-row rolling mean leaves NaN
# in the leading rows, which dropna() then removes.
nyc_plot_rows = df_ny['date'] > '2020-03-15'
nyc_avg_rows = df_ny['date'] > '2020-03-08'
nyc_plot_dates = df_ny['date'][nyc_plot_rows]

# Kept as floats here: the untruncated total is also used for the axis limit below.
nyc_total_deaths_avg = (
    df_ny['deaths'][nyc_avg_rows].shift().rolling(window=7).mean().dropna()
)
nyc_new_deaths_avg = (
    df_ny['new_deaths'][nyc_avg_rows].shift().rolling(window=7).mean().dropna()
)

# Running total (averaged, truncated to whole numbers) on the secondary axis.
fig_deaths_nyc.add_trace(
    go.Scatter(
        x=nyc_plot_dates,
        y=nyc_total_deaths_avg.apply(int),
        name="Total number of Deaths <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=True,
)

# Raw daily counts as bars on the primary axis.
fig_deaths_nyc.add_trace(
    go.Bar(
        x=nyc_plot_dates,
        y=df_ny['new_deaths'][nyc_plot_rows],
        name="Daily new deaths",
    ),
    secondary_y=False,
)

# Averaged daily counts on the primary axis.
fig_deaths_nyc.add_trace(
    go.Scatter(
        x=nyc_plot_dates,
        y=nyc_new_deaths_avg.apply(int),
        name="Daily new deaths <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=False,
)

# Axis limits: daily axis rounded up to the next 100, total axis to the next 1000.
ymax_daily = np.ceil(df_ny['new_deaths'].max() / 100) * 100
ymax_total = np.ceil(nyc_total_deaths_avg.max() / 1000) * 1000
fig_deaths_nyc.update_xaxes(title_text="Date")
fig_deaths_nyc.update_yaxes({"range": [0, ymax_daily]}, secondary_y=False)
fig_deaths_nyc.update_yaxes({"range": [0, ymax_total]}, secondary_y=True);
fig_deaths_nyc.update_layout(margin=dict(r=0, t=30, l=0, b=0), font=dict(size=18))

In [None]:
# Dual-axis figure: daily counts on the primary y-axis, running total on the secondary one.
fig_ff_cases = make_subplots(
    specs=[[{"secondary_y": True}]],
    subplot_titles=['Coronavirus cases in Fairfield'],
)

# Dates shown on the x-axis start after 2020-03-15. The averaged series are built
# from rows after 2020-03-08: shift() followed by a 7-row rolling mean leaves NaN
# in the leading rows, which dropna() then removes.
ff_plot_rows = df_ff['date'] > '2020-03-15'
ff_avg_rows = df_ff['date'] > '2020-03-08'
ff_plot_dates = df_ff['date'][ff_plot_rows]

ff_total_cases_avg = (
    df_ff['cases'][ff_avg_rows]
    .shift()
    .rolling(window=7)
    .mean()
    .dropna()
    .apply(int)
)
ff_new_cases_avg = (
    df_ff['new_cases'][ff_avg_rows]
    .shift()
    .rolling(window=7)
    .mean()
    .dropna()
    .apply(int)
)

# Running total (averaged) on the secondary axis.
fig_ff_cases.add_trace(
    go.Scatter(
        x=ff_plot_dates,
        y=ff_total_cases_avg,
        name="Total number of cases <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=True,
)

# Raw daily counts as bars on the primary axis.
fig_ff_cases.add_trace(
    go.Bar(
        x=ff_plot_dates,
        y=df_ff['new_cases'][ff_plot_rows],
        name="Daily new cases",
    ),
    secondary_y=False,
)

# Averaged daily counts on the primary axis.
fig_ff_cases.add_trace(
    go.Scatter(
        x=ff_plot_dates,
        y=ff_new_cases_avg,
        name="Daily new cases <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=False,
)

fig_ff_cases.update_xaxes(title_text="Date");
fig_ff_cases.update_layout(margin=dict(r=0, t=30, l=0, b=0), font=dict(size=18))

In [None]:
# Dual-axis figure: daily deaths on the primary y-axis, running total on the secondary one.
fig_ff_deaths = make_subplots(
    specs=[[{"secondary_y": True}]],
    subplot_titles=['Coronavirus deaths in Fairfield'],
)

# Dates shown on the x-axis start after 2020-03-15. The averaged series are built
# from rows after 2020-03-08: shift() followed by a 7-row rolling mean leaves NaN
# in the leading rows, which dropna() then removes.
ff_plot_rows = df_ff['date'] > '2020-03-15'
ff_avg_rows = df_ff['date'] > '2020-03-08'
ff_plot_dates = df_ff['date'][ff_plot_rows]

# Kept as floats here: the untruncated total is also used for the axis limit below.
ff_total_deaths_avg = (
    df_ff['deaths'][ff_avg_rows].shift().rolling(window=7).mean().dropna()
)
ff_new_deaths_avg = (
    df_ff['new_deaths'][ff_avg_rows].shift().rolling(window=7).mean().dropna()
)

# Running total (averaged, truncated to whole numbers) on the secondary axis.
fig_ff_deaths.add_trace(
    go.Scatter(
        x=ff_plot_dates,
        y=ff_total_deaths_avg.apply(int),
        name="Total number of Deaths <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=True,
)

# Raw daily counts as bars on the primary axis.
fig_ff_deaths.add_trace(
    go.Bar(
        x=ff_plot_dates,
        y=df_ff['new_deaths'][ff_plot_rows],
        name="Daily new deaths",
    ),
    secondary_y=False,
)

# Averaged daily counts on the primary axis.
fig_ff_deaths.add_trace(
    go.Scatter(
        x=ff_plot_dates,
        y=ff_new_deaths_avg.apply(int),
        name="Daily new deaths <br> 7-day average",
        line=dict(width=4),
    ),
    secondary_y=False,
)

# Axis limits: daily axis rounded up to the next 100, total axis to the next 1000.
ymax_daily = np.ceil(df_ff['new_deaths'].max() / 100) * 100
ymax_total = np.ceil(ff_total_deaths_avg.max() / 1000) * 1000
fig_ff_deaths.update_xaxes(title_text="Date")
fig_ff_deaths.update_yaxes({"range": [0, ymax_daily]}, secondary_y=False)
fig_ff_deaths.update_yaxes({"range": [0, ymax_total]}, secondary_y=True);
fig_ff_deaths.update_layout(margin=dict(r=0, t=30, l=0, b=0), font=dict(size=18))

### Geographical plotting

#### Preparing dataframes for geographical plotting

In [None]:
"""
The NYTimes dataset does not contain separate information for each county in NYC. These are created here.
The ratios of cases in each county are approximate, taken from NYS website.
"""
# Build the frames used by the choropleth maps.
#
# The source data reports New York City as one combined row without a FIPS code,
# so that row is replaced here by five synthetic county rows. Each county gets a
# fixed, approximate share of the combined NYC cases and deaths.
NYC_COUNTY_SHARES = {
    # county name: (FIPS code, approximate share of the NYC total)
    'New York': ('36061', .147),
    'Bronx': ('36005', .194),
    'Kings': ('36047', .287),
    'Queens': ('36081', .291),
    'Richmond': ('36085', 0.081),
}

df_geo = df.copy()

# Give the combined NYC rows the New York County FIPS code with one vectorised
# assignment. Only NYC rows are touched: other rows without a FIPS code keep NaN
# rather than being mapped onto Manhattan, where they would show up as duplicate
# '36061' locations on the maps.
nyc_rows = df_geo['county'] == 'New York City'
df_geo.loc[nyc_rows & df_geo['fips'].isna(), 'fips'] = '36061'

latest_date = df_geo['date'].max()
nyc_latest = df_geo[nyc_rows & (df_geo['date'] == latest_date)]
if len(nyc_latest) != 1:
    # The split below needs exactly one combined NYC row for the latest date.
    raise ValueError(
        "expected exactly one 'New York City' row for %s, found %d"
        % (latest_date, len(nyc_latest))
    )

nyc_total_cases = nyc_latest['cases'].iloc[0]
nyc_total_deaths = nyc_latest['deaths'].iloc[0]

# One copy of the combined row per county, then overwrite the county-specific
# fields. int() truncates the scaled totals toward zero.
nyc_counties = pd.concat([nyc_latest] * len(NYC_COUNTY_SHARES), ignore_index=True)
nyc_counties['county'] = list(NYC_COUNTY_SHARES)
nyc_counties['fips'] = [county_fips for county_fips, share in NYC_COUNTY_SHARES.values()]
nyc_counties['cases'] = [int(nyc_total_cases * share)
                         for county_fips, share in NYC_COUNTY_SHARES.values()]
nyc_counties['deaths'] = [int(nyc_total_deaths * share)
                          for county_fips, share in NYC_COUNTY_SHARES.values()]

# Swap the combined latest-date NYC row for the five county rows.
# pd.concat is used because DataFrame.append no longer exists in pandas >= 2.0.
df_geo = pd.concat([df_geo.drop(index=nyc_latest.index), nyc_counties],
                   ignore_index=True)

# Latest-date rows with a real county name; shared by the regional frames below.
latest_known = (df_geo['date'] == latest_date) & (df_geo['county'] != 'Unknown')

# New York and Connecticut only.
df_nyct = df_geo[latest_known &
                 df_geo['state'].isin(['New York', 'Connecticut'])].reset_index(drop=True)

# States included in the "Northeastern US" maps.
NORTHEAST_STATES = [
    'New York', 'Connecticut', 'Massachusetts', 'Maine', 'New Hampshire',
    'Vermont', 'New Jersey', 'Pennsylvania', 'Rhode Island',
]
df_NE = df_geo[latest_known &
               df_geo['state'].isin(NORTHEAST_STATES)].reset_index(drop=True)

#### Get county information and fips for choropleth plotting

In [None]:
# County boundary GeoJSON from the plotly datasets repository; the choropleth
# cells pass it as `geojson=` and match features against the 'fips' column.
COUNTIES_GEOJSON_URL = 'https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json'

with urlopen(COUNTIES_GEOJSON_URL) as response:
    counties = json.load(response)

#### Preparing geographical plots of NY/CT and Northeast

In [None]:
# Upper end of the colour scale: the largest county value rounded to the nearest 100,000.
nyct_cases_color_max = np.round(df_nyct['cases'].max() / 100000) * 100000

fig_cases_nyct = px.choropleth(
    df_nyct,
    geojson=counties,
    locations='fips',
    color='cases',
    projection="mercator",
    hover_data=['state', 'county'],
    title='Total number of coronavirus cases in NY and CT',
    color_continuous_scale="RdBu_r",
    range_color=(0, nyct_cases_color_max),
)

# Fit the view to the plotted locations and switch off the base geo layer.
fig_cases_nyct.update_geos(fitbounds="locations", visible=False)
fig_cases_nyct.update_layout(margin=dict(r=0, t=50, l=0, b=0), font=dict(size=18))

In [None]:
# Upper end of the colour scale: a quarter of the largest county value,
# rounded to the nearest 1,000.
nyct_deaths_color_max = np.round(df_nyct['deaths'].max() / 4 / 1000) * 1000

fig_deaths_nyct = px.choropleth(
    df_nyct,
    geojson=counties,
    locations='fips',
    color='deaths',
    projection="mercator",
    hover_data=['state', 'county'],
    title='Total number of coronavirus deaths in NY and CT',
    color_continuous_scale="RdBu_r",
    range_color=(0, nyct_deaths_color_max),
)

# Fit the view to the plotted locations and switch off the base geo layer.
fig_deaths_nyct.update_geos(fitbounds="locations", visible=False)
fig_deaths_nyct.update_layout(margin=dict(r=0, t=50, l=0, b=0), font=dict(size=18))

In [None]:
# Upper end of the colour scale: a quarter of the largest county value,
# rounded to the nearest 10,000.
ne_cases_color_max = np.round(df_NE['cases'].max() / 4 / 10000) * 10000

fig_cases_ne = px.choropleth(
    df_NE,
    geojson=counties,
    locations='fips',
    color='cases',
    projection="mercator",
    hover_data=['state', 'county'],
    title='Total number of coronavirus cases in Northeastern US',
    color_continuous_scale="RdBu_r",
    range_color=(0, ne_cases_color_max),
)

# Fit the view to the plotted locations and switch off the base geo layer.
fig_cases_ne.update_geos(fitbounds="locations", visible=False)
fig_cases_ne.update_layout(margin=dict(r=0, t=50, l=0, b=0), font=dict(size=18))

In [None]:
# Upper end of the colour scale: an eighth of the largest county value,
# rounded to the nearest 1,000.
ne_deaths_color_max = np.round(df_NE['deaths'].max() / 8 / 1000) * 1000

fig_deaths_ne = px.choropleth(
    df_NE,
    geojson=counties,
    locations='fips',
    color='deaths',
    projection="mercator",
    hover_data=['state', 'county'],
    title='Total number of coronavirus deaths in Northeastern US',
    color_continuous_scale="RdBu_r",
    range_color=(0, ne_deaths_color_max),
)

# Fit the view to the plotted locations and switch off the base geo layer.
fig_deaths_ne.update_geos(fitbounds="locations", visible=False)
fig_deaths_ne.update_layout(margin=dict(r=0, t=50, l=0, b=0), font=dict(size=18))
# Alternative left disabled by the author: fig_deaths_ne.show(renderer='browser')

#### Prepare data from all US

In [None]:
# Latest-date rows for every county with a real name ('Unknown' excluded).
us_latest_known = (df_geo['date'] == df_geo['date'].max()) & (df_geo['county'] != 'Unknown')

# Renumber the rows first, then discard any row that still has a missing value
# in any column; the index keeps gaps where rows were dropped.
df_US = df_geo[us_latest_known].reset_index(drop=True).dropna()

In [None]:
# Upper end of the colour scale: a tenth of the largest county value,
# rounded to the nearest 50,000.
us_cases_color_max = np.round(df_US['cases'].max() / 10 / 50000) * 50000

fig_cases_us = px.choropleth(
    df_US,
    geojson=counties,
    locations='fips',
    color='cases',
    hover_data=['state', 'county'],
    title='Total number of coronavirus cases in US',
    color_continuous_scale="RdBu_r",
    range_color=(0, us_cases_color_max),
    scope='usa',
)
fig_cases_us.update_layout(margin=dict(r=0, t=50, l=0, b=0), font=dict(size=18))

In [None]:
# Upper end of the colour scale: a twentieth of the largest county value,
# rounded to the nearest 1,000.
us_deaths_color_max = np.round(df_US['deaths'].max() / 20 / 1000) * 1000

fig_deaths_us = px.choropleth(
    df_US,
    geojson=counties,
    locations='fips',
    color='deaths',
    hover_data=['state', 'county'],
    title='Total number of coronavirus deaths in US',
    color_continuous_scale="RdBu_r",
    range_color=(0, us_deaths_color_max),
    scope='usa',
)
fig_deaths_us.update_layout(margin=dict(r=0, t=50, l=0, b=0), font=dict(size=18))

## Plots

In [None]:
# Display the NYC cases figure; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_cases_nyc.show()

In [None]:
# Cumulative NYC case count over time on a logarithmic y-axis, drawn through cufflinks' iplot.
# NOTE(review): assumes cufflinks forwards 'xaxis_range' to the figure layout — confirm;
# the lower bound '2020-03-7' is not zero-padded.
df_ny.iplot(x='date',y='cases',width=4,yaxis_type="log",
            title='Accumulated number of cases in NYC, logarithmic y-scale',
            xaxis_range=['2020-03-7',df_ny['date'].max()])

In [None]:
# Display the NYC deaths figure; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_deaths_nyc.show()

In [None]:
# Cumulative NYC death count over time on a logarithmic y-axis, drawn through cufflinks' iplot.
# NOTE(review): assumes cufflinks forwards 'xaxis_range' to the figure layout — confirm.
df_ny.iplot(x='date',y='deaths',width=4,yaxis_type="log",
            title='Accumulated number of deaths in NYC, logarithmic y-scale',
            xaxis_range=['2020-03-13',df_ny['date'].max()])

In [None]:
# Display the Fairfield cases figure; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_ff_cases.show()

In [None]:
# Display the Fairfield deaths figure; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_ff_deaths.show()

In [None]:
# Display the NY/CT cases map; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_cases_nyct.show()

In [None]:
# Display the NY/CT deaths map; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_deaths_nyct.show()

In [None]:
# Display the Northeastern US cases map; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_cases_ne.show()

In [None]:
# Display the Northeastern US deaths map; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_deaths_ne.show()

In [None]:
# Display the US cases map; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_cases_us.show()

In [None]:
# Display the US deaths map; pass renderer='browser' to show() to use plotly's browser renderer instead.
fig_deaths_us.show()