# NYC Covid-19 Dashboard

## NYC Covid Data

For this task, we are going to scrape publically available covid-19 data in NYC.

In [1]:
import os
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns # Using seaborn for visualization
import plotly.express as px
plt.rcParams['figure.figsize'] = [18, 6]

In [2]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

### COVID-19 Outcomes by Testing Cohorts: Cases, Hospitalizations, and Deaths
This will be the dataset to develop the overall NYC timeseries for Covid positivity

#### Import Data

In [3]:
chd = pd.read_csv('https://data.cityofnewyork.us/resource/cwmx-mvra.csv')

In [4]:
chd.head()

Unnamed: 0,extract_date,specimen_date,number_tested,number_confirmed,number_hospitalized,number_deaths
0,2020-08-27T00:00:00.000,2020-04-24T00:00:00.000,9587,2532,511,173
1,2020-08-27T00:00:00.000,2020-06-11T00:00:00.000,13428,291,42,5
2,2020-08-27T00:00:00.000,2020-04-02T00:00:00.000,9402,5813,1902,717
3,2020-08-27T00:00:00.000,2020-03-14T00:00:00.000,1706,696,237,67
4,2020-08-27T00:00:00.000,2020-08-15T00:00:00.000,8546,116,17,1


##### Data Structure and Schema

| Column Name            | Description                                                       | Data Type      |
| :-----------           | :-----------                                                      | :-----------   |
| extract_date           | Date of extraction from live disease surveillance database        | Date & Time    |
| specimen_date          | Date of specimen collection, equivalent to diagnosis date         | Date & Time    |
| number_tested          | Count of NYC residents newly tested for SARS-CoV-2                | Number         |
| number_confirmed       | Count of patients tested who were confirmed to be COVID-19 cases  | Number         |
| number_hospitalized    | Count of confirmed COVID-19 cases among patients ever hospitalized| Number         |
| number_deaths          | Count of confirmed COVID-19 cases among patients who died         | Number         |

#### Data Cleanup

Convert specimen_date to dt

In [5]:
chd['specimen_date_dt'] = [pd.to_datetime(row).strftime('%Y-%m-%d') for row in chd['specimen_date']]
chd['dt'] = [pd.to_datetime(row).strftime('%Y-%m-%d') for row in chd['extract_date']]

Select latest extract date

In [6]:
chd.head()

Unnamed: 0,extract_date,specimen_date,number_tested,number_confirmed,number_hospitalized,number_deaths,specimen_date_dt,dt
0,2020-08-27T00:00:00.000,2020-04-24T00:00:00.000,9587,2532,511,173,2020-04-24,2020-08-27
1,2020-08-27T00:00:00.000,2020-06-11T00:00:00.000,13428,291,42,5,2020-06-11,2020-08-27
2,2020-08-27T00:00:00.000,2020-04-02T00:00:00.000,9402,5813,1902,717,2020-04-02,2020-08-27
3,2020-08-27T00:00:00.000,2020-03-14T00:00:00.000,1706,696,237,67,2020-03-14,2020-08-27
4,2020-08-27T00:00:00.000,2020-08-15T00:00:00.000,8546,116,17,1,2020-08-15,2020-08-27


##### Add NYC data label to a new column called Boro

In [7]:
chd['boro'] = 'NYC'

In [8]:
# Get max dt of data load
chd = chd[(chd['dt'] == chd['dt'].max())]

##### Sort dataframe by specimen_date

In [9]:
chd.sort_values(by=['specimen_date'], inplace=True)
chd.reset_index(inplace=True, drop=True)

##### Add % Tested Positive

In [10]:
chd['pct_tested_positive'] = chd['number_confirmed']/chd['number_tested']

##### Add Running Total of Tests, Confirmed Tests & Deaths

In [11]:
chd['cumsum_number_tested'] = chd['number_tested'].cumsum()

In [12]:
chd['cumsum_number_confirmed'] = chd['number_confirmed'].cumsum()

In [13]:
chd['cumsum_number_deaths'] = chd['number_deaths'].cumsum()

In [14]:
chd['cumsum_number_hospitalized'] = chd['number_hospitalized'].cumsum()

##### Add Percentage Total of Tests Confirmed Tests Cumulative

In [15]:
chd['cumsum_pct_tested_positive'] = chd['cumsum_number_confirmed']/chd['cumsum_number_tested']

##### Add Rolling 7-Day Moving Averages of Positve Test Percentage

In [16]:
chd['7-day_rolling_avg_pct_tested_positive'] = chd['number_confirmed'].rolling(7).sum()/chd['number_tested'].rolling(7).sum()

In [17]:
chd.head()

Unnamed: 0,extract_date,specimen_date,number_tested,number_confirmed,number_hospitalized,number_deaths,specimen_date_dt,dt,boro,pct_tested_positive,cumsum_number_tested,cumsum_number_confirmed,cumsum_number_deaths,cumsum_number_hospitalized,cumsum_pct_tested_positive,7-day_rolling_avg_pct_tested_positive
0,2020-08-27T00:00:00.000,2020-02-02T00:00:00.000,2,0,2,0,2020-02-02,2020-08-27,NYC,0.0,2,0,0,2,0.0,
1,2020-08-27T00:00:00.000,2020-02-04T00:00:00.000,1,0,0,0,2020-02-04,2020-08-27,NYC,0.0,3,0,0,2,0.0,
2,2020-08-27T00:00:00.000,2020-02-05T00:00:00.000,1,0,1,0,2020-02-05,2020-08-27,NYC,0.0,4,0,0,3,0.0,
3,2020-08-27T00:00:00.000,2020-02-08T00:00:00.000,1,0,1,0,2020-02-08,2020-08-27,NYC,0.0,5,0,0,4,0.0,
4,2020-08-27T00:00:00.000,2020-02-26T00:00:00.000,1,0,1,0,2020-02-26,2020-08-27,NYC,0.0,6,0,0,5,0.0,


In [18]:
boro = 'NYC'
container = "The boro chosen by user was: " + (boro)
container

'The boro chosen by user was: NYC'

In [30]:
chd[(chd['7-day_rolling_avg_pct_tested_positive'].notnull())]

Unnamed: 0,extract_date,specimen_date,number_tested,number_confirmed,number_hospitalized,number_deaths,specimen_date_dt,dt,boro,pct_tested_positive,cumsum_number_tested,cumsum_number_confirmed,cumsum_number_deaths,cumsum_number_hospitalized,cumsum_pct_tested_positive,7-day_rolling_avg_pct_tested_positive
6,2020-08-27T00:00:00.000,2020-03-01T00:00:00.000,1,0,0,0,2020-03-01,2020-08-27,NYC,0.000000,8,1,0,5,0.125000,0.125000
7,2020-08-27T00:00:00.000,2020-03-02T00:00:00.000,5,0,3,0,2020-03-02,2020-08-27,NYC,0.000000,13,1,0,8,0.076923,0.090909
8,2020-08-27T00:00:00.000,2020-03-03T00:00:00.000,13,2,7,0,2020-03-03,2020-08-27,NYC,0.153846,26,3,0,15,0.115385,0.130435
9,2020-08-27T00:00:00.000,2020-03-04T00:00:00.000,32,6,16,2,2020-03-04,2020-08-27,NYC,0.187500,58,9,2,31,0.155172,0.166667
10,2020-08-27T00:00:00.000,2020-03-05T00:00:00.000,63,5,25,1,2020-03-05,2020-08-27,NYC,0.079365,121,14,3,56,0.115702,0.120690
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
180,2020-08-27T00:00:00.000,2020-08-22T00:00:00.000,6234,61,9,0,2020-08-22,2020-08-27,NYC,0.009785,1965406,228668,19478,57082,0.116346,0.011078
181,2020-08-27T00:00:00.000,2020-08-23T00:00:00.000,4588,56,3,0,2020-08-23,2020-08-27,NYC,0.012206,1969994,228724,19478,57085,0.116104,0.010806
182,2020-08-27T00:00:00.000,2020-08-24T00:00:00.000,8856,80,4,0,2020-08-24,2020-08-27,NYC,0.009033,1978850,228804,19478,57089,0.115625,0.010334
183,2020-08-27T00:00:00.000,2020-08-25T00:00:00.000,3060,46,2,1,2020-08-25,2020-08-27,NYC,0.015033,1981910,228850,19479,57091,0.115469,0.010427


##### Export Data to Local Folder

In [17]:
chd.to_csv('data_output/chd.csv', index=False)

#### Build Dashboard

##### App Layout

In [33]:
app = dash.Dash(__name__)
app.layout = html.Div([
    
    html.H1("NYC COVID-19 Overall Confirmed Cases", style={"text-align":"left"}),
    
    dcc.Dropdown(id="select_boro",
                options=[
                    {"label": "NYC", "value":'NYC'},
                    {"label": "Brooklyn", "value":'Brooklyn'},
                    {"label": "Bronx", "value":'Bronx'},
                    {"label": "Manhattan", "value":'Manhattan'},
                    {"label": "Queens", "value":'Queens'},
                    {"label": "Staten Island", "value":'SI'}],
                 value= "NYC",
                 #multi-False,
                 style={"width":"40%"}
                ),
    html.Div(id="output_container", children=[]),
    html.Br(),
    dcc.Graph(id="covid_19_chart", figure={})
    
    
])

if __name__ == '__main__':
    app.run_server(debug=True)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


---

### COVID-19 Outcomes by Testing Cohorts: Cases, Hospitalizations, and Deaths by Boro and Cohort

#### Import Data

In [18]:
chd_boro = pd.read_csv('https://raw.githubusercontent.com/nychealth/coronavirus-data/master/boro/boroughs-case-hosp-death.csv')

In [21]:
chd_boro.tail()

Unnamed: 0,DATE_OF_INTEREST,BK_CASE_COUNT,BK_HOSPITALIZED_COUNT,BK_DEATH_COUNT,BX_CASE_COUNT,BX_HOSPITALIZED_COUNT,BX_DEATH_COUNT,MN_CASE_COUNT,MN_HOSPITALIZED_COUNT,MN_DEATH_COUNT,QN_CASE_COUNT,QN_HOSPITALIZED_COUNT,QN_DEATH_COUNT,SI_CASE_COUNT,SI_HOSPITALIZED_COUNT,SI_DEATH_COUNT
174,08/21/2020,61,7,0,41,14,0,40,1,0,48,5,2,10,0,0
175,08/22/2020,19,0,1,16,4,1,17,5,1,14,3,1,4,0,1
176,08/23/2020,24,5,0,12,2,3,4,0,0,19,4,0,3,0,0
177,08/24/2020,23,0,1,16,1,1,20,0,0,14,0,0,4,0,0
178,08/25/2020,4,0,0,2,0,0,1,0,0,1,0,0,0,0,0


In [20]:
chd_boro['BK_CASE_COUNT'].sum() + chd_boro['BX_CASE_COUNT'].sum() + chd_boro['MN_CASE_COUNT'].sum() + chd_boro['QN_CASE_COUNT'].sum() + chd_boro['SI_CASE_COUNT'].sum()

228902

In [None]:
chd_boro['BK_CASE_COUNT'].sum() + chd_boro['BX_CASE_COUNT'].sum() + chd_boro['MN_CASE_COUNT'].sum() + chd_boro['QN_CASE_COUNT'].sum() + chd_boro['SI_CASE_COUNT'].sum()