# Import modules

In [1]:
from math import inf
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd
from datetime import date, timedelta
import numpy as np

from itertools import chain

from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures

import statsmodels.api as sm

from tqdm.notebook import tqdm

# Function definition to preprocess and create new dataframe

In [2]:
def create_new_df(from_df, create_for, level):
    """Reshape the wide source table into a long per-day table for one metric.

    Parameters
    ----------
    from_df : pandas.DataFrame
        Wide table; it is copied, never modified. The code assumes the first
        five columns are identifier/population columns (it refers to
        ``countyFIPS``, ``County Name``, ``State``, ``StateFIPS`` and
        ``population``) followed by per-date columns whose names end in
        ``_cases`` / ``_deaths`` -- TODO confirm against the dataset.
    create_for : str
        Metric to keep. ``"cases"`` drops the ``_deaths`` columns; any other
        value drops the ``_cases`` columns.
    level : str
        ``"county"`` (case-insensitive) builds county rows; any other value
        builds state rows.

    Returns
    -------
    pandas.DataFrame
        Long table with the grouping column(s) (``state`` or ``state`` and
        ``county``), ``population``, ``date``, the ``create_for`` column,
        ``roll_avg_7``, ``linear_norm``, ``days`` and ``log_norm``.
    """
    def state_pop(x):
        # Scale a county row by (largest county population in its state) /
        # (its own population), rounded up. x.name[0] is the first index
        # level of the row, which is 'State' at the point this is applied.
        return np.ceil(x/x.population*county_wise_popn[x.name[0]]).astype('int')

    temp_df = from_df.copy()
    # From here on ``level`` is either 'county' or 'State'; ``groupby`` holds
    # the output column names used for grouping.
    level, groupby = ('county', ['state', 'county']) if level.lower() == "county" else ('State', ['state'])
    # Drop the columns of the other metric and strip the metric suffix from
    # the remaining column names.
    opposite = "_deaths" if create_for == "cases" else "_cases"
    type_data = temp_df[[i for i in temp_df.columns if opposite not in i]]
    type_data.columns = [i.replace("_{0}".format(create_for), "") for i in type_data.columns]
    # Keep the first six columns as they are and replace every later column
    # with its difference from the column before it (presumably turning
    # cumulative counts into daily new counts -- TODO confirm).
    type_data = pd.concat([type_data.iloc[:, :6], type_data.iloc[:, 5:].diff(axis=1).iloc[:, 1:]], axis=1)
    # Replace all negative values with 0
    # NOTE(review): ``temp`` is a slice of ``type_data``; the masked
    # assignment below may emit pandas' SettingWithCopyWarning.
    temp = type_data.iloc[:,5:]
    temp[temp < 0] = 0
    type_data = pd.concat([type_data.iloc[:, :5], temp], axis=1)
    if level == "State":
        # Sum all rows of a state, then drop the FIPS columns.
        type_data = type_data.groupby(['State']).sum().drop(['countyFIPS', 'StateFIPS'], axis=1)
        max_popn = type_data.population.max()
        # Normalised copy: each row is divided by its first element and
        # multiplied by the largest state population, rounded up; the first
        # column is kept unscaled.
        # assumes ``population`` is the first remaining column -- TODO confirm
        temp = pd.concat([type_data.iloc[:,0], np.ceil(type_data.apply(lambda x: x/x[0]*max_popn, axis=1)).astype('int').iloc[:, 1:]], axis=1)
        temp = temp.reset_index().set_index(['State', 'population'])
        final_data = type_data.reset_index().set_index(['State', 'population'])
    else:
        # Rows with zero population are removed before the division in
        # state_pop.
        type_data.drop(type_data[type_data.population == 0].index, inplace=True)
        type_data.drop(['countyFIPS', 'StateFIPS'], axis=1, inplace=True)
        type_data.set_index(['State', 'County Name'], inplace=True)
        # Largest ``population`` value among each state's rows in the source.
        county_wise_popn = temp_df.groupby(['State']).max().population.to_dict()
        # Normalised copy: original population column plus the scaled
        # per-date values (the scaled population column is dropped).
        temp = pd.concat([type_data[type_data.population != 0][['population']], type_data[type_data.population != 0].apply(state_pop, axis=1).iloc[:,1:]], axis=1)
        temp = temp.reset_index().set_index(['State', 'County Name', 'population'])
        final_data = type_data.reset_index().set_index(['State', 'County Name', 'population'])
    # Wide -> long: one row per (group, date).
    final_data = final_data.stack().reset_index()
    temp = temp.stack().reset_index()
    final_data.columns = groupby + ['population', 'date', create_for]
    indices = []
    roll_avg = []
    for state, group in final_data.groupby(groupby):
        # Collect the row labels that precede the group's first non-zero
        # value; a group with no non-zero value contributes nothing.
        first = group[group[create_for] != 0].index
        indices += list(range(group.index[0], first[0]) if not first.empty else [])
        # 7-row rolling mean (shorter windows allowed at the start), rounded up.
        roll_avg += [np.ceil(group[create_for].rolling(window=7, min_periods=1).mean()).astype('int')]
    final_data['roll_avg_7'] = pd.concat(roll_avg)
    # Remove the leading all-zero rows from both the raw and normalised tables.
    final_data.drop(indices, inplace=True)
    temp.drop(indices, inplace=True)
    # Column 0 of ``temp`` holds the stacked normalised values; the
    # assignment aligns on row labels, so it relies on ``temp`` and
    # ``final_data`` sharing the same labels -- verify if the branches change.
    final_data['linear_norm'] = temp[0]
    final_data.reset_index(drop=True, inplace=True)
    # 1-based position of each row within its group.
    final_data['days'] = final_data.groupby(groupby).cumcount() + 1
    # NOTE(review): np.log gives -inf (with a RuntimeWarning) wherever
    # roll_avg_7 is 0.
    final_data['log_norm'] = np.log(final_data['roll_avg_7'])
    return final_data

# Function definition to find best degree for polynomial features in polynomial regression

In [3]:
def choose_best_degree(pd_df, fit_for):
    """Return the polynomial degree in 0..19 with the lowest in-sample RMSE.

    For every candidate degree an OLS model is fitted on polynomial features
    of ``pd_df.days`` against ``pd_df[fit_for]``. Predictions are rounded up
    with ``np.ceil`` before the RMSE is computed on the same rows used for
    fitting. On a tie the lower degree is kept. ``pd_df`` is not modified.
    """
    target = pd_df[fit_for]
    # The regressor and response do not depend on the degree, so build the
    # column vectors once.
    day_column = pd_df.days.values.reshape(pd_df.days.shape[0], 1)
    response = target.values.reshape(target.shape[0], 1)

    best_degree = 0
    best_rmse = inf
    for degree in range(20):
        design = PolynomialFeatures(degree=degree).fit_transform(day_column)
        fitted = sm.OLS(response, design).fit()
        rounded_pred = np.ceil(fitted.predict(design))
        candidate_rmse = np.sqrt(mean_squared_error(target, rounded_pred))
        if candidate_rmse < best_rmse:
            best_degree, best_rmse = degree, candidate_rmse
    return best_degree

# Function definition to fit data for polynomial regression

In [4]:
def poly_fit(data, fit_for):
    """Fit a polynomial regression of ``fit_for`` on ``days`` and add predictions.

    The degree is chosen by ``choose_best_degree``. The input frame is not
    modified.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``days`` column and the ``fit_for`` column.
    fit_for : str
        Name of the column to regress on ``days``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with a ``poly_pred`` column holding the fitted
        values rounded up with ``np.ceil``.
    """
    fitted = data.copy()
    degree = choose_best_degree(fitted, fit_for)
    day_column = fitted.days.values.reshape(fitted.days.shape[0], 1)
    design = PolynomialFeatures(degree=degree).fit_transform(day_column)
    response = fitted[fit_for]
    poly_model = sm.OLS(response.values.reshape(response.shape[0], 1), design).fit()
    fitted['poly_pred'] = np.ceil(poly_model.predict(design))
    return fitted

# Function definition to fit data for linear regression

In [5]:
def linear_fit(data, fit_for):
    """Fit a straight line of ``fit_for`` on ``days`` and add predictions.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``days`` column and the ``fit_for`` column. It is not
        modified.
    fit_for : str
        Name of the column to regress on ``days``.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` with an ``lr_pred`` column holding the fitted values
        rounded up with ``np.ceil``.
    """
    temp = data.copy()
    # statsmodels' OLS does not add an intercept by itself; without the
    # constant column the line would be forced through the origin.
    design = sm.add_constant(temp.days)
    lr_model_cases = sm.OLS(temp[fit_for], design).fit()
    temp['lr_pred'] = np.ceil(lr_model_cases.predict(design))
    return temp

# Load dataset

In [6]:
# Wide source table consumed by create_new_df and by the dashboard layout.
super_dataset = pd.read_csv("../../../data/stage_IV/superDataset.csv")
# Header-less lookup file; the columns are named 'label'/'value' so its
# records can be passed directly as dropdown options.
state_names = pd.read_csv(
    "../../../data/stage_I/name-abbr.csv",
    header=None,
    names=['label', 'value'],
)

# Call the functions defined above to create country-, state- and county-level dataframes

In [7]:
# Build one fitted long-format frame per (data type, level) pair.
# ``all_dfs`` keeps the order cases/state, cases/county, deaths/state,
# deaths/county; ``state_df`` and ``county_df`` are filled by key so they do
# not depend on that order.
all_dfs = []
state_df = {}
county_df = {}
frames_by_level = {'state': state_df, 'county': county_df}
group_keys = {'state': ['state'], 'county': ['state', 'county']}
# The context manager closes the progress bar even if a step raises.
with tqdm(total=4) as pbar:
    for data_type in ['cases', 'deaths']:
        for level in ['state', 'county']:
            pbar.set_description(desc="Creating {0}-wise {1} dataframe".format(level, data_type))
            long_df = create_new_df(super_dataset, data_type, level)
            # Fit the linear and polynomial models separately for every
            # state (or state/county) group.
            fitted_groups = [
                poly_fit(linear_fit(group, data_type), 'roll_avg_7')
                for _, group in long_df.groupby(group_keys[level])
            ]
            fitted = pd.concat(fitted_groups)
            all_dfs.append(fitted)
            frames_by_level[level][data_type] = fitted
            pbar.update(1)

# National frames: per-date totals over all states, refitted as one series.
us_df = {}
for data_type in ['cases', 'deaths']:
    # Select the two numeric columns before summing so that no other column
    # is aggregated and the result is a fresh frame, not a slice.
    national = state_df[data_type].groupby('date')[[data_type, 'roll_avg_7']].sum()
    national['days'] = np.arange(1, national.shape[0] + 1)
    national.reset_index(inplace=True)
    us_df[data_type] = poly_fit(linear_fit(national, data_type), 'roll_avg_7')

  0%|          | 0/4 [00:00<?, ?it/s]

  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)
  result = getattr(ufunc, method)(*inputs, **kwargs)


# Create dashboard

In [107]:
# Dash application object used by the layout, callbacks and server below;
# the external stylesheet is loaded from the given URL.
app = JupyterDash(
    __name__,
    external_stylesheets=['https://codepen.io/chriddyp/pen/bWLwgP.css'],
)

# Create dashboard layout

In [108]:
# Earliest date available in the dataset: the name of the first "*_cases"
# column with the suffix removed. Computed once and shared by the date pickers.
first_case_date = [i for i in super_dataset.columns if "_cases" in i][0].replace("_cases", "")

# Page layout. In the callbacks of this notebook section only the state and
# county dropdowns, the two range date pickers, the data-type radio and the
# moving-average checklist are used as inputs; the graph-type, normalization,
# prediction and prediction-date controls are not wired to any callback here.
app.layout = html.Div([
    # Dash style dictionaries use camelCased CSS property names.
    html.H1("COVID - 19 Data Visualization", style={'textAlign': 'center'}),
    dcc.Dropdown(
        id='state_dropdown',
        options=[{'label': 'All States', 'value': 'all'}] + state_names.to_dict('records'),
        value='all'
    ),
    # Options and disabled state are filled in by the update_county callback.
    dcc.Dropdown(
        id='county_dropdown',
        placeholder='Select a county',
    ),
    # Start of the displayed date range.
    dcc.DatePickerSingle(
        id='pre_date_picker',
        min_date_allowed=first_case_date,
        max_date_allowed=date.today(),
        date=first_case_date,
    ),
    # End of the displayed date range.
    dcc.DatePickerSingle(
        id='post_date_picker',
        min_date_allowed=first_case_date,
        max_date_allowed=date.today(),
        date=date.today()
    ),
    dcc.RadioItems(
        id='graph_type_picker',
        options=[
            {'label': 'Plot', 'value': 'Plot'},
            {'label': 'Map', 'value': 'Map'}
        ],
        value='Plot'
    ),
    dcc.Dropdown(
        id='data_normalization',
        placeholder='Select the normalization',
        options=[
            {'label': 'Linear', 'value': 'Linear'},
            {'label': 'Log', 'value': 'Log'}
        ],
    ),
    dcc.RadioItems(
        id='data_type_picker',
        options=[
            {'label': 'Cases', 'value': 'cases'},
            {'label': 'Deaths', 'value': 'deaths'}
        ],
        value='cases'
    ),
    dcc.Dropdown(
        id='prediction',
        placeholder='Select the Prediction Model',
        options=[
            {'label': 'Linear Model', 'value': 'Linear'},
            {'label': 'Non-Linear Model', 'value': 'Non-Linear Model'}
        ],
    ),
    dcc.DatePickerSingle(
        id='prediction_date_picker',
        date=date(2021, 4, 26)
    ),
    dcc.Checklist(
        id='roll_avg_checklist',
        options=[
            {'label': ' 7-day moving average', 'value': 'true'},
        ],
        value=['true']
    ),
    # Text is overwritten by the update_graph callback.
    html.P(
        id='para_text',
        children='Dash converts Python classes into HTML'
    ),
    # Starts empty; the figure is supplied by the update_graph callback.
    dcc.Graph(
        id='viz_graph',
        figure={},
    ),
])

# Callback for state and county dropdown

In [109]:
@app.callback(
    [Output(component_id='county_dropdown', component_property='options'),
     Output(component_id='county_dropdown', component_property='disabled')],
    [Input(component_id='state_dropdown', component_property='value')]
)
def update_county(value):
    """Populate the county dropdown for the selected state.

    Parameters
    ----------
    value : str or None
        Current value of the state dropdown.

    Returns
    -------
    tuple
        ``(options, disabled)``. With no state or ``'all'`` selected the
        options are empty and the dropdown is disabled; otherwise the options
        are an ``'All Counties'`` entry followed by one entry per row of
        ``super_dataset`` for that state whose ``countyFIPS`` is not 0.
    """
    # No specific state selected: nothing to choose from.
    if not value or value == 'all':
        return [], True

    in_state = (super_dataset.State == value) & (super_dataset.countyFIPS != 0)
    county_names = super_dataset[in_state]['County Name']
    options = [{'label': 'All Counties', 'value': 'all'}]
    options += [{'label': name, 'value': name} for name in county_names]
    return options, False

@app.callback(
    Output(component_id='county_dropdown', component_property='value'),
    [Input(component_id='county_dropdown', component_property='options')]
)
def update_county_vals(value):
    """Reset the county selection to ``'all'`` whenever the options change.

    ``value`` receives the county dropdown's new options and is not used.
    """
    default_selection = 'all'
    return default_selection

# Callback for graphs

In [110]:
@app.callback(
    [
        Output(component_id='viz_graph', component_property='figure'),
        Output(component_id='para_text', component_property='children')
    ],
    [
        Input(component_id='state_dropdown', component_property='value'),
        Input(component_id='county_dropdown', component_property='value'),
        Input(component_id='pre_date_picker', component_property='date'),
        Input(component_id='post_date_picker', component_property='date'),
        Input(component_id='data_type_picker', component_property='value'),
        Input(component_id='roll_avg_checklist', component_property='value'),
    ]
)
def update_graph(state, county, start_date, end_date, data_type, roll_avg_state):
    """Redraw the bar chart for the selected region, date range and metric.

    Parameters
    ----------
    state : str or None
        State dropdown value; ``'all'`` or an empty selection shows the
        national data from ``us_df``.
    county : str or None
        County dropdown value; ``'all'`` or an empty selection shows the
        whole state from ``state_df``, otherwise one county from
        ``county_df``.
    start_date, end_date : str or None
        Inclusive bounds compared against the ``date`` column; a bound that
        is ``None`` (picker cleared) is not applied.
    data_type : str
        Key into the data dictionaries and name of the plotted column
        (``'cases'`` or ``'deaths'`` in the layout).
    roll_avg_state : list or None
        Checklist value; the moving-average line is drawn when it contains
        ``'true'``.

    Returns
    -------
    tuple
        ``(figure, text)`` for the graph and the paragraph below the controls.
    """
    # An empty state selection is treated like 'all', matching update_county.
    if not state or state == 'all':
        df = us_df[data_type]
        region = "United States".upper()
    else:
        # A cleared county dropdown falls back to the state-level view
        # instead of filtering on county == None (which matches nothing).
        if not county or county == "all":
            df = state_df[data_type]
            df = df[df.state == state]
        else:
            df = county_df[data_type]
            df = df[(df.state == state) & (df.county == county)]
        # Full state name looked up from its dropdown value for the title.
        region = state_names[state_names.value == state].label.values[0].upper()

    # Comparing the date column with None would raise, so skip missing bounds.
    if start_date is not None:
        df = df[df.date >= start_date]
    if end_date is not None:
        df = df[df.date <= end_date]

    fig = go.Figure()
    fig.add_trace(
        go.Bar(x=df.date, y=df[data_type], name=data_type, marker=dict(color='silver')),
    )
    if 'true' in (roll_avg_state or []):
        fig.add_trace(
            go.Scatter(x=df.date, y=df.roll_avg_7, mode='lines', name='7 - day moving avg', marker=dict(color='blue')),
        )
    fig.update_layout(
        title="COVID 19 {0} across {1} from {2} to {3}".format(data_type, region, start_date, end_date),
        title_x=0.5,
        xaxis_title="Date",
        yaxis_title="Frequency (NEW {0} per day)".format(data_type.upper()),
        legend=dict(orientation="h", yanchor="bottom", y=1, x=0.5),
        height=600
    )
    # NOTE(review): the paragraph text is the type of the checklist value,
    # which looks like leftover debugging output -- confirm what should be
    # shown here.
    return fig, str(type(roll_avg_state))

# Run server

In [111]:
# Start the Dash server on port 8085 in "external" mode with debugging
# enabled and the reloader turned off.
app.run_server(
    mode="external",
    debug=True,
    port=8085,
    use_reloader=False,
)

Dash app running on http://127.0.0.1:8085/
