# Enhancement

## 1. Retrieve Data from MongoDB
Retrieve all documents from MongoDB and convert them to a `pandas.DataFrame`

In [1]:
import pymongo
client = pymongo.MongoClient()

## 2. Dash in JupyterLab

In [41]:
# Build the AppViewer from `jupyterlab_dash`; the notebook's last cell passes
# the Dash app to `viewer.show(...)`.
from jupyterlab_dash import AppViewer
viewer = AppViewer()

from utils import get_state_codes, get_state_name, daily_increase, moving_average
from utils import all_states, state_code_dict, state_map_dict, fip_to_county, fip_to_state

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

import json
import numpy as np
import pandas as pd
from functools import reduce
from datetime import datetime
from urllib.request import urlopen

import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

from database import fetch_all_db_as_df

# Definitions of constants. This project uses an extra CSS stylesheet at `./assets/style.css`

# Generic grey/blue palette; not referenced by any function visible in this file.
COLORS = ['rgb(67,67,67)', 'rgb(115,115,115)', 'rgb(49,130,189)', 'rgb(189,189,189)']
# Stylesheets handed to the Dash constructor below.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css', '/assets/style.css']
# Translucent fill/line colours of the cumulative curves, keyed by target label.
colors = { 
"cases": 'rgba(80, 26, 80, 0.2)', 
"deaths": 'rgba(16, 112, 2, 0.2)'
}
# Bar colours of the daily-increase charts, keyed by target label.
colors_bar = { 
"cases": 'mediumturquoise', 
"deaths": 'tomato'
}
# Line colours of the moving-average curves, keyed by target label.
colors_line = {
    "cases": "mediumturquoise",
    "deaths": "tomato"
}
# Text colours keyed by target label; not referenced by any function visible in this file.
colors_text = {
    "cases": "purple",
    "deaths": "olivedrab"   
}

# County-boundary GeoJSON, downloaded once at import time; used as the
# `geojson` argument of the county-level mask-use choropleth.
with urlopen('https://raw.githubusercontent.com/plotly/datasets/master/geojson-counties-fips.json') as response:
    county_json = json.load(response)

# Define the dash app first
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# Loaded once at import time. The functions below index it with the keys
# 'covid-us', 'covid-us-state', 'mask-use-by-county', 'state-population' and
# 'state-area' and treat each value as a pandas DataFrame.
df_dict = fetch_all_db_as_df()


# Define component functions

def page_header():
    """
    Build the banner at the top of the page.

    Returns:
        A dash `html.Div` (id ``header``) holding the project title on the
        left and a GitHub logo + caption linking to the repository on the right.
    """
    title_column = html.Div([html.H3('DATA1050 Final Project')],
                            className="ten columns")

    github_logo = html.Img(id='logo', src=app.get_asset_url('github.png'),
                           style={'height': '35px', 'paddingTop': '7%'})
    caption_style = {'fontSize': '2rem', 'height': '35px', 'bottom': 0,
                     'paddingLeft': '4px', 'color': '#a3a7b0',
                     'textDecoration': 'none'}
    caption = html.Span('Old boy', style=caption_style)

    repository_link = html.A([github_logo, caption],
                             className="two columns row",
                             href='https://github.com/cengc13/data1050-final-project')

    return html.Div(id='header', children=[title_column, repository_link],
                    className="row")


def project_description():
    """
    Returns overall project description in markdown.

    The text is static; it is wrapped in a `dcc.Markdown` inside a row-level
    `html.Div` so it lines up with the other page sections.
    """
    return html.Div(children=[dcc.Markdown('''
        # US COVID-19 Tracker

        The coronavirus pandemic has caused more than one and a half million deaths over the world.
        The COVID-19 has exhausted the United States, and it seems a dark and deadly winter is waiting ahead.
        Therefore, it is of crucial importance to understand and project the trend of COVID-19 cases in US
        so that policy-makers can come up with short-term and long-term strategies to limit the spread and
        mitigate the effect of another outbreak in the near future.

        **US COVID-19 tracker is also a "What-If" tool to assist making strategies.**
        It can be used to understand and project the trend if more precautions and restrictions are imposed.

        ## Data Source
        Covid-19 tracker mainly utilizes historical and live covid-19 data from
        [New York Times github repository](https://github.com/nytimes/covid-19-data).
        The hierarchical case and death [data](https://raw.githubusercontent.com/nytimes/covid-19-data/master/us.csv)
        **is regularly updated every day**.

        Also, the data for state and county population is merged to obtain the positive rate over population at different
        geographical levels.

        Additionally, the [survey data](https://raw.githubusercontent.com/nytimes/covid-19-data/master/mask-use/mask-use-by-county.csv)
        by The New York Times on mask use by county is investigated to see if there exists a
        correlation between the outbreak and mask use frequency in each state. All data sets in this project are well-structured.
        ''', className='eleven columns', style={'paddingLeft': '5%'})], className="row")


def visualization_description():
    """
    Returns the introductory text of the EDA and interactive visualization section.

    Only the static markdown is produced here; the figures themselves are
    assembled in `visualization_summary`.
    """
    return html.Div(children=[
        dcc.Markdown('''
            ## EDA & Interactive Visualization
            This project uses `Dash` and `Plotly` for visualization. We use the
            high-level components/tools in Dash to provide compact figures which allow users
            to choose what to display. For example, we utilized radio items to select targets
            (case or death number), a dropdown to select the state, and a slider to select
            time points.

            Curve plots are used to show the time variation of cumulative and daily reported
            cases and deaths for the national-level and state-level covid-19 data. Heat maps are used
            to track the outbreak geographically. Bar/pie plots are mainly for comparison.

        ''', className='row eleven columns', style={'paddingLeft': '5%'}),
    ])

def enhancement_description():
    """
    Returns the introductory text of the Enhancement section.

    Only the static markdown is produced here; the figures themselves are
    assembled in `enhancement_summary`.
    """
    return html.Div(children=[
        dcc.Markdown('''
      ## Enhancement
      Public health experts suggest that face coverings can substantially slow the transmission
      of covid. In this section, we firstly use a heatmap to show the propensity of people to wear
      masks in each county. This heat map is based on the survey data from a large number of interviews
      conducted by the global data and survey firm Dynata at the request of The New York Times.

      Next, we attempt to understand what factors might affect the spread of the pandemic in US states.
      For this analysis, we select two response variables including case fatality rate and infection rate,
      and two predictors, namely average wear-mask probability and population density. A simple and intuitive
      linear correlation analysis is conducted. For Covid data, we use the latest state-level data for demonstration.
      In order to obtain the state-level mask-use data, the county-level data is aggregated over states and we take
      the average of features in each state to get the state-level features.

        ''', className='row eleven columns', style={'paddingLeft': '5%'}),
    ])
    

# Defines the dependencies of interactive components
@app.callback(Output('time-series-total', 'figure'),
             Input('target-label', 'value'))
def time_series_cumulative(label):
    """
    Dash callback: draw the nationwide cumulative curve for `label`.

    Args:
        label: column of `df_dict['covid-us']` to plot; it must also be a key
            of the module-level `colors` dict ('cases' or 'deaths').

    Returns:
        A figure dict (`data` + `layout`) with one filled line trace.

    Raises:
        KeyError: if `label` is not a column of the frame or a key of `colors`.
    """
    df = df_dict['covid-us']
    x = df['date']
    trace = go.Scatter(x=x, y=df[label], mode='lines', name=label, fill='tozeroy',
                       fillcolor=colors[label],
                       line={'width': 2, 'color': colors[label]},
                       hovertemplate='%{x|%b %d, %Y} <br> %{y:-.0f}'
                       )

    # The figure is returned as a plain dict, so plotly.py never gets a chance
    # to expand shorthand keys such as `yaxis_title`; spell the nested
    # attributes out so the titles actually reach the browser.
    layout = dict(title=dict(text=f'Cumulative Covid {label.lower()} in U.S. over time'),
                  yaxis=dict(title=dict(text=f'# of {label}')),
                  xaxis=dict(title=dict(text='Date/Time')),
                  font=dict(family="Courier New, monospace",
                            size=16))
    return dict(data=[trace], layout=layout)

@app.callback(Output('time-series-daily', 'figure'),
             Input('daily-label', 'value'))
def time_series_daily(label, window_size=7):
    """
    Dash callback: draw nationwide daily increases of `label` plus a moving average.

    Args:
        label: column of `df_dict['covid-us']` to plot; it must also be a key
            of `colors_bar` and `colors_line` ('cases' or 'deaths').
        window_size: window passed to `moving_average` (default 7). Dash only
            supplies `label`, so the default applies when used as a callback.

    Returns:
        A figure dict (`data` + `layout`) with a bar trace (daily values) and
        a line trace (moving average).

    Raises:
        KeyError: if `label` is not a known column / colour key.
    """
    df = df_dict['covid-us']
    x = df['date']
    # `daily_increase` / `moving_average` come from `utils`; presumably they
    # turn the cumulative series into per-day differences and smooth them.
    daily = daily_increase(df[label])
    moving_avg = moving_average(daily, window_size)

    trace1 = go.Bar(x=x, y=daily, name=f'Daily new {label}',
                    marker=dict(color=colors_bar[label],
                                line=dict(color=colors_bar[label], width=1.5),
                                opacity=0.2),
                    hovertemplate='%{x|%b %d, %Y} <br>Daily: %{y:-.0f}'
                    )
    trace2 = go.Scatter(x=x, y=moving_avg,
                        name=f'Moving average in {window_size} days',
                        line={'width': 3, 'color': colors_line[label]},
                        hovertemplate='Moving average: %{y:-.0f}'
                        )

    # The figure is returned as a plain dict, so plotly.py shorthand keys
    # (`yaxis_title`, `font_size`, ...) would be passed through unexpanded and
    # ignored; use the real nested attribute names instead.
    layout = dict(title=dict(text=f'Daily reported new Covid {label.lower()} in U.S. over time'),
                  yaxis=dict(title=dict(text=f'# of {label} per day')),
                  xaxis=dict(title=dict(text='Date/Time')),
                  font=dict(family="Courier New, monospace",
                            size=16),
                  hoverlabel=dict(
                      bgcolor="white",
                      font=dict(size=16, family="Rockwell")),
                  # Valid values are lower-case; 'x Unified' is not recognised.
                  hovermode='x unified',
                  legend=dict(
                      yanchor="top",
                      y=0.99,
                      xanchor="left",
                      x=0.01)
                  )
    return dict(data=[trace1, trace2], layout=layout)

@app.callback(Output('time-series-state', 'figure'),
            Input('plot-type', 'value'),
            Input('state-name', 'value'),
            Input('label-by-state', 'value'),
             )
def time_series_state(plot_type='daily', state_name='Rhode Island', label='cases',):
    """
    Dash callback: draw the time series of `label` for a single state.

    Args:
        plot_type: 'daily' (bars of daily increase + 7-day moving average) or
            'cumulative' (filled line of the running total).
        state_name: state to plot; must be a key of `state_code_dict`.
        label: column of `df_dict['covid-us-state']` to plot; must also be a
            key of the colour dicts ('cases' or 'deaths').

    Returns:
        A figure dict (`data` + `layout`), or None when `plot_type` is neither
        'daily' nor 'cumulative'.

    Raises:
        KeyError: if `state_name`, a state in the data, or `label` is unknown.
    """
    df = df_dict['covid-us-state']
    # Compute the state codes as a throw-away Series: writing a column into
    # the shared frame from a callback would leak state between requests.
    state_codes = df['state'].apply(lambda x: state_code_dict[x])
    df_state = df[state_codes == state_code_dict[state_name]].sort_values(by='date')
    x = df_state.date
    y = df_state[label].values

    if plot_type == 'daily':
        window_size = 7
        daily_cases = daily_increase(y)
        moving_avg = moving_average(daily_cases, window_size)
        trace_bar = go.Bar(x=x, y=daily_cases, name=f'Daily new {label}',
                           marker=dict(color=colors_bar[label],
                                       line=dict(color=colors_bar[label], width=1.5),
                                       opacity=0.2),
                           hovertemplate='Date: %{x|%A, %b %d, %Y} <br> Daily increase : %{y:.0f}'
                           )
        trace_line = go.Scatter(
            x=x,
            y=moving_avg,
            name=f'Moving average in {window_size} days',
            line={'width': 1.5, 'color': colors_line[label]},
            hovertemplate='7 Day Avg. : %{y:.0f}')
        # Plain-dict figures bypass plotly.py, so shorthand keys such as
        # `yaxis_title` / `font_size` must be written as nested attributes.
        layout = dict(title=dict(text=f'Daily reported new Covid {label.lower()} in {state_name}'),
                      yaxis=dict(title=dict(text=f'# of {label} per day')),
                      xaxis=dict(title=dict(text='Date/Time')),
                      font=dict(family="Courier New, monospace",
                                size=16),
                      hoverlabel=dict(
                          bgcolor="white",
                          font=dict(size=16, family="Rockwell")),
                      # Valid values are lower-case; 'x Unified' is not recognised.
                      hovermode='x unified',
                      legend=dict(
                          yanchor="top",
                          y=0.99,
                          xanchor="left",
                          x=0.01))
        return dict(data=[trace_bar, trace_line], layout=layout)

    if plot_type == 'cumulative':
        trace = go.Scatter(x=x, y=y, mode='lines', name=label, fill='tozeroy',
                           fillcolor=colors[label],
                           line={'width': 2, 'color': colors[label]},
                           hovertemplate='%{x|%b %d, %Y} <br> %{y:-.0f}'
                           )
        layout = dict(title=dict(text=f'Cumulative Covid {label.lower()} in {state_name}'),
                      yaxis=dict(title=dict(text=f'Confirmed # of {label}')),
                      xaxis=dict(title=dict(text='Date/Time')),
                      font=dict(family="Courier New, monospace",
                                size=16))
        return dict(data=[trace], layout=layout)

    # Unknown plot type: keep the historical behaviour of returning no figure.
    return None

    
@app.callback(Output('heat-map-by-state', 'figure'),
              Input('label-radioitems', 'value'))
def heat_map(label):
    """
    Dash callback: animated state-level heat map of `label`.

    One animation frame is built per month name, from the rows dated on the
    1st of a month plus the rows of the most recent date in the data. The
    figure opens on the last frame.

    Args:
        label: column of `df_dict['covid-us-state']` used for the colour
            ('cases' or 'deaths').

    Returns:
        A `plotly.graph_objects.Figure` with frames and a slider but without
        the play/pause buttons.
    """
    source = df_dict['covid-us-state']
    # `assign` returns a new frame, so the helper columns never leak into the
    # DataFrame shared by every callback.
    df = source.assign(
        month=source.date.dt.month_name(),
        state_code=source['state'].apply(lambda x: get_state_codes(x)),
    )
    # NOTE(review): frames are keyed by month *name*, so the latest month can
    # hold two dates per state and data spanning several years would merge
    # same-named months -- confirm this is acceptable.
    df_month = df[(df.date.dt.day == 1) | (df.date == df.date.max())]
    fig = px.choropleth(df_month,
                        locations='state_code',
                        locationmode="USA-states",
                        scope="usa",
                        color=label,  # a column in the dataset
                        hover_name='state',  # column to add to hover information
                        hover_data={'cases': ':.0f', 'deaths': ':.0f', 'state_code': False, 'month': False},
                        color_continuous_scale=px.colors.sequential.Sunsetdark
                        if label == 'cases' else px.colors.sequential.Greys,
                        animation_group='state',
                        animation_frame='month'
                        )
    fig.update_layout(title_text=f"Heat Map - Total {label.title()} in US States",
                      margin={"r": 0, "l": 0, "b": 0},
                      transition_duration=500)

    # Guard: without frames (e.g. a single month of data) there is presumably
    # no slider to reposition, and indexing it would fail.
    if fig.frames:
        # Open on the most recent frame instead of the first one.
        fig.layout['sliders'][0]['active'] = len(fig.frames) - 1
        fig = go.Figure(data=fig['frames'][-1]['data'], frames=fig['frames'], layout=fig.layout)

    fig.update_coloraxes(colorbar_title=f"<b>Color</b><br>Confirmed {label.title()}")
    # Drop the play/pause buttons; the slider alone drives the animation.
    fig.layout.pop('updatemenus')
    return fig


def heat_map_mask_use():
    """
    Build the county-level choropleth of the mask-wearing score.

    The score is a weighted sum of the survey columns
    (0.25*rarely + 0.5*sometimes + 0.75*frequently + 1.0*always).

    Side effects:
        Rewrites `countyfp` as a zero-padded 5-character string and adds the
        columns `wear_mask_prob`, `county` and `state_code` to the frame
        stored in `df_dict['mask-use-by-county']`.

    Returns:
        A plotly express choropleth figure coloured by `wear_mask_prob`.
    """
    survey = df_dict['mask-use-by-county']

    # Normalise the FIPS code so it matches the ids in the county GeoJSON.
    survey['countyfp'] = survey['countyfp'].apply(lambda raw: str(int(raw)).zfill(5))
    survey['wear_mask_prob'] = 0.25 * survey['rarely'] + 0.5 * survey['sometimes'] + \
        0.75 * survey['frequently'] + 1.0 * survey['always']
    survey['county'] = survey.apply(lambda row: fip_to_county(row.countyfp), axis=1)
    survey['state_code'] = survey.apply(lambda row: fip_to_state(row.countyfp), axis=1)

    # Counties whose state could not be resolved are left off the map.
    unresolved = survey[survey['state_code'] == 'N/A'].index
    mapped = survey.drop(unresolved).reset_index(drop=True)
    mapped['state'] = mapped['state_code'].apply(lambda code: state_map_dict[code])

    hover_columns = {'county': True, 'countyfp': False, 'wear_mask_prob': ':.3f'}
    fig = px.choropleth(
        mapped,
        locations='countyfp',
        geojson=county_json,
        scope="usa",
        color='wear_mask_prob',
        hover_name='state',
        hover_data=hover_columns,
        color_continuous_scale=px.colors.sequential.Reds,
    )
    fig.update_layout(title_text="Heat Map - Who is Wearing Masks in US Counties",
                      margin={"r": 0, "l": 0, "b": 0})
    fig.update_coloraxes(colorbar_title="<b>Color</b><br>Wear Mask Prob")
    return fig

# Columns analysed by the scatter and correlation matrices:
# CFR = case fatality rate, IR = infection rate,
# PD = population density, WMP = wear-mask probability.
_ANALYSIS_METRICS = ['CFR', 'IR', 'PD', 'WMP']


def _state_mask_probability():
    """Return one row per state with the mean county-level mask-wearing score."""
    mask_use = df_dict['mask-use-by-county']
    # Derive everything as stand-alone Series so the shared frame in df_dict
    # is left untouched. `int()` first makes the FIPS normalisation work for
    # numeric as well as already zero-padded string codes.
    county_fips = mask_use['countyfp'].apply(lambda x: str(int(x)).zfill(5))
    county_level = pd.DataFrame({
        'state_code': county_fips.apply(lambda x: fip_to_state(x)),
        'wear_mask_prob': (0.25 * mask_use['rarely'] + 0.5 * mask_use['sometimes'] +
                           0.75 * mask_use['frequently'] + 1.0 * mask_use['always']),
    })
    # Average only the score column; a blanket mean over every column fails on
    # the string columns with recent pandas versions.
    state_level = county_level.groupby('state_code', as_index=False)['wear_mask_prob'].mean()
    # Unresolved counties and DC are excluded from the state-level analysis.
    state_level = state_level[~state_level['state_code'].isin(['N/A', 'DC'])].copy()
    state_level['state'] = state_level['state_code'].apply(lambda x: state_map_dict[x])
    return state_level[['state', 'wear_mask_prob']]


def _state_level_analysis_frame():
    """Return per-state CFR, IR, PD and WMP (rounded to 3 decimals) for the latest date."""
    covid = df_dict['covid-us-state']
    latest = covid[covid.date == covid.date.max()]
    latest = latest.drop(columns='date').reset_index(drop=True)
    latest['fips'] = latest['fips'].apply(lambda x: str(x).zfill(2))

    data_frames = [latest, df_dict['state-population'], df_dict['state-area'],
                   _state_mask_probability()]
    df_merged = reduce(lambda left, right: pd.merge(left, right, on=['state'],
                                                    how='inner'), data_frames)

    # `total` and `area` are read from the merged population / area tables.
    df_ana = pd.DataFrame({
        'state': df_merged['state'],
        'CFR': df_merged['deaths'] / df_merged['cases'],
        'IR': df_merged['cases'] / df_merged['total'],
        'PD': df_merged['total'] / df_merged['area'],
        'WMP': df_merged['wear_mask_prob'],
    })
    df_ana[_ANALYSIS_METRICS] = df_ana[_ANALYSIS_METRICS].round(3)
    return df_ana


def scatter_matrix():
    """
    Build the lower-triangle scatter-plot matrix of CFR, IR, PD and WMP.

    Returns:
        A 600x600 `plotly.graph_objects.Figure` with one point per state;
        the state name is attached as hover text.
    """
    df_ana = _state_level_analysis_frame()

    fig = go.Figure(data=go.Splom(
        dimensions=[dict(label=metric, values=df_ana[metric])
                    for metric in _ANALYSIS_METRICS],
        text=df_ana['state'],
        marker=dict(showscale=False,
                    line_color='white', line_width=0.5),
        showupperhalf=False,
    ))

    fig.update_layout(
        title='Scatter Matrix',
        dragmode='select',
        width=600,
        height=600,
        hovermode='closest',
    )
    return fig


def correlation_matrix():
    """
    Build the heat map of pairwise correlations between CFR, IR, PD and WMP.

    The correlation is computed with `DataFrame.corr()` on the rounded
    state-level values, i.e. the same numbers shown in the scatter matrix.

    Returns:
        A 600x600 `plotly.graph_objects.Figure` holding a single heat map.
    """
    df_ana = _state_level_analysis_frame()
    df_corr = df_ana[_ANALYSIS_METRICS].corr()

    fig = go.Figure(data=go.Heatmap(z=df_corr,
                                    x=_ANALYSIS_METRICS,
                                    y=_ANALYSIS_METRICS,
                                    colorscale='Blues',
                                    hovertemplate=" Corr(%{x}, %{y}) = %{z:.2f}"),
                    )
    fig.update_layout(
        title='Correlation Matrix',
        height=600,
        width=600,
    )
    return fig

    
def architecture_summary():
    """
    Returns the text and image of architecture summary of the project.

    The section consists of a markdown paragraph, a centred architecture
    diagram loaded from a published Google Drawing, and an empty trailing
    markdown block.
    """
    return html.Div(children=[
        dcc.Markdown('''
            ## Project Architecture
            This project uses MongoDB as the database. All data acquired are stored in raw form to the
            database (with de-duplication). An abstract layer is built in `database.py` so all queries
            can be done via function call. For a more complicated app, the layer will also be
            responsible for schema consistency. A `plot.ly` & `dash` app is serving this web page
            through. Actions on responsive components on the page is redirected to `app.py` which will
            then update certain components on the page.
        ''', className='row eleven columns', style={'paddingLeft': '5%'}),

        html.Div(children=[
            # The query string must use a plain '&': this is a Python string,
            # not HTML source, so an '&amp;' entity would be sent literally
            # and the height parameter would arrive as 'amp;h'.
            html.Img(src="https://docs.google.com/drawings/d/e/2PACX-1vQNerIIsLZU2zMdRhIl3ZZkDMIt7jhE_fjZ6ZxhnJ9bKe1emPcjI92lT5L7aZRYVhJgPZ7EURN0AqRh/pub?w=670&h=457",
                     className='row'),
        ], className='row', style={'textAlign': 'center'}),

        dcc.Markdown('''

        ''')
    ], className='row')


def _section_text(markdown, padding_left):
    """Wrap a markdown snippet in the page's eleven-column row."""
    return dcc.Markdown(markdown, className='row eleven columns',
                        style={'paddingLeft': padding_left})


def _control_label(text, extra_style=None):
    """Return the bold white caption shown to the left of a control."""
    style = {'font-weight': 'bold', 'float': 'left',
             'color': 'white', 'display': 'inline-block'}
    if extra_style:
        style.update(extra_style)
    return html.Label([text], style=style)


def _choice_radio(component_id, choices, selected):
    """Return inline radio items whose captions are the title-cased choices."""
    return dcc.RadioItems(
        id=component_id,
        options=[{'label': choice.title(), 'value': choice} for choice in choices],
        value=selected,
        labelStyle={'display': 'inline-block'},
        style={'width': '20%', 'float': 'left',
               'font-weight': 'bold', 'color': 'white'},
    )


def _figure_panel(controls, graph_id, height, width, panel_width='98%'):
    """Return a right-floated panel: a row of controls above one graph."""
    control_row = html.Div(controls, style={'width': '98%', 'display': 'inline-block'})
    graph = dcc.Graph(id=graph_id, style={'height': height, 'width': width})
    return html.Div([control_row, graph],
                    style={'width': panel_width, 'float': 'right',
                           'display': 'inline-block'})


def visualization_summary():
    """
    Lay out every EDA figure of the page together with its controls.

    The graphs are created empty; their figures are supplied by the callbacks
    registered on the component ids used here ('time-series-total',
    'time-series-daily', 'time-series-state', 'heat-map-by-state').
    """
    targets = ['cases', 'deaths']

    # Nationwide cumulative curve.
    cumulative_panel = _figure_panel(
        [_control_label('Label:'),
         _choice_radio('target-label', targets, 'cases')],
        'time-series-total', 500, 1100)

    # Nationwide daily increase.
    daily_panel = _figure_panel(
        [_control_label('Label:'),
         _choice_radio('daily-label', targets, 'cases')],
        'time-series-daily', 500, 1100)

    # Per-state curve: target, plot type and state are all selectable.
    state_dropdown = dcc.Dropdown(
        id='state-name',
        options=[{'label': i, 'value': i} for i in list(all_states)],
        value='Rhode Island',
        style={'width': '40%', 'float': 'left', 'display': 'inline-block'},
    )
    state_panel = _figure_panel(
        [_control_label('Label:'),
         _choice_radio('label-by-state', targets, 'cases'),
         _control_label('Plot type:'),
         _choice_radio('plot-type', ['cumulative', 'daily'], 'daily'),
         _control_label('State:', {'margin-right': '10px'}),
         state_dropdown],
        'time-series-state', 500, 1100)

    # Animated heat map by month.
    heat_map_panel = _figure_panel(
        [_control_label('Label:'),
         _choice_radio('label-radioitems', targets, 'cases')],
        'heat-map-by-state', 800, 1000, panel_width='100%')

    return html.Div(children=[
        _section_text('''
        ### US Case and Death Count
        ''', '5%'),
        _section_text('''
            #### Time-series cumulative cases and deaths
            ''', '5%'),
        cumulative_panel,
        _section_text('''
            #### Time-series daily reported cases and deaths
            ''', '5%'),
        daily_panel,
        _section_text('''
        ### Case and Death Count by State
        ''', '0%'),
        _section_text('''
            #### Time-series cases and deaths by state
            ''', '5%'),
        state_panel,
        _section_text('''
            #### Heat Map - Covid in US states
            We use the state-level COVID-19 data to power the heat map and track the outbreak
            over all states of US. 
            ''', '0%'),
        heat_map_panel,
    ])

def enhancement_summary():
    """
    Lay out the Enhancement figures: the county mask-use map, then the scatter
    and correlation matrices side by side under a legend of abbreviations.

    All three figures are built eagerly each time this function runs (calls to
    `heat_map_mask_use`, `scatter_matrix` and `correlation_matrix`).
    """
    def legend_entry(text, extra_style=None):
        # One bold white caption explaining an abbreviation used in the matrices.
        style = {'font-weight': 'bold', 'float': 'left',
                 'color': 'white', 'display': 'inline-block'}
        if extra_style:
            style.update(extra_style)
        return html.Label([text], style=style)

    # Every entry after the first is pushed right to separate the captions.
    spaced = {'margin-left': '100px'}

    return html.Div(children=[
        dcc.Markdown('''
          ### Who is Wearing Masks in US Counties
         ''', className='row eleven columns', style={'paddingLeft': '6%'}),
        dcc.Graph(id='mask-use-by-county', figure=heat_map_mask_use(),
                  style={'height': 800, 'width': 1000, 'display': 'inline-block'}),

        dcc.Markdown('''
          ### Whether Population Density and Propensity of Wearing Masks Affect the Spread?
         ''', className='row eleven columns', style={'paddingLeft': '0%'}),
        html.Div([
            legend_entry('CFR: Case Fatality Rate'),
            legend_entry('IR: Infection Rate', spaced),
            legend_entry('PD: Population Density', spaced),
            legend_entry('WMP: Wear Mask Probability', spaced),
            dcc.Graph(id='scatter-matrix', figure=scatter_matrix(),
                      style={'width': '48%', 'display': 'inline-block'}),
            dcc.Graph(id='correlation-matrix', figure=correlation_matrix(),
                      style={'width': '48%', 'float': 'right', 'display': 'inline-block'}),
        ], style={'width': '100%', 'display': 'inline-block'}),
    ])

# Sequentially add page components to the app's layout
def dynamic_layout():
    """
    Assemble the full page as one dash `html.Div`.

    The component builders are called top to bottom, in the order the
    sections appear in the returned container.
    """
    page_components = [
        page_header(),
        html.Hr(),
        project_description(),
        visualization_description(),
        visualization_summary(),
        enhancement_description(),
        enhancement_summary(),
        architecture_summary(),
    ]
    return html.Div(children=page_components, id='content', className='row')

# Set the layout to the function itself (it is not called here), so the
# layout updates upon reloading the page.
app.layout = dynamic_layout

# if __name__ == '__main__':
#     app.run_server(debug=True, port=1050, host='0.0.0.0')

2020-12-06 21:50:22,314 [fetch_all_db]: 320 documents read from the db
2020-12-06 21:50:22,431 [fetch_all_db]: 15304 documents read from the db
2020-12-06 21:50:22,440 [fetch_all_db]: 3142 documents read from the db
2020-12-06 21:50:22,441 [fetch_all_db]: 55 documents read from the db
2020-12-06 21:50:22,447 [fetch_all_db]: 1877 documents read from the db
2020-12-06 21:50:22,453 [fetch_all_db]: 1933 documents read from the db
2020-12-06 21:50:22,455 [fetch_all_db]: 55 documents read from the db


In [43]:
if __name__ == '__main__':
    # Show the Dash app through the jupyterlab_dash `AppViewer` instance.
    viewer.show(app)

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/42619/

Dash is running on http://localhost:42619/proxy/

In [None]:
# @app.callback(Output('heat-map-by-county', 'figure'),
#               Input('target-for-county', 'value'))
# def heat_map_county(label):
#     """Create the heat map of given label in US at the beginning of given month"""
#     df = df_dict['covid-us-county']
#     df['month'] = df.date.dt.month_name()
#     df_month = df[((df.date.dt.day == 1) | (df.date == max(df.date)))]
# #     df_month = df[((df.date.dt.day == 1) | (df.date == max(df.date)))]
#     df_month['fips'] =  df_month['fips'].apply(lambda x: str(x).zfill(5))
#     fig = px.choropleth(df_month, 
#                     locations='fips',
#                     geojson=county_json,
#                     scope="usa",
#                     color=label, # a column in the dataset
#                     hover_name='state', # column to add to hover information
#                     hover_data = {'cases': ':.0f', 'deaths': ':.0f', 'month': False,
#                                  'county': True},
#                     color_continuous_scale=px.colors.sequential.Sunsetdark if \
#                         label == 'cases' else px.colors.sequential.Greys,
#                     animation_group='county',
#                     animation_frame='month'
#                    )
#     fig.update_layout(title_text=f"Heat Map - Total {label.title()} in US Counties"),
#     fig.update_layout(margin={"r":0,"l":0,"b":0})
#     fig.update_layout(transition_duration=500)
    
#     last_frame_num = len(fig.frames) -1

#     fig.layout['sliders'][0]['active'] = last_frame_num

#     fig = go.Figure(data=fig['frames'][-1]['data'], frames=fig['frames'], layout=fig.layout)
#     fig.update_coloraxes(colorbar_title=f"<b>Color</b><br>Confirmed {label.title()}")
#     fig.layout.pop('updatemenus')
#     return fig


        
#         dcc.Markdown('''
#         ### Case and Death Count by County
#         ''', className='row eleven columns', style={'paddingLeft': '0%'}),
#             # Heat map by month for county-level data
#             dcc.Markdown('''
#             #### Heat Map - Covid in US counties
#             ''', className='row eleven columns', style={'paddingLeft': '0%'}),
        
#             html.Div([
#                 html.Div([ 
#                     html.Label( ['Label:'],
#                         style={'font-weight': 'bold', 'float': 'left', 
#                                'color': 'white', 'display': 'inline-block', 
#                                },
#                         ),
#                     dcc.RadioItems(
#                         id='target-for-county',
#                         options=[{'label': i.title(), 'value': i} for i in ['cases', 'deaths']],
#                         value='cases',
#                         labelStyle={
#                         'display': 'inline-block',
#                         },
#                         style={
#                         'width': '20%',
#                         'float': 'left',
#                         'font-weight': 'bold',
#                         'color': 'white',
#                         }),],  style={'width': '98%', 'display': 'inline-block'}),
#                 dcc.Graph(id='heat-map-by-county', style={'height': 800, 'width': 1000})
#             ],
#                 style={'width': '100%', 'float':'right', 'display': 'inline-block'}),

In [3]:
# Build AppViewer
from jupyterlab_dash import AppViewer
viewer = AppViewer()

import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output


# Definitions of constants. This project uses an extra CSS stylesheet at `./assets/style.css`
# in addition to the externally hosted one.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css', '/assets/style.css']

# Define the dash app first; the component functions below reference `app`
# (e.g. `app.get_asset_url`).
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)


# Define component functions
def page_header():
    """
    Build the page header as a dash `html.Div`.

    The header holds the project title followed by a link to the GitHub
    repository; the link contains the logo image and a short caption.
    """
    title = html.Div(children=[html.H3('DATA1050 Final Project')],
                     className="ten columns")

    logo = html.Img(
        id='logo',
        src=app.get_asset_url('github.png'),
        style={'height': '35px', 'paddingTop': '7%'},
    )
    caption_style = {
        'fontSize': '2rem',
        'height': '35px',
        'bottom': 0,
        'paddingLeft': '4px',
        'color': '#a3a7b0',
        'textDecoration': 'none',
    }
    caption = html.Span('Old boy', style=caption_style)

    repo_link = html.A(
        children=[logo, caption],
        className="two columns row",
        href='https://github.com/cengc13/data1050-final-project',
    )

    return html.Div(id='header', children=[title, repo_link], className="row")


def architecture_summary():
    """
    Returns the text and image of architecture summary of the project.

    The returned dash `html.Div` contains, in order: a Markdown description
    of the architecture, a centered architecture diagram image, and an empty
    trailing Markdown block.
    """
    return html.Div(children=[
        dcc.Markdown('''
            ## Project Architecture
            This project uses MongoDB as the database. All data acquired are stored in raw form to the
            database (with de-duplication). An abstract layer is built in `database.py` so all queries
            can be done via function call. For a more complicated app, the layer will also be
            responsible for schema consistency. A `plot.ly` & `dash` app is serving this web page
            through. Actions on responsive components on the page is redirected to `app.py` which will
            then update certain components on the page.
        ''', className='row eleven columns', style={'paddingLeft': '5%'}),

        html.Div(children=[
            # The query string must use a plain `&` separator: this is a
            # Python string, not HTML markup, so an HTML entity would be sent
            # verbatim and the height parameter would not be named `h`.
            html.Img(src="https://docs.google.com/drawings/d/e/2PACX-1vQNerIIsLZU2zMdRhIl3ZZkDMIt7jhE_fjZ6ZxhnJ9bKe1emPcjI92lT5L7aZRYVhJgPZ7EURN0AqRh/pub?w=670&h=457",
                     className='row'),
        ], className='row', style={'textAlign': 'center'}),

        dcc.Markdown('''

        ''')
    ], className='row')


def project_about():
    """
    Build the "About" section of the page.

    Returns a dash `html.Div` row wrapping a single Markdown block that lists
    the team members, the executive summary, the datasets used, the results
    versus the baseline, possible next steps and references.
    """
    about_text = '''
        ## About

        * Names of all team members: Cheng Zeng, Tianqi Tang, Zhi Wang

        * Project & Executive Summary

            * We will create a live data-science web application named “Covid-19 tracker”.
            It uses covid-19 data from the New York Times to understand and project the
            spread of the outbreak in the United States at hierarchical granularity,
            ranging from national to county level. It will allow users to interactively
            view the covid cases and death at different levels.

            * This final project uses gitpod as the platform, an online IDE for github
            repo for data collection, clean-up, transformation and visualization.
            The data will be stored in Mongodb, through the adaptor of a python module named “pymongo”.
            The EDA, visualization and enhancement will be  in jupyter notebooks.
            The interactive web application will be realized using plotly and Dash.
            It will mainly comprise three sections, namely Introduction, EDA & Visualization and Enhancement.
            The enhancement section is aimed at figure out whether two factors of interest might affect the transmission
            in states, and also a simple regression model is constructed to  project the trend of the pandemic in US.

            * At the end of this project, we hope to build up a web application which tracks the up-to-date Covid-19 situation
             at various geographical levels. Meanwhile it aims to provide some insights on if restrictions, such as wearing masks,
             can help to contain the pandemic.

        * Datasets used:

            * the covid national-level and state-level datasets  are from The New York Times,
         based on reports from state and local health agencies. They contain a series of data files with cumulative counts of
         coronavirus cases in the United States, at the national and state level, over time. They are regularly updated every day.
         The national level data is about 7 KB. The state level data is 463 KB. The covid datasets lives on the github repo by the New York Times.
         These are raw texts that can be scraped in a straightforward way using the “request” python module.
         The raw data will be updated every day. So the incremental updates using the web scraping method will be done automatically.

            * the static survey data regarding the propensity to wearing masks (109 KB), state-level population data (1 KB),
            and state area data (884 B) are used to understand how the role of wearing face coverings and population density
            in the course of the pandemic.

        * Summary of performance with respect to the baseline model(s)

            * We figure out that there is a strong correlation between infection rate and population density in US states.

            * There exists a high negative correlation between the propensity to wear masks and the case fatality rate in states.

            * A simple regression model is built to predict the trend of outbreak in US. It behaves much better than a simple baseline model
            predicting using the average number of past week.

        * Possible next steps

            * Since the county-level data contains mountains of items, it is not shown in the web application. In view of this, next we can
            move on to some other cloud platforms for data storage/update/fetch, and web engine, for example GCP and AWS.

            * We could explore more features to improve the model performance (accuracy and training efficiency)

        * References to related work

            * A detailed map of who is wearing masks in the U.S. from
            [NYTimes](https://www.nytimes.com/interactive/2020/07/17/upshot/coronavirus-face-mask-map.html).
            This website uses the mask-use-by-counties data to show patterns of wearing masks by county.

            * Covid in the U.S.: Latest Map and Case Count from [NYTimes](https://www.nytimes.com/interactive/2020/us/coronavirus-us-cases.html).
            Combining demographic and population data, it shows the map for positive and death rates at state and county level.

            * CDC calls on Americans to wear masks to prevent COVID-19 spread from
            [CDC](https://www.cdc.gov/media/releases/2020/p0714-americans-to-wear-masks.html). It highlights the importance of
            wearing face coverings in slowing the spread of covid.


        '''
    about_markdown = dcc.Markdown(
        about_text,
        className='eleven columns',
        style={'paddingLeft': '5%'},
    )
    return html.Div(children=[about_markdown], className="row")


def additional_project_details():
    """
    Build the "Additional details" section of the page.

    Returns a dash `html.Div` row wrapping a single Markdown block covering
    the development process, data acquisition / caching / ETL / database
    design, and links to the project notebooks.
    """
    details_text = '''
        ## Additional details

        * Development Process and Final Technology Stach

            Please find the project architecture for details.

        * Data Acquisition, Caching, ETL Processing, Database Design

            * Data Acquisition: the covid datasets and mask-use dataset are scraped from NYTime github repo. The population and area data for states come
            from wikipedia.

            * Caching: the datasets are cached with the aid of `expiringdict` module. No more than 10 elements can be in the caching and if the length exceeds
            the limit, the oldest item will be removed.

            * ETL Processing: datasets are upserted in to local MongoDB databases through the python 'adapter' `pymongo` module. Then the dataset can be
            readily loaded.

            * Database Design: all datasets in this project are saved in a MongoDB database named 'covid-us'. Then each dataset, corresponding to
            a collection in MongoDB, is updated. In this way the database in this project looks like a two-level tree structure.


        * Link to the 'ETL_EDA.ipynb' notebook: [ETL_EDA](https://github.com/cengc13/data1050-final-project/blob/main/ETL_EDA.ipynb)

        * Link to the 'Enhancement.ipynb' notebook: [Enhancement-1](https://github.com/cengc13/data1050-final-project/blob/main/Enhancement.ipynb)
        and [Enhancement-2](https://github.com/cengc13/data1050-final-project/blob/main/Enhancement_Tianqi.ipynb).Note that figures plotted with
        `plotly` cannot be shown in jupyter notebooks on github.


        '''
    details_markdown = dcc.Markdown(
        details_text,
        className='eleven columns',
        style={'paddingLeft': '5%'},
    )
    return html.Div(children=[details_markdown], className="row")

# Sequentially add page components to the app's layout
def dynamic_layout():
    """
    Assemble the full page as one dash `html.Div`.

    The sections are built in display order: header, horizontal rule,
    "About", additional details and the architecture summary.
    """
    page_components = [
        page_header(),
        html.Hr(),
        project_about(),
        additional_project_details(),
        architecture_summary(),
    ]
    return html.Div(children=page_components, id='content', className='row')

# Set the layout to the function itself (it is not called here), so the
# layout updates upon reloading the page.
app.layout = dynamic_layout

if __name__ == '__main__':
    # Disabled alternative that serves the app directly:
    #     app.run_server(debug=True, port=8080, host='0.0.0.0')
    # Show the app through the jupyterlab_dash `AppViewer` instance instead.
    viewer.show(app)

Dash is running on http://localhost:43833/proxy/43833/

Dash is running on http://localhost:43833/proxy/43833/

Dash is running on http://localhost:43833/proxy/43833/

