In [101]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [102]:
state_mandate = pd.read_csv(
    "https://healthdata.gov/node/3281076/download",
    index_col=False)
state_count = pd.read_csv(
    "https://data.cdc.gov/api/views/9mfq-cb36/rows.csv?accessType=DOWNLOAD",
    index_col=False)
state_population = pd.read_csv(
    "../data/states_population.csv",
    index_col=False),
    index_col=False)

In [105]:
def count_processing(case_df, pop_df):
    '''
    Function used to process the case count dataset downloaded from github.

    Argument:
    case_count_df -- The original dataset

    Return:
    case_count_monthly --
    The result dataframe has the monthly case count and death count
    for each state per hundred thousand of state population size
    '''
    try:
        case_df = case_df[['state', 'submission_date', 'new_case',
                          'new_death']]
    except KeyError:
        print("The dataset is from different data source")
    case_count_df = pd.merge(case_df, pop_df,
                             left_on="state",
                             right_on="state_id")
    case_count_df['new_case_norm'] = case_count_df['new_case'] / \
        case_count_df['population'] * 100000
    case_count_df['new_death_norm'] = case_count_df['new_death'] / \
        case_count_df['population'] * 100000
    case_count_df = case_count_df.rename(columns={'submission_date': 'date',
                                                  'state_x': 'state'})
    case_count_df['date'] = pd.to_datetime(case_count_df['date'])
    case_count_df['month'] = pd.to_datetime(case_count_df['date']).dt.month
    case_count_monthly = case_count_df.groupby(['state',
                                                'month']).sum().reset_index()
    return case_count_df, case_count_monthly

In [106]:
def mandate_processing(mandate_df):
    '''
    Function used to process the mandate dataset downloaded from healthdata.gov

    Argument:
    mandate_df -- The original mandate dataset

    Return:
    mandate_df -- Five most common mandates announced throughout the states,
    and drop the duplicate rows in the dataframe.
    '''
    if 'state_id' not in mandate_df.keys():
        raise KeyError("The input dataframe is not from the same source")

    policy_mandates = ["Shelter in Place",
                       "Food and Drink",
                       "Non-Essential Businesses",
                       "Outdoor and Recreation",
                       "Mandate Face Mask Use By All Individuals In Public"
                       " Facing Businesses",
                       "Mask Requirement",
                       "Mandate Face Mask Use By All Individuals In Public"
                       " Spaces"]

    mandate_columns = mandate_df[["state_id",
                                  "policy_level",
                                  "date",
                                  "policy_type",
                                  "start_stop"]]

    mandate_rows = mandate_columns[(mandate_columns["start_stop"] == "start")
                                   & (mandate_columns["policy_level"] ==
                                   "state")
                                   & (mandate_columns["policy_type"].
                                   isin(policy_mandates))]

    mandate_rows.loc[mandate_rows["policy_type"]
                     .str.contains("Mask"), "policy_type"] = "Mask Wearing"

    mandate_rows = mandate_rows.drop(['policy_level', 'start_stop'], axis=1)

    mandate_rows = mandate_rows.rename(columns={'state_id': 'state'})
    mandate_rows.drop_duplicates(inplace=True)
    return mandate_rows

In [107]:
state_mandate = mandate_processing(state_mandate)
state_count, state_count_monthly =\
    count_processing(state_count, state_population)




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [108]:
### Choropleth

fig = px.choropleth(state_count_monthly_oct,
                        color='new_case',
                        locations='state',
                        locationmode="USA-states",
                        scope="usa",
                        range_color=[0, 200000])
fig

In [91]:
state_count_cp = state_count.copy()
state_count_cp_1 = state_count_cp[state_count_cp["state"] == 'WA']
state_count_cp_2 = state_count_cp[state_count_cp["state"] == 'OR']

state_mandate_cp = state_mandate.copy()
state_mandate_cp_1 = state_mandate_cp[state_mandate_cp["state"] == 'WA']
state_mandate_cp_2 = state_mandate_cp[state_mandate_cp["state"] == 'OR']


In [93]:
### Line graph for one state
fig1 = go.Figure()
fig1.add_traces(go.Scatter(
    x=state_count_cp_1['date'],
    y=state_count_cp_1['new_case'],
    mode='lines',
    name=" count"))
fig1.update_layout(
    title='Washington',
    yaxis_title="count(per 100,000 people)",
    xaxis_title="date"
)
color = ['black', 'brown', 'aqua', 'green', 'grey', 'yellowgreen', 'red']
sm = state_mandate_cp_1['policy_type'].tolist()
count = state_count_cp_1['new_case'].tolist()
count.sort()
i = 0
for date, mandate in\
        zip(state_mandate_cp_1['date'], state_mandate_cp_1['policy_type']):
    fig1.add_traces(go.Scatter(
        x=[date, date],
        y=[0, count[-1]],
        line={
            'color': color[i],
            'width': 1,
            'dash': 'dashdot',
        },
        name=mandate
        ))
    i += 1
fig1

In [100]:
### Line graph for the second state

fig2 = go.Figure()
fig2.add_traces(go.Scatter(
    x=state_count_cp_2['date'],
    y=state_count_cp_2['new_case'],
    mode='lines',
    name=" count"))
fig2.update_layout(
    title='Oregon',
    yaxis_title="count(per 100,000 people)",
    xaxis_title="date"
)
color = ['black', 'brown', 'aqua', 'green', 'grey', 'yellowgreen', 'red']
sm = state_mandate_cp_2['policy_type'].tolist()
count = state_count_cp_2['new_case'].tolist()
count.sort()
i = 0
for date, mandate in\
        zip(state_mandate_cp_2['date'], state_mandate_cp_2['policy_type']):
    fig2.add_traces(go.Scatter(
        x=[date, date],
        y=[0, count[-1]],
        line={
            'color': color[i],
            'width': 1,
            'dash': 'dashdot',
        },
        name=mandate
        ))
    i += 1
fig2