### Situation Indicators for Covid-19 response

In [None]:
import requests
import pandas as pd
import time

import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash
import plotly.express as px

#### Data Wrangling

In [None]:
# per state time-series json API
url = "https://api.covid19india.org/v4/min/timeseries.min.json"
# bound the wait with (connect, read) timeouts so an unresponsive server
# cannot hang the notebook indefinitely
response_ts = requests.get(url, timeout=(10, 60))
# fail here on an HTTP error status instead of later, when the body is parsed as JSON
response_ts.raise_for_status()

In [None]:
# flatten the nested time-series payload into a single wide row (timed)
start_time = time.time()
wide_ts_df = pd.json_normalize(response_ts.json())
total_sec = time.time() - start_time
print(f"{round(total_sec,1)} secs execution")

# every wide column name is a dot-joined key path: split it into its parts
ts_key_paths = wide_ts_df.columns.str.split(".", expand=True)
# discard the second path component and name the four that remain
long_ts_df = ts_key_paths.droplevel(1).to_frame(
    index=False, name=["state", "time_period", "obs_type", "obs_cat"]
)
# the single wide row carries the observed values, in column order
long_ts_df = long_ts_df.assign(val=wide_ts_df.values[0])

In [None]:
# target states and districts current day data and metadata
url = "https://api.covid19india.org/v4/min/data.min.json"
# bound the wait with (connect, read) timeouts so an unresponsive server
# cannot hang the notebook indefinitely
response_data = requests.get(url, timeout=(10, 60))
# fail here on an HTTP error status instead of later, when the body is parsed as JSON
response_data.raise_for_status()

In [None]:
# filter state metadata and districts out from json data
# parse the response body once: calling .json() inside the comprehension
# would re-parse the whole payload for every key visited
data_json = response_data.json()
json_st = {
    st: {
        key: content
        for key, content in st_content.items() if key not in ('districts', 'meta')
    } for st, st_content in data_json.items()
}

# read json_st and normalize
wide_st_df = pd.json_normalize(json_st)
# build long format from column names structure (renames as desired)
long_st_df = wide_st_df.columns.str.split(".", expand=True).to_frame(
    index=False, name=["state", "obs_type", "obs_cat"]
)
# add values from the single wide row; without this column the current-day
# state rows carry no `val` and every state-level indicator ends up NaN
long_st_df["val"] = wide_st_df.values[0]

In [None]:
# filter states delta confirmed for the previous week --> delta14_7
def conf_st_deltaX_Y(st_ts_df, x=14, y=7):
    '''
    Sum the daily confirmed deltas per state over a past window of days.

    The window is anchored on the latest date found in `time_period`: it keeps
    dates strictly after (latest - x days) and up to, and including,
    (latest - y days). Dates are compared as '%Y-%m-%d' strings.
    :param st_ts_df: long format state timeseries with columns
        state, time_period, obs_type, obs_cat and val
    :param x: lower limit number of days (integer)
    :param y: upper limit number of days (integer)
    :return: dataframe with columns state, val, obs_cat ('confirmed') and
        obs_type (f"delta{x}_{y}"), one row per state present in the window
    '''
    category = 'confirmed'
    # latest reported date assumed equal to all states/obs_types/obs_cat
    newest = pd.to_datetime(st_ts_df.time_period).max()
    window_open = (newest - pd.to_timedelta(x, unit='d')).strftime('%Y-%m-%d')
    window_close = (newest - pd.to_timedelta(y, unit='d')).strftime('%Y-%m-%d')
    # daily confirmed deltas reported inside the window
    keep = (
        (st_ts_df.obs_type == 'delta')
        & (st_ts_df.obs_cat == category)
        & (st_ts_df.time_period > window_open)
        & (st_ts_df.time_period <= window_close)
    )
    per_state = st_ts_df[keep].groupby('state')[['val']].sum().reset_index()
    # constant columns so the result matches the current day state data structure
    return per_state.assign(obs_cat=category, obs_type=f"delta{x}_{y}")

In [None]:
# append the rows returned by conf_st_deltaX_Y (default x=14, y=7) to the
# current day state data
# pandas concat works with different column order (keeps first)
# NOTE(review): long_st_df is reassigned from itself, so re-running this cell
# appends the same rows again
long_st_df = pd.concat([long_st_df, conf_st_deltaX_Y(long_ts_df)], ignore_index=True)

In [None]:
# all reported days json API (consumed by conf_ds_deltaX_Y)
url = "https://api.covid19india.org/v4/min/data-all.min.json"
# bound the wait with (connect, read) timeouts so an unresponsive server
# cannot hang the notebook indefinitely
response_all = requests.get(url, timeout=(10, 60))
# fail here on an HTTP error status instead of later, when the body is parsed as JSON
response_all.raise_for_status()

In [None]:
# our aim here --> districts delta confirmed if present for the previous week --> delta14_7
def conf_ds_deltaX_Y(json_resp, x=14, y=7):
    '''
    Extract the districts 'delta confirmed' values for a past window of days.

    Normalizing the whole json_resp is too slow, so only the needed values are
    copied. The window is anchored on the latest day key of json_resp: it keeps
    days strictly after (latest - x days) and up to, and including,
    (latest - y days). Days are compared as '%Y-%m-%d' strings.
    :param json_resp: json response from 'data-all' Covid19 India API
    :param x: lower limit number of days (integer)
    :param y: upper limit number of days (integer)
    :return: nested dict {day: {state: {district: delta confirmed}}}; states
        without a 'districts' entry are left out, districts without
        delta confirmed are left out of their state
    '''
    # reported days series
    days = pd.Series(list(json_resp))
    # latest reported date assumed equal to all districts
    newest = pd.to_datetime(days).max()
    window_open = (newest - pd.to_timedelta(x, unit='d')).strftime('%Y-%m-%d')
    window_close = (newest - pd.to_timedelta(y, unit='d')).strftime('%Y-%m-%d')
    in_window = days[(days > window_open) & (days <= window_close)]

    trunc_json = {}
    for day in in_window:
        per_state = {}
        for st, st_content in json_resp[day].items():
            if 'districts' not in st_content:
                continue
            # keep only the districts reporting a confirmed delta
            per_state[st] = {
                ds: ds_content['delta']['confirmed']
                for ds, ds_content in st_content['districts'].items()
                if 'delta' in ds_content and 'confirmed' in ds_content['delta']
            }
        trunc_json[day] = per_state
    return trunc_json

In [None]:
# truncate the data-all payload to the range of days (timed)
start_time = time.time()
trunc_json = conf_ds_deltaX_Y(response_all.json())
total_sec = time.time() - start_time
print(f"{round(total_sec,1)} secs execution")
# flatten the truncated json into a single wide row, '//' joining the key path
wide_ds_range_df = pd.json_normalize(trunc_json, sep='//')

# split every wide column name back into its day / state / district parts
range_key_paths = wide_ds_range_df.columns.str.split("//", expand=True)
long_ds_range_df = range_key_paths.to_frame(
    index=False, name=["time_period", "state", "district"]
)
# the single wide row carries the delta confirmed values, in column order
long_ds_range_df = long_ds_range_df.assign(val=wide_ds_range_df.values[0])

In [None]:
# parse the response body once: calling .json() inside the comprehensions
# would re-parse the whole payload for every key visited
data_json = response_data.json()
# filter state metadata from json data (keep only the 'meta' entry per state)
json_meta_st = {
    st: {
        key: content
        for key, content in st_content.items() if key == 'meta'
    } for st, st_content in data_json.items()
}
# filter district data and metadata from json data (keep only 'districts' per state)
# TODO: filter out current day data
json_ds = {
    st: {
        key: content
        for key, content in st_content.items() if key == 'districts'
    } for st, st_content in data_json.items()
}

In [None]:
# flatten json_meta_st into a single wide row, normalizing at most two levels deep
wide_meta_st_df = pd.json_normalize(json_meta_st, max_level=2)
# split the dot-joined column names and discard the second path component
meta_key_paths = wide_meta_st_df.columns.str.split(".", expand=True).droplevel(1)
# temporary long format: one row per (state, metadata column)
long_meta_st_df = meta_key_paths.to_frame(index=False, name=["state", "column"])
long_meta_st_df = long_meta_st_df.assign(val=wide_meta_st_df.values[0])
# state metadata table: one row per state, one column per metadata field
# (rename_axis removes the index name `column` left by the pivot)
meta_st_df = (
    long_meta_st_df
    .pivot(index='state', columns='column', values='val')
    .reset_index()
    .rename_axis(None, axis=1)
)

# expand the nested `tested` column into its own columns
tested_df = meta_st_df['tested'].apply(pd.Series).rename(
    columns={"date": "test_date", "source": "test_source"}
)
# swap the nested column for the expanded ones
meta_st_df = pd.concat([meta_st_df, tested_df], axis=1).drop(columns='tested')

# same treatment for the nested `vaccinated` column, when the API provides it
if 'vaccinated' in meta_st_df:
    vac_df = meta_st_df['vaccinated'].apply(pd.Series).rename(
        columns={"date": "vaccinated_date", "source": "vaccinated_source"}
    )
    # swap the nested column for the expanded ones
    meta_st_df = pd.concat([meta_st_df, vac_df], axis=1).drop(columns='vaccinated')

In [None]:
# read json_ds and normalize - use custom separator: district names have points!
wide_ds_df = pd.json_normalize(json_ds, max_level=4, sep='//')
# build long format from column names (the second path component is discarded)
long_ds_df = wide_ds_df.columns.str.split("//", expand=True).droplevel(1).to_frame(
    index=False, name=["state", "district", "obs_type", "obs_cat"]
)
# add values from series
long_ds_df["val"] = wide_ds_df.values[0]
# filter metadata in temporary long format
filter_meta = long_ds_df.obs_type == 'meta'
long_meta_ds_df = long_ds_df[filter_meta]

# district data in long format (drop metadata)
long_data_ds_df = long_ds_df.drop(long_meta_ds_df.index)
# use data-all range dataframe to compute delta14_7
ds_delta_14_7 = long_ds_range_df.groupby(['state', 'district']).agg({'val': 'sum'}).reset_index()
# fill cols obs_cat, obs_type with constants (match current day district data structure)
ds_delta_14_7['obs_cat'] = 'confirmed'
ds_delta_14_7['obs_type'] = 'delta14_7'
# pandas concat works with different column order (keeps first)
long_data_ds_df = pd.concat([long_data_ds_df, ds_delta_14_7], ignore_index=True)

# pivot temporary long into district metadata table:
# one row per (state, district), one column per metadata field
meta_ds_df = long_meta_ds_df.drop(columns='obs_type').set_index(
    ['state', 'district', 'obs_cat']
).unstack(level=-1).reset_index(col_level=1).droplevel(level=0, axis=1).rename_axis(None, axis=1)

# un nest district tested column
# rows without a nested value expand into a column labelled 0, dropped here;
# errors='ignore' avoids a KeyError when every row is nested and no such
# column is produced
ds_tested_df = meta_ds_df.tested.apply(pd.Series).drop(
    columns=0, errors='ignore'
).rename(
    columns={"date": "test_date", "source": "test_source"}
)
# concat back to metadata
meta_ds_df = pd.concat([meta_ds_df, ds_tested_df], axis = 1).drop('tested', axis = 1)

# un nest district vaccinated column if present
if 'vaccinated' in meta_ds_df.columns:
    # same tolerance for a missing column 0 as for `tested`
    ds_vac_df = meta_ds_df.vaccinated.apply(pd.Series).drop(
        columns=0, errors='ignore'
    ).rename(
        columns={"date": "vaccinated_date"}
    )
    # concat back to metadata
    meta_ds_df = pd.concat([meta_ds_df, ds_vac_df], axis = 1).drop('vaccinated', axis = 1)

#### Current week data visualization
Based on these situation analysis indicators:

|  |  |
| --- | --- |
| <img src="https://drive.google.com/uc?export=view&id=1X1hVR5y00vprU1jFT20nSP3Jc41jVsWY" width="200"> | <img src="https://drive.google.com/uc?export=view&id=1saMjeevjiVlv_Dq7BNRNUgKdOApjwFeS" width="200"> |
| <img src="https://drive.google.com/uc?export=view&id=10frXzVNHFAFNW1GrErj3QwKZRZGPZl9A" width="200"> | <img src="https://drive.google.com/uc?export=view&id=1AdDqL3kVyjaepYR8N9t6Q5Y2iwaXCkYK" width="200"> |
| <img src="https://drive.google.com/uc?export=view&id=1dg3qZfbjQFuxCyLsmTS6iLLD2BKT5yL2" width="200"> |  |

In [None]:
# detect proxy configuration for JupyterHub or Binder
# (called here, ahead of the cell that creates the JupyterDash app)
JupyterDash.infer_jupyter_proxy_config()

In [None]:
# dropdown to switch the geographic level (state/district)
geo_level = ['State', 'District']
dd_level = dcc.Dropdown(
    id="my_level",
    # every option uses the same text as label and as value
    options=[{"label": name, "value": name} for name in geo_level],
    value='State'
)
# dropdown to pick the situation indicator
sit_ind = [
    'Case Incidence',
    'Percent change in cases',
    'Test Positivity Rate (TPR)',
    'Case Fatality Ratio (CFR)',
]
dd_ind = dcc.Dropdown(
    id="my_ind",
    # every option uses the same text as label and as value
    options=[{"label": name, "value": name} for name in sit_ind],
    value='Case Incidence'
)

In [None]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# Build App: current day
app_c = JupyterDash(__name__, external_stylesheets=external_stylesheets)

# the two labelled dropdowns, laid out side by side
level_box = html.Div(
    children=["Switch:", dd_level],
    style={'width': '30%', 'display': 'inline-block'},
)
indicator_box = html.Div(
    children=["Situation Indicator:", dd_ind],
    style={'width': '65%', 'display': 'inline-block'},
)

# App Layout: titles, dropdown row, then the bar plot filled in by the callback
app_c.layout = html.Div(children=[
    html.H2("Situation Analysis Framework"),
    html.H6("Switch State/District and select Indicator"),
    html.Div(children=[level_box, indicator_box]),
    html.Br(),
    dcc.Graph(id='bar-plot'),
])

In [None]:
# Define callback to update graph
@app_c.callback(
    Output("bar-plot", "figure"),
    Input("my_level", "value"),
    Input("my_ind", "value"),
)
def plot_indicator(geo_lev, indicator):
    '''
    Build the bar plot of one situation indicator per state or per district.

    Reads the module-level long_st_df / meta_st_df (state level) or
    long_data_ds_df / meta_ds_df (district level).
    :param geo_lev: 'State' selects state level, any other non-empty value
        selects district level
    :param indicator: indicator label; matched on the substrings "change",
        "Incidence" and "Fatality", anything else is treated as test
        positivity rate
    :return: plotly bar figure, or an empty dict if any input is missing
    '''
    # don't return plot if any missing values
    if not geo_lev or not indicator:
        return {}

    # data/metadata level
    is_state = geo_lev == 'State'
    data = long_st_df if is_state else long_data_ds_df
    meta = meta_st_df if is_state else meta_ds_df
    # left join data/meta
    key_join = "state" if is_state else ["state", "district"]
    data_meta_df = data.merge(meta, on=key_join, how="left", sort=False)

    def pick(obs_cat, obs_type, column='val'):
        # one column of the rows matching obs_cat/obs_type, indexed by the
        # join key so that the arithmetic below aligns per state/district
        rows = data_meta_df[
            (data_meta_df.obs_cat == obs_cat) & (data_meta_df.obs_type == obs_type)
        ]
        return rows.set_index(key_join)[column]

    conf_d07 = pick('confirmed', 'delta7')
    if "change" in indicator:
        conf_d14 = pick('confirmed', 'delta14_7')
        # percent change is relative to the baseline (previous week), not to
        # the current week
        # assumes no delta zeros in the previous week or instead Inf will result
        ind_calc = (conf_d07 - conf_d14) / conf_d14 * 100
    elif "Incidence" in indicator:
        # newly confirmed per million population (per week --> delta7)
        population = pick('confirmed', 'delta7', column='population')
        ind_calc = conf_d07 * 1e6 / population
    elif "Fatality" in indicator:
        # deceased over confirmed, both per week (delta7)
        # NOTE(review): this comment used to read "total deaths over total
        # confirmed" - confirm whether obs_type 'total' was intended instead
        # assumes no delta zeros or instead Inf will result
        ind_calc = pick('deceased', 'delta7') / conf_d07 * 100
    else:
        # test positivity rate (per week --> delta7)
        # assumes no delta zeros or instead Inf will result
        ind_calc = conf_d07 / pick('tested', 'delta7') * 100

    # name the series so the value column is always called "val"
    fig = px.bar(
        ind_calc.rename("val").reset_index(),
        x=geo_lev.lower(),
        y="val",
    ).update_layout(xaxis={'categoryorder': 'total descending'})
    return fig

In [None]:
# Run app; with mode='external' JupyterDash serves it at a URL to open in a
# separate browser tab instead of embedding it in the notebook
# NOTE(review): the previous comment said "inline" - switch to mode='inline'
# if embedding in the notebook is what is wanted
app_c.run_server(mode='external')