### [Source covid19](https://www.covid19india.org/)
https://api.covid19india.org/

Recommended approach from the docs: JSON parsing of the V4 endpoints
		
| Status | Link to API | Description |
| --- | --- | --- |
| <img src=https://github.githubassets.com/images/icons/emoji/unicode/1f49a.png width="20"> | https://api.covid19india.org/v4/min/timeseries.min.json | Daily numbers across C,R,D and Tested per state (historical data) |
| <img src=https://github.githubassets.com/images/icons/emoji/unicode/1f49a.png width="20"> | https://api.covid19india.org/v4/min/data.min.json | Current day numbers across districts and states |
| <img src=https://github.githubassets.com/images/icons/emoji/unicode/1f49a.png width="20"> | https://api.covid19india.org/v4/min/data-all.min.json | Per day numbers across districts and states - consider using timeseries in place of this. This is a huge file and is a mix of timeseries and data.min.json |

**Doc Note**: *Please consider using the above endpoints for all your data needs. All the data we show on the website is fuelled by the above endpoints.*

#### Time-series structure
State-level time-series data: *doesn't go down to district level, as described above*

https://api.covid19india.org/documentation/timeseries.min.html

In [None]:
import requests
import pandas as pd
import time

In [None]:
url = "https://api.covid19india.org/v4/min/timeseries.min.json"
# Bound the wait with a timeout so an unresponsive server cannot hang the
# notebook, and fail here on an HTTP error status instead of later, when the
# body is parsed as JSON.
response_ts = requests.get(url, timeout=30)
response_ts.raise_for_status()

In [None]:
# Parse the JSON body and flatten it into a wide frame, timing the step.
# perf_counter() is monotonic, so the measured interval cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
wide_ts_df = pd.json_normalize(response_ts.json())
total_sec = time.perf_counter() - start_time
print(f"{round(total_sec,1)} secs execution")

In [None]:
# Each wide column name is a dotted path; splitting it yields one index level
# per path component.
ts_col_index = wide_ts_df.columns.str.split(".", expand=True)
# Level 1 is discarded (presumably the constant `dates` key - TODO confirm);
# the four remaining levels become the long-format columns.
ts_col_index = ts_col_index.droplevel(1)
long_ts_df = ts_col_index.to_frame(
    index=False,
    name=["state", "time_period", "obs_type", "obs_cat"],
)

In [None]:
# The first row of the wide frame supplies one value per column path, in the
# same order as the rows of the long frame.
long_ts_df["val"] = wide_ts_df.to_numpy()[0]

**Notes**

- a missing key under `delta` should take the nearest previous value, e.g. if `AN, 2020-04-10, delta, recovered` is not present, the value of `AN, 2020-04-09, delta, recovered: 10` has not changed
- `delta7` means "*seven day moving average*"

In [None]:
# Preview the rows labelled 30 to 40 (label slices with `.loc` include both ends)
long_ts_df.loc[30:40]

In [None]:
# Collect the distinct values of each descriptive column once, then report them
states = long_ts_df.state.unique()
types = long_ts_df.obs_type.unique()
categs = long_ts_df.obs_cat.unique()
print(f"Total data points number: {len(long_ts_df.state)}")
print(f"{len(states)} states:\n{states}")
print(f"obs_type:\n{types}")
print(f"obs_cat:\n{categs}")

#### Time-series data vis

In [None]:
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
from jupyter_dash import JupyterDash
import plotly.express as px

In [None]:
# Stylesheet URL handed to the app constructor below
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# Build App
app = JupyterDash(__name__, external_stylesheets=external_stylesheets)
# detect proxy configuration for JupyterHub or Binder
JupyterDash.infer_jupyter_proxy_config()

In [None]:
def _dropdown(component_id, choices, default):
    """Return a ``dcc.Dropdown`` that uses every choice as both label and value."""
    return dcc.Dropdown(
        id=component_id,
        options=[{"label": choice, "value": choice} for choice in choices],
        value=default,
    )


# One dropdown per query dimension: state, obs_type, obs_cat, time_period
dd_st = _dropdown("my_st", states, 'AN')
dd_type = _dropdown("my_typ", types, 'delta7')
dd_cat = _dropdown("my_cat", categs, 'confirmed')
time_ps = long_ts_df.time_period.unique()
dd_time = _dropdown("my_time", time_ps, '2021-05-01')

In [None]:
def _labelled_column(label, control):
    """Wrap a caption and its control in an inline, 24%-wide ``html.Div``."""
    return html.Div(
        [label, control],
        style={'width': '24%', 'display': 'inline-block'},
    )


# App Layout: title, subtitle, a row with the four selectors, then the graph
selector_row = html.Div([
    _labelled_column("Select State", dd_st),
    _labelled_column("Select type", dd_type),
    _labelled_column("Select category", dd_cat),
    _labelled_column("Select time", dd_time),
])
app.layout = html.Div([
    html.H2("Indian States Covid Time-Series"),
    html.H6("Browse by State, type and category of obs_values and cut off time"),
    selector_row,
    html.Br(),
    dcc.Graph(id='time-series'),
])

In [None]:
# Define callback to update graph
@app.callback(
    Output("time-series", "figure"),
    Input("my_st", "value"),
    Input("my_typ", "value"),
    Input("my_cat", "value"),
    Input("my_time", "value"),
)
def query_2_plot(state, obs_type, obs_cat, co_time):
    """Build the time-series line figure for the current dropdown selection.

    Args:
        state: value of the ``my_st`` dropdown.
        obs_type: value of the ``my_typ`` dropdown.
        obs_cat: value of the ``my_cat`` dropdown.
        co_time: value of the ``my_time`` dropdown. Only periods strictly
            after it are plotted; when it is empty no time filter is applied.

    Returns:
        A Plotly line figure of ``val`` against ``time_period``, or ``{}`` when
        any of ``state``, ``obs_type`` or ``obs_cat`` is not selected.
    """
    # don't return plot if missing values for query
    if not all((state, obs_type, obs_cat)):
        return {}
    query = "state == @state & obs_type == @obs_type & obs_cat == @obs_cat"
    # Only filter on time when a cut-off was chosen. Substituting the minimum
    # period into a strict `>` comparison would drop the earliest period
    # instead of returning all times.
    if co_time:
        query += " & time_period > @co_time"
    fig = px.line(
        long_ts_df.query(query),
        x="time_period",
        y="val",
        line_shape="spline",
    ).update_traces(mode="lines+markers")
    return fig

In [None]:
# Run app and display result inline in the notebook
# (`mode='inline'` is the JupyterDash display mode requested here)
app.run_server(mode='inline')

#### Current day data structure
State and details as of the current day: *contains information about districts*

https://api.covid19india.org/documentation/v4_data.html

In [None]:
url = "https://api.covid19india.org/v4/min/data.min.json"
# Bound the wait with a timeout so an unresponsive server cannot hang the
# notebook, and fail here on an HTTP error status instead of later, when the
# body is parsed as JSON.
response_data = requests.get(url, timeout=30)
response_data.raise_for_status()

##### State data
We here parse data at state level

In [None]:
# filter state metadata and districts out from json data
# Decode the body once: every `Response.json()` call parses the whole payload
# again, so calling it per state and per key repeats that work many times.
data_json = response_data.json()
json_st = {
    state: {
        name: block
        for name, block in state_data.items()
        if name not in ('districts', 'meta')
    }
    for state, state_data in data_json.items()
}

In [None]:
# Flatten the filtered state payload into a wide frame
wide_st_df = pd.json_normalize(json_st)
# Split every dotted column path into one index level per component, then turn
# those levels into the long-format columns (rename as desired)
st_col_index = wide_st_df.columns.str.split(".", expand=True)
long_st_df = st_col_index.to_frame(
    index=False,
    name=["state", "obs_type", "obs_cat"],
)

In [None]:
# The first row of the wide frame supplies one value per column path, in the
# same order as the rows of the long frame.
long_st_df["val"] = wide_st_df.to_numpy()[0]

**Notes**

- Doc caveat: any **obs_cat** category under key `delta` won't be present if a state/district doesn't see a change in such category (eg: `recovered`) for the current day
- Could any state/district not be even reported for the current day?
- research `delta21_14` meaning

In [None]:
# Display the state-level long table as the cell output
long_st_df

In [None]:
# Distinct state codes present in the current-day table, and how many there are
st_codes = long_st_df.state.unique()
print(f"{len(st_codes)} states:")
print(st_codes)

##### State metadata
Metadata at state level, **important** information here: population of the state (based on NCP projections)

To join eventually into state data

In [None]:
# filter state metadata from json data
# Decode the body once: every `Response.json()` call parses the whole payload
# again, so calling it per state and per key repeats that work many times.
data_json = response_data.json()
json_meta_st = {
    state: {
        name: block
        for name, block in state_data.items()
        if name == 'meta'
    }
    for state, state_data in data_json.items()
}

In [None]:
# Flatten the metadata, normalizing at most two levels deep
wide_meta_st_df = pd.json_normalize(json_meta_st, max_level=2)
# Split the dotted column paths and drop level 1, leaving (state, column) pairs
meta_col_index = wide_meta_st_df.columns.str.split(".", expand=True).droplevel(1)
long_meta_st_df = meta_col_index.to_frame(index=False, name=["state", "column"])
long_meta_st_df["val"] = wide_meta_st_df.values[0]
# Pivot the temporary long frame: one row per state, one column per `column` value
meta_st_df = long_meta_st_df.pivot(
    index='state', columns='column', values='val'
).reset_index()
# Clear the columns-axis name (`column`) left behind by the pivot
meta_st_df = meta_st_df.rename_axis(None, axis=1)

In [None]:
# Expand each `tested` cell into its own columns, then give the generic
# `date` / `source` names a `test_` prefix
tested_df = meta_st_df.tested.apply(pd.Series)
tested_df = tested_df.rename(
    columns={"date": "test_date", "source": "test_source"}
)
# Put the expanded columns next to the metadata and drop the nested original
meta_st_df = pd.concat([meta_st_df, tested_df], axis=1).drop(columns='tested')

In [None]:
# Display the state metadata table as the cell output
meta_st_df

##### District data
Eventually join into state data and metadata

In [None]:
# filter district data and metadata from json data
# Decode the body once: every `Response.json()` call parses the whole payload
# again, so calling it per state and per key repeats that work many times.
data_json = response_data.json()
json_ds = {
    state: {
        name: block
        for name, block in state_data.items()
        if name == 'districts'
    }
    for state, state_data in data_json.items()
}

In [None]:
# read json_ds and normalize - use custom separator: district names have points!
# perf_counter() is monotonic, so the measured interval cannot be distorted by
# system clock adjustments the way a time.time() difference can.
start_time = time.perf_counter()
wide_ds_df = pd.json_normalize(json_ds, max_level=4, sep='//')
total_sec = time.perf_counter() - start_time
print(f"{round(total_sec,1)} secs execution")

In [None]:
# Split every `//`-separated column path into index levels and drop level 1;
# the four remaining levels become the long-format columns (rename as desired)
ds_col_index = wide_ds_df.columns.str.split("//", expand=True).droplevel(1)
long_ds_df = ds_col_index.to_frame(
    index=False,
    name=["state", "district", "obs_type", "obs_cat"],
)
# The first row of the wide frame supplies the value for each path
long_ds_df["val"] = wide_ds_df.values[0]

In [None]:
# Boolean mask flagging the metadata rows of the temporary long frame
filter_meta = long_ds_df.obs_type == 'meta'
# Metadata rows only
long_meta_ds_df = long_ds_df.loc[filter_meta]
# District data in long format: every row that is not metadata
long_data_ds_df = long_ds_df.loc[~filter_meta]

In [None]:
# Display the district-level long table (metadata rows excluded) as the cell output
long_data_ds_df

##### District metadata
Metadata at district level, **important** and **outdated** information: population of the district (based on 2011 census)

**Note**: district names could be repeated among states

To join eventually into state data and metadata

In [None]:
# pivot temporary long into district metadata table
# Step by step:
#   1. drop `obs_type` - the rows were selected with obs_type == 'meta', so the
#      column is constant here;
#   2. index by (state, district, obs_cat) and unstack the last level, so each
#      `obs_cat` value becomes its own column;
#   3. reset_index(col_level=1) puts `state` / `district` back as columns on
#      the lower column level, after which the upper level is dropped;
#   4. rename_axis(None, axis=1) clears the name of the columns axis.
meta_ds_df = long_meta_ds_df.drop(columns='obs_type').set_index(
    ['state', 'district', 'obs_cat']
).unstack(level=-1).reset_index(col_level=1).droplevel(level=0, axis=1).rename_axis(None, axis=1)

In [None]:
# Expand each district `tested` cell into its own columns
ds_tested_df = meta_ds_df.tested.apply(pd.Series)
# Column `0` is discarded (presumably produced by cells that hold no dict -
# TODO confirm), then `date` / `source` get a `test_` prefix
ds_tested_df = ds_tested_df.drop(columns=0)
ds_tested_df = ds_tested_df.rename(
    columns={"date": "test_date", "source": "test_source"}
)
# Put the expanded columns next to the metadata and drop the nested original
meta_ds_df = pd.concat([meta_ds_df, ds_tested_df], axis=1).drop(columns='tested')

In [None]:
# Display the district metadata table as the cell output
meta_ds_df

#### "Data-all" data structure
Described as: *Per day numbers across states and districts - consider using timeseries in place of this -. This is a huge file and is a mix of time-series and current day data*

No documentation at https://api.covid19india.org/

**Note**: time-series data don't go down to district level as described. Is state time-series and current day data enough?

### [Source CoWIN](https://dashboard.cowin.gov.in/)
Is API documented?

#### Yves shared link 1
https://api.cowin.gov.in/api/v1/reports/v2/getPublicReports?state_id=&district_id=&date=2021-07-15

- Check out structure

In [None]:
# API parameters
st_id = ""
ds_id = ""
date = "2021-07-21"
url = "https://api.cowin.gov.in/api/v1/reports/v2/getPublicReports"
api_param = {
    "state_id": st_id,
    "district_id": ds_id,
    "date": date,
}
# A timeout keeps an unresponsive server from blocking the notebook
# indefinitely. The HTTP status is deliberately not checked here: the
# structure exploration tests `status_code` itself.
response_cowi = requests.get(url, params=api_param, timeout=30)
# Show the final URL (including the encoded query string) as the cell output
response_cowi.url

In [None]:
# keys in data_structure levels
if response_cowi.status_code == 200:
    # Decode the body once; each `Response.json()` call parses it again.
    cowi_json = response_cowi.json()
    keys_1 = list(cowi_json)
    print(f"Keys @level 1:\n{keys_1}")
    # keys_2 is aligned with keys_1: a list of keys for a dict value, a list of
    # key lists for a list of dicts, and the string 'None' for anything else.
    keys_2 = []
    for key in keys_1:
        value = cowi_json[key]
        if isinstance(value, dict):
            keys_2.append(list(value.keys()))
        elif isinstance(value, list):
            keys_2.append([list(elem.keys()) for elem in value])
        else:
            keys_2.append('None')
    print(f"Keys @level 2:\n{keys_2}")

In [None]:
# check if nested info at level 2
# Decode the body once instead of re-parsing it for every inspected value.
cowi_json = response_cowi.json()
data_types = []
for key, sub_keys in zip(keys_1, keys_2):
    # entries recorded as the string 'None' have no level-2 keys to inspect
    if not isinstance(sub_keys, list):
        continue
    for j, elem in enumerate(sub_keys):
        if isinstance(elem, list):
            # list of dicts: `elem` holds the keys of the j-th dict
            data_types.extend(type(cowi_json[key][j][key_2]) for key_2 in elem)
        else:
            # plain dict: `elem` is itself a level-2 key
            data_types.append(type(cowi_json[key][elem]))

In [None]:
# Distinct Python types collected from the level-2 values
print(set(data_types))
# Number of collected values that are dicts (shown as the cell output)
data_types.count(dict)

- `topBlock` Extraction

In [None]:
# Flatten the `topBlock` entry into a wide frame
top_block = response_cowi.json()['topBlock']
wide_top_df = pd.json_normalize(top_block)
# Split every dotted column path into (obs_type, obs_cat) and attach the values
top_col_index = wide_top_df.columns.str.split(".", expand=True)
long_top_df = top_col_index.to_frame(index=False, name=["obs_type", "obs_cat"])
long_top_df["val"] = wide_top_df.values[0]
# Display indexed by (obs_type, obs_cat); the result is shown, not stored
long_top_df.set_index(["obs_type", "obs_cat"])

- `vaccinationDoneByTime` Extraction

Doesn't look relevant for our analysis

In [None]:
# Pull the `vaccinationDoneByTime` entry out of the payload and tabulate it
vac_by_time_json = response_cowi.json()['vaccinationDoneByTime']
vac_by_time_df = pd.json_normalize(vac_by_time_json)
vac_by_time_df

- `last7DaysRegistration` Extraction

Doesn't look relevant for our analysis <!-- -->

In [None]:
# Pull the `last7DaysRegistration` entry out of the payload and tabulate it
reg7_json = response_cowi.json()['last7DaysRegistration']
reg7_df = pd.json_normalize(reg7_json)
reg7_df

- `last30DaysAefi` Extraction

**AEFI**: Adverse event following immunization

Doesn't look relevant for our analysis <!-- -->

In [None]:
# Pull the `last30DaysAefi` entry out of the payload and tabulate it
aefi30_json = response_cowi.json()['last30DaysAefi']
aefi30_df = pd.json_normalize(aefi30_json)
aefi30_df

- `last5daySessionStatus` Extraction

Doesn't look relevant for our analysis, *data length doesn't match key name* <!-- -->

In [None]:
# Pull the `last5daySessionStatus` entry out of the payload and tabulate it
ses5_json = response_cowi.json()['last5daySessionStatus']
ses5_df = pd.json_normalize(ses5_json)
ses5_df

- `getBeneficiariesGroupBy` Extraction

Data at state - *name not code* - level: `state_id` could be tested as API parameter if required

<!-- Doesn't look relevant for our analysis, *data length doesn't match key name* -->

In [None]:
# Pull the `getBeneficiariesGroupBy` entry out of the payload and tabulate it
ben_json = response_cowi.json()['getBeneficiariesGroupBy']
ben_df = pd.json_normalize(ben_json)
ben_df

- `aefiPercentage`: is this for the day or the total among time-series?

In [None]:
# Print the raw `aefiPercentage` value from the payload, followed by a percent sign
print(f"{response_cowi.json()['aefiPercentage']} %")

#### Yves shared link 2
https://api.cowin.gov.in/api/v1/reports/v2/getVacPublicReports?state_id=&district_id=&date=2021-07-15

- Check out structure

In [None]:
# API parameters
st_id = ""
ds_id = ""
date = "2021-07-21"
url = "https://api.cowin.gov.in/api/v1/reports/v2/getVacPublicReports"
api_param = {
    "state_id": st_id,
    "district_id": ds_id,
    "date": date,
}
# A timeout keeps an unresponsive server from blocking the notebook
# indefinitely. The HTTP status is deliberately not checked here: the
# structure exploration tests `status_code` itself.
response_cowi = requests.get(url, params=api_param, timeout=30)
# Show the final URL (including the encoded query string) as the cell output
response_cowi.url

In [None]:
# keys in data_structure levels
if response_cowi.status_code == 200:
    # Decode the body once; each `Response.json()` call parses it again.
    cowi_json = response_cowi.json()
    keys_1 = list(cowi_json)
    print(f"Keys @level 1:\n{keys_1}")
    # keys_2 is aligned with keys_1: a list of keys for a dict value, a list of
    # key lists for a list of dicts, and the string 'None' for anything else.
    keys_2 = []
    for key in keys_1:
        value = cowi_json[key]
        if isinstance(value, dict):
            keys_2.append(list(value.keys()))
        elif isinstance(value, list):
            keys_2.append([list(elem.keys()) for elem in value])
        else:
            keys_2.append('None')
    print(f"Keys @level 2:\n{keys_2}")

In [None]:
# check if nested info at level 2
# Decode the body once instead of re-parsing it for every inspected value.
cowi_json = response_cowi.json()
data_types = []
for key, sub_keys in zip(keys_1, keys_2):
    # entries recorded as the string 'None' have no level-2 keys to inspect
    if not isinstance(sub_keys, list):
        continue
    for j, elem in enumerate(sub_keys):
        if isinstance(elem, list):
            # list of dicts: `elem` holds the keys of the j-th dict
            data_types.extend(type(cowi_json[key][j][key_2]) for key_2 in elem)
        else:
            # plain dict: `elem` is itself a level-2 key
            data_types.append(type(cowi_json[key][elem]))

In [None]:
# Distinct Python types collected from the level-2 values
print(set(data_types))
# Number of collected values that are dicts (shown as the cell output)
data_types.count(dict)