In [1]:
import json
import random
from pathlib import Path

import pandas as pd

%load_ext autoreload
%autoreload 2
# Monkey-patch pathlib.Path with an `ls()` helper that returns the entries of a directory as a list.
Path.ls = lambda x: list(x.iterdir())
# from urllib.parse import quote_plus

In [19]:
from datetime import date

# Date on which this cell is executed; it is used further down to stamp the exported CSV filename.
today = date.today()

# Compact YYYYMMDD form with no separators, e.g. "20200410".
current_date = today.strftime("%Y%m%d")
print("d1 =", current_date)

# NOTE(review): hard-coded integer date that is not referenced by any visible cell —
# confirm whether something further down still needs it.
d1 = 20200410


In [2]:
import urllib.request, json
from urllib.error import HTTPError

# def get_raw_data(raw_web_url="https://api.covid19india.org/raw_data.json"):
#     with urllib.request.urlopen(raw_web_url) as url:
#         data_dict = json.loads(url.read().decode())
#         return data_dict["raw_data"]


def get_stats_history(
    stats_history_url="https://api.rootnet.in/covid19-in/stats/history",
    backup_path="history.json",
):
    """Download the stats-history JSON, falling back to a local copy on failure.

    Args:
        stats_history_url: URL returning the history payload as JSON.
        backup_path: Local JSON file read when the URL cannot be fetched.

    Returns:
        The decoded JSON payload.

    Raises:
        OSError: If the download fails and the backup file cannot be opened.
        json.JSONDecodeError: If the fetched or backup content is not valid JSON.
    """
    # Imported locally so the function does not depend on another cell's imports.
    from urllib.error import URLError

    try:
        with urllib.request.urlopen(stats_history_url) as response:
            return json.loads(response.read().decode())
    except URLError:
        # URLError is the parent class of HTTPError, so this covers HTTP error
        # statuses as well as unreachable hosts / refused connections / no network.
        print(f"Using local backup of {stats_history_url}")
        with open(backup_path) as f:
            return json.load(f)
        
# def get_state_data(
#     case_count_url="https://api.covid19india.org/state_district_wise.json",
# ):
#     with urllib.request.urlopen(case_count_url) as url:
#         data_dict = json.loads(url.read().decode())
#         return data_dict


# def get_case_count(data, state="Karnataka"):
#     df = pd.DataFrame(data[state]["districtData"])
#     df = df.transpose()
#     df.reset_index(inplace=True)
#     df.rename(columns={"confirmed": "CaseCount", "index": "District"}, inplace=True)
#     df.drop(columns=["lastupdatedtime"], inplace=True)
#     return df

In [3]:
from typing import List, Dict
# Fetch the history payload (network call, or local backup) and keep the list stored under its "data" key.
stats:List = get_stats_history()["data"]

In [4]:
def get_date_df(stat: Dict) -> pd.DataFrame:
    """Flatten one day's snapshot into a frame with one row per region.

    Args:
        stat: Mapping with a "day" entry and a "regional" iterable whose items
            provide "loc", "confirmedCasesIndian" and "confirmedCasesForeign".

    Returns:
        DataFrame with the fields "Day", "Province/State" and "CaseCount",
        where CaseCount is the sum of the Indian and foreign confirmed counts.
    """
    snapshot_day = stat["day"]
    rows = []
    for entry in stat["regional"]:
        location = entry["loc"]
        confirmed_total = entry["confirmedCasesIndian"] + entry["confirmedCasesForeign"]
        rows.append(
            {
                "Day": snapshot_day,
                "Province/State": location,
                "CaseCount": confirmed_total,
            }
        )
    return pd.DataFrame(rows)


In [5]:
# Stack the per-day frames into one long table and replace the repeated
# per-frame row labels with a fresh 0..n-1 index.
regionwise_df = pd.concat([get_date_df(stat) for stat in stats]).reset_index(drop=True)

In [6]:
# Show the column labels of the long-format table before pivoting.
regionwise_df.columns

Index(['CaseCount', 'Day', 'Province/State'], dtype='object')

## Transform to the JHU data format
See this [GitHub link to the data](https://github.com/CSSEGISandData/COVID-19/blob/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv) for reference.

In [7]:
# Let notebook displays show up to 500 rows before truncating.
pd.set_option("display.max_rows", 500)

# Reshape long -> wide: one row per Province/State, one column per Day.
# `values` is passed as a list, so the resulting columns are two-level
# (value name, Day). Region/day combinations with no record are filled with 0.
# The aggregation is named by string: passing the builtin `sum` is deprecated
# in recent pandas releases and emits a FutureWarning.
datewise_pivot_df = pd.pivot_table(
    regionwise_df,
    index=["Province/State"],
    values=["CaseCount"],
    columns=["Day"],
    aggfunc="sum",
).fillna(0)

In [15]:
# Collapse the two-level (value name, Day) column labels down to the Day level.
# Taking the last level is a no-op once the columns are already flat, so this
# cell can be re-run safely; indexing each label with [1] would instead reduce
# an already-flattened string label to its second character.
datewise_pivot_df.columns = list(datewise_pivot_df.columns.get_level_values(-1))

In [16]:
# Prepend the constant metadata columns that precede the date columns.
# Each insert is guarded because DataFrame.insert raises ValueError when the
# column already exists, which would make re-running this cell fail.
# NOTE(review): the same Lat/Long pair (20, 70) is applied to every row —
# confirm a single approximate coordinate is what is intended.
if "Country/Region" not in datewise_pivot_df.columns:
    datewise_pivot_df.insert(0, "Country/Region", "India")
if "Lat" not in datewise_pivot_df.columns:
    datewise_pivot_df.insert(1, "Lat", 20)
if "Long" not in datewise_pivot_df.columns:
    datewise_pivot_df.insert(2, "Long", 70)

In [17]:
# Preview the first rows of the wide table to check the metadata and date columns.
datewise_pivot_df.head()

Unnamed: 0_level_0,Country/Region,Lat,Long,2020-03-10,2020-03-11,2020-03-12,2020-03-13,2020-03-14,2020-03-15,2020-03-16,...,2020-04-01,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,2020-04-09,2020-04-10
Province/State,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Andaman and Nicobar Islands,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0
Andhra Pradesh,India,20,70,0.0,0.0,0.0,1.0,1.0,1.0,1.0,...,83.0,86.0,132.0,161.0,190.0,226.0,266.0,305.0,348.0,348.0
Arunachal Pradesh,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Assam,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,5.0,16.0,24.0,26.0,26.0,26.0,27.0,28.0,29.0
Bihar,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,23.0,24.0,29.0,30.0,30.0,32.0,32.0,38.0,39.0,39.0


In [23]:
# Date-stamped output path, relative to the notebook's working directory.
fname = f"../data/time_series_covid19_confirmed_India_{current_date}.csv"

# Write the wide table; the header row is emitted by default and the
# Province/State index is written as the first CSV column.
datewise_pivot_df.to_csv(fname)

In [22]:
!cat $fname

Province/State,Country/Region,Lat,Long,2020-03-10,2020-03-11,2020-03-12,2020-03-13,2020-03-14,2020-03-15,2020-03-16,2020-03-17,2020-03-18,2020-03-19,2020-03-20,2020-03-21,2020-03-22,2020-03-23,2020-03-24,2020-03-25,2020-03-26,2020-03-27,2020-03-28,2020-03-29,2020-03-30,2020-03-31,2020-04-01,2020-04-02,2020-04-03,2020-04-04,2020-04-05,2020-04-06,2020-04-07,2020-04-08,2020-04-09,2020-04-10
Andaman and Nicobar Islands,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,6.0,9.0,9.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,10.0,11.0,11.0
Andhra Pradesh,India,20,70,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0,3.0,5.0,7.0,8.0,9.0,11.0,12.0,14.0,19.0,23.0,40.0,83.0,86.0,132.0,161.0,190.0,226.0,266.0,305.0,348.0,348.0
Arunachal Pradesh,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
Assam,India,20,70,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0