In [2]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import datetime as dt
from jinja2 import Template
from IPython.display import HTML
from pathlib import Path
import os

In [91]:
## Abbreviations mapping
# Scrape the state-abbreviation table and load it into `abb_data`.
url = 'https://slusi.dacnet.nic.in/watershedatlas/list_of_state_abbreviation.htm'

# GET request. The timeout keeps a stalled server from hanging the notebook, and
# raise_for_status() fails loudly instead of parsing an HTTP error page into an empty table.
response = requests.get(url, timeout=30)
response.raise_for_status()
web_content = response.content
# parse the html content
soup = BeautifulSoup(web_content, "html.parser")


def extract_contents(row):
    """Return the text of every cell in `row` with newlines and surrounding spaces removed."""
    return [x.text.replace('\n', '').strip() for x in row]


# Keep only the <tr> rows that hold exactly three <td> cells (one per column in new_cols).
all_rows = soup.find_all('tr')
stats = [cells
         for cells in (extract_contents(tr.find_all('td')) for tr in all_rows)
         if len(cells) == 3]
new_cols = ["no", "state", "abb"]
abb_data = pd.DataFrame(data=stats, columns=new_cols)
for i in 'state, abb'.split(', '):
    abb_data[i] = abb_data[i].map(str)

In [95]:
# Remove the row labelled 0 in place (presumably the table's header row -- TODO confirm).
abb_data.drop(index=0, inplace=True)

In [99]:
# The "no" column is not needed for the abbreviation lookup; remove it in place.
del abb_data["no"]

In [102]:
# Lookup table: abbreviation -> state name.
abb_dict = dict(zip(abb_data['abb'], abb_data['state']))

### The dictionary above (`abb_dict`) maps each state abbreviation to its state name

In [132]:
# Daily confirmed counts; the first line of the CSV is used as the header (pandas default).
confirmed_csv_url = 'http://api.covid19india.org/states_daily_csv/confirmed.csv'
df = pd.read_csv(confirmed_csv_url)

In [133]:
# Index by date, then transpose so each original column becomes a row and each date a column.
df = df.set_index('date').T

In [134]:
# Keep every row except the last one.
df = df.iloc[:-1]

In [136]:
# Order rows by the most recent date column (largest first), then replace missing values
# with 0. Chained and non-inplace on purpose: `df` comes from an iloc slice, and sorting a
# slice in place is the pattern that triggers pandas' SettingWithCopyWarning.
df = df.sort_values(by=df.columns[-1], ascending=False).fillna(0)

In [137]:
# Cast every column to integer.
df = df.astype(int)

In [138]:
# Name the index 'states', then move it out of the index into a regular column.
df_named = df.rename_axis(index='states')
df = df_named.reset_index()

In [139]:
# Add the state name looked up from the abbreviation; codes missing from abb_dict become NaN.
df = df.assign(statesfull=df['states'].map(abb_dict))

In [153]:
# Move the last column to the front, keeping the remaining columns in their current order.
reordered_cols = [df.columns[-1], *df.columns[:-1]]
df = df[reordered_cols]

## Using the wayback machine to get started
```
url = 'https://web.archive.org/web/20200421011447/https://www.mohfw.gov.in/'
# GET request
web_content = requests.get(url).content
#parse the html content
soup = BeautifulSoup(web_content, "html.parser")
# remove new line and extra spaces
extract_contents = lambda row: [x.text.replace('\n','') for x in row]
stats = []
all_rows = soup.find_all('tr')
for row in all_rows:
    stat = extract_contents(row.find_all('td'))
    if len(stat) == 5:
        stats.append(stat)
new_cols = ["no","state","confirmed","recovered","deceased"]
state_data = pd.DataFrame(data = stats, columns = new_cols)
for i in 'confirmed, recovered, deceased'.split(', '):
    state_data[i] = state_data[i].map(int) 
    
```

In [9]:
# Scrape the state-wise table from the MoHFW home page into `state_data`.
url = 'https://www.mohfw.gov.in/'
# GET request. The timeout keeps a stalled server from hanging the notebook, and
# raise_for_status() fails loudly instead of parsing an HTTP error page into an empty table.
response = requests.get(url, timeout=30)
response.raise_for_status()
web_content = response.content
# parse the html content
soup = BeautifulSoup(web_content, "html.parser")


def extract_contents(row):
    """Return the text of every cell in `row` with newline characters removed."""
    return [x.text.replace('\n', '') for x in row]


# Keep only the <tr> rows that hold exactly five <td> cells (one per column in new_cols).
all_rows = soup.find_all('tr')
stats = [cells
         for cells in (extract_contents(tr.find_all('td')) for tr in all_rows)
         if len(cells) == 5]
new_cols = ["no", "state", "confirmed", "recovered", "deceased"]
state_data = pd.DataFrame(data=stats, columns=new_cols)
for i in 'confirmed, recovered, deceased'.split(', '):
    # int() accepts surrounding whitespace but not thousands separators, so drop commas
    # first; any other non-numeric text still raises ValueError rather than being hidden.
    state_data[i] = state_data[i].map(lambda cell: int(cell.replace(',', '')))

In [10]:
# One-time bootstrap, kept for reference: built the two history frames from the first scrape
# dft_cases = pd.DataFrame({'states':state_data.state.values, str(dt_yday): state_data.confirmed.values})
# dft_deaths = pd.DataFrame({'states':state_data.state.values, str(dt_yday): state_data.deceased.values})

In [11]:
# Load the two running history tables from CSV files in the current working directory.
path = Path.cwd()
os.chdir(path)
dft_cases, dft_deaths = (pd.read_csv(csv_name)
                         for csv_name in ('confirmedcases.csv', 'deathcases.csv'))

In [12]:
# Reference dates: today, and the day before it.
one_day = dt.timedelta(days=1)
dt_today = dt.date.today()
dt_yday = dt_today - one_day

In [13]:
# Add today's column only once per day: skip when the newest column is already today's date.
today_col = str(dt_today)
if today_col != dft_cases.columns[-1]:
    # Match on the state NAME, not the row position. Assigning state_data.confirmed directly
    # aligns on the integer row label, so a reordered or newly inserted row on the source
    # page silently files one state's figures under another state.
    # Names are stripped on both sides so stray whitespace cannot break the match.
    # NOTE(review): assumes one history row per state -- confirm against the CSVs.
    latest_by_state = (state_data
                       .assign(state=state_data['state'].str.strip())
                       .drop_duplicates(subset='state')
                       .set_index('state'))
    dft_cases[today_col] = dft_cases['states'].str.strip().map(latest_by_state['confirmed'])
    dft_deaths[today_col] = dft_deaths['states'].str.strip().map(latest_by_state['deceased'])
    # History rows with no scraped counterpart get NaN for today. Scraped states with no
    # history row are not added here; report them so they can be appended by hand.
    unmatched = latest_by_state.index.difference(dft_cases['states'].str.strip())
    if len(unmatched):
        print('Scraped states missing from confirmedcases.csv:', list(unmatched))

In [14]:
# Write both history tables back to disk so the next day's run can extend them.
for history, csv_name in ((dft_cases, 'confirmedcases.csv'), (dft_deaths, 'deathcases.csv')):
    history.to_csv(csv_name, index=False)

In [15]:
# Per-state sums for today's column (dfc_*) and yesterday's column (dfp_*).
today_key, yday_key = str(dt_today), str(dt_yday)
cases_by_state = dft_cases.groupby('states')
deaths_by_state = dft_deaths.groupby('states')
dfc_cases = cases_by_state[today_key].sum()
dfc_deaths = deaths_by_state[today_key].sum()
dfp_cases = cases_by_state[yday_key].sum()
dfp_deaths = deaths_by_state[yday_key].sum()

In [16]:
# Assemble the per-state table, sorted by cases and then deaths, largest first.
state_totals = pd.DataFrame({
    'Cases': dfc_cases,
    'Deaths': dfc_deaths,
    'PCases': dfp_cases,
    'PDeaths': dfp_deaths,
})
df_table = state_totals.sort_values(by=['Cases', 'Deaths'], ascending=False).reset_index()
# Day-over-day increase per metric. clip floors the difference at 0, so negative
# numbers are not allowed.
for metric in ('Cases', 'Deaths'):
    increase = df_table[metric] - df_table['P' + metric]
    df_table[f'{metric} (+)'] = increase.clip(lower=0)
# Deaths as a percentage of cases, rounded to two decimals.
df_table['Fatality Rate'] = (100 * df_table['Deaths'] / df_table['Cases']).round(2)

In [17]:
# Build the summary: overall totals plus prefixed totals for three highlighted states.
# Positions 1, 2, 5 and 6 of df_table are Cases, Deaths, Cases (+) and Deaths (+),
# given the order in which df_table's columns are created.
metrics = list(df_table.columns[[1, 2, 5, 6]])
s_mah = df_table.loc[df_table.states == 'Maharashtra', metrics].sum().add_prefix('MH ')
s_del = df_table.loc[df_table.states == 'Delhi', metrics].sum().add_prefix('DEL ')
s_guj = df_table.loc[df_table.states == 'Gujarat', metrics].sum().add_prefix('GUJ ')
overall_totals = df_table[metrics].sum()
summary = {
    'updated': dt_today,
    'since': dt_yday,
    **overall_totals,
    **s_mah,
    **s_del,
    **s_guj,
}

In [18]:
# Daily new cases per state: difference between consecutive date columns.
dt_cols = list(dft_cases.columns[1:])  # every column after the first, in file order
totals_by_day = dft_cases.groupby('states')[dt_cols].sum()
# diff leaves the first date column empty; fill it with 0 before casting to int.
dft_ct_new_cases = totals_by_day.diff(axis=1).fillna(0).astype(int)
# Display only: sort_values returns a new frame, dft_ct_new_cases itself stays unsorted.
dft_ct_new_cases.sort_values(by=dft_ct_new_cases.columns[-1], ascending=False)

Unnamed: 0_level_0,2020-04-20,2020-04-21,2020-04-22
states,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Andaman and Nicobar Islands,0,1,0
Madhya Pradesh,0,55,0
Uttarakhand,0,2,0
Uttar Pradesh,0,118,0
Tripura,0,0,0
Telengana,0,46,0
Tamil Nadu,0,43,0
Rajasthan,0,98,0
Punjab,0,26,0
Puducherry,0,0,0


In [19]:
# Labels and KPI definitions handed to the overview template when it is rendered.
COL_REGION = 'states'
KPI_CASE = 'India'
kpis_info = [
    dict(title=title, prefix=prefix)
    for title, prefix in (('Maharashtra', 'MH'), ('Delhi', 'DEL'), ('Gujarat', 'GUJ'))
]

In [20]:
#hide
def get_template(path):
    """Return the text of a template stored at a URL or on the local filesystem.

    Parameters
    ----------
    path : str or os.PathLike
        A URL (anything `urlparse` reports a network location for) or a local file path.

    Returns
    -------
    str
        The template contents, decoded as UTF-8.
    """
    from urllib.parse import urlparse
    # Lets pathlib.Path objects through; str and bytes values pass unchanged.
    location = os.fspath(path)
    if bool(urlparse(location).netloc):
        from urllib.request import urlopen
        # The context manager closes the HTTP response even if read/decode fails.
        with urlopen(location) as response:
            return response.read().decode('utf8')
    # Read local files as UTF-8 as well, so both branches decode the same way,
    # and close the file handle instead of leaving it to the garbage collector.
    with open(location, encoding='utf8') as handle:
        return handle.read()

In [22]:
#hide_input
# Fetch the Jinja template from GitHub. get_template also accepts a local file path,
# e.g. get_template('overview.tpl'), when working from a local copy.
template_source = get_template('https://raw.githubusercontent.com/hargun3045/dashboards/master/overview.tpl')
template = Template(template_source)
# New-cases columns from the first through the last entry of dt_cols.
newcases_window = dft_ct_new_cases.loc[:, dt_cols[0]:dt_cols[-1]]
render_context = dict(
    D=summary,
    table=df_table.head(10),  # drop .head(10) to pass every row
    KPI_CASE=KPI_CASE,
    COL_REGION=COL_REGION,
    KPIS_INFO=kpis_info,
    newcases=newcases_window,
    np=np,
    pd=pd,
    enumerate=enumerate,
)
html = template.render(**render_context)
HTML(f'<div>{html}</div>')

Unnamed: 0_level_0,10  100  1000,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0
Location,New Cases,Total Cases,Total Cases,Deaths,Deaths,Fatality,Unnamed: 7_level_1,Unnamed: 8_level_1
,Apr. 20  Apr. 22,,"(+NEW) since Apr, 21","(+NEW) since Apr, 21","(+NEW) since Apr, 21","(+NEW) since Apr, 21",,
Maharashtra,,5218.0,(+549),251,(+19),4.81%,,
Gujarat,,2178.0,(+112),90,(+13),4.13%,,
Delhi,,2156.0,(+75),47,(+0),2.18%,,
Punjab,,1659.0,"(+1,414)",25,(+9),1.51%,,
Rajasthan,,1596.0,(+20),18,(+0),1.13%,,
Madhya Pradesh,,1552.0,(+12),76,(+0),4.9%,,
Uttarakhand,,1294.0,"(+1,248)",20,(+20),1.55%,,
Tamil Nadu,,928.0,(+0),23,(+6),2.48%,,
Andhra Pradesh,,757.0,(+0),22,(+0),2.91%,,
