In [71]:
import pandas as pd
import numpy as np
import requests
from bs4 import BeautifulSoup
import datetime as dt

# Starting point: a Wayback Machine capture (timestamp 20200421011447) of the MoHFW home page
url = "https://web.archive.org/web/20200421011447/https://www.mohfw.gov.in/"
```
# Scraping step, shown here inside a fenced block: reads the page at `url` and
# builds `state_data` with one row per 5-cell table row.
# GET request (no timeout is passed, so this waits as long as the server takes)
web_content = requests.get(url).content
# Parse the HTML content
soup = BeautifulSoup(web_content, "html.parser")
# Text of each element with newline characters removed (other whitespace is left untouched)
extract_contents = lambda row: [x.text.replace('\n','') for x in row]
stats = []
all_rows = soup.find_all('tr')
for row in all_rows:
    stat = extract_contents(row.find_all('td'))
    # Keep only rows with exactly five <td> cells, one per name in new_cols
    if len(stat) == 5:
        stats.append(stat)
new_cols = ["no","state","confirmed","recovered","deceased"]
state_data = pd.DataFrame(data = stats, columns = new_cols)
# The scraped cell texts are strings; convert the three count columns to int
for i in 'confirmed, recovered, deceased'.split(', '):
    state_data[i] = state_data[i].map(int) 
    
```

In [130]:
# Scrape the state-wise table from the live MoHFW home page into `state_data`.
url = 'https://www.mohfw.gov.in/'

# GET request. The timeout stops the notebook from hanging on a stalled connection,
# and raise_for_status() keeps an HTTP error page from being parsed as if it were data.
response = requests.get(url, timeout=30)
response.raise_for_status()
web_content = response.content

# Parse the HTML content
soup = BeautifulSoup(web_content, "html.parser")


def extract_contents(row):
    """Return the text of each element in `row` with newline characters removed."""
    return [cell.text.replace('\n', '') for cell in row]


# Keep only the table rows that have exactly five <td> cells, one per name in `new_cols`.
all_rows = soup.find_all('tr')
stats = []
for row in all_rows:
    stat = extract_contents(row.find_all('td'))
    if len(stat) == 5:
        stats.append(stat)

# Fail loudly when nothing matched: an empty frame would otherwise flow into the
# history tables as an all-NaN column and be written back to the CSV files.
if not stats:
    raise ValueError(f'No 5-column table rows found at {url}; has the page layout changed?')

new_cols = ["no", "state", "confirmed", "recovered", "deceased"]
state_data = pd.DataFrame(data=stats, columns=new_cols)

# The scraped cell texts are strings. Convert the count columns to int, tolerating
# thousands separators such as "1,234" (which a bare int() would reject).
for col in ('confirmed', 'recovered', 'deceased'):
    state_data[col] = state_data[col].str.replace(',', '', regex=False).map(int)

In [126]:
# One-time bootstrap (kept for reference): seed both history tables from the first scrape
# dft_cases = pd.DataFrame({'states':state_data.state.values, str(dt_yday): state_data.confirmed.values})
# dft_deaths = pd.DataFrame({'states':state_data.state.values, str(dt_yday): state_data.deceased.values})

In [103]:
# Load the two running history tables (confirmed cases and deaths) from their CSV files
dft_cases, dft_deaths = (
    pd.read_csv(csv_name) for csv_name in ('confirmedcases.csv', 'deathcases.csv')
)

In [129]:
# Reference dates: today's date (per the local clock) and the calendar day before it
dt_today = dt.date.today()
dt_yday = dt.date.fromordinal(dt_today.toordinal() - 1)

In [132]:
# Add today's figures as a new date column, unless the last column is already today's
# (so re-running on the same day does not add a second column).
# NOTE(review): these assignments align on row index, not on state name, so they presume
# the scraped table lists states in the same row order as the CSV files; confirm.
today_col = str(dt_today)
if dft_cases.columns[-1] != today_col:
    dft_cases[today_col] = state_data['confirmed']
    dft_deaths[today_col] = state_data['deceased']

In [136]:
# Write both history tables back to disk (without the index) for the next day's run
for history, csv_name in ((dft_cases, 'confirmedcases.csv'), (dft_deaths, 'deathcases.csv')):
    history.to_csv(csv_name, index=False)

In [137]:
# Per-state sums of today's and yesterday's columns, for cases and for deaths.
# NOTE(review): both tables must contain a column named after yesterday's date, so
# presumably the notebook has to have been run the day before; confirm.
today_col, yday_col = str(dt_today), str(dt_yday)
cases_by_state = dft_cases.groupby('states')
deaths_by_state = dft_deaths.groupby('states')
dfc_cases = cases_by_state[today_col].sum()
dfc_deaths = deaths_by_state[today_col].sum()
dfp_cases = cases_by_state[yday_col].sum()
dfp_deaths = deaths_by_state[yday_col].sum()

In [140]:
# Build the cases table: one row per state, largest case count first (ties broken by deaths)
totals = pd.DataFrame({
    'Cases': dfc_cases,
    'Deaths': dfc_deaths,
    'PCases': dfp_cases,
    'PDeaths': dfp_deaths,
})
df_table = totals.sort_values(by=['Cases', 'Deaths'], ascending=False).reset_index()

# Change since the previous day. clip(lower=0) means negative numbers are not allowed:
# any negative difference is replaced by 0.
df_table['Cases (+)'] = (df_table['Cases'] - df_table['PCases']).clip(lower=0)
df_table['Deaths (+)'] = (df_table['Deaths'] - df_table['PDeaths']).clip(lower=0)

# Deaths per 100 cases, rounded to two decimals
df_table['Fatality Rate'] = (df_table['Deaths'] * 100 / df_table['Cases']).round(2)

In [153]:
# Build the summary dict passed to the template as `D`: the two reference dates,
# the totals over all rows of df_table, and prefixed totals for three states.

# Metric columns named explicitly. These are the same four columns the positional
# lookup [1, 2, 5, 6] selected, but this no longer breaks if a column is inserted.
metrics = ['Cases', 'Deaths', 'Cases (+)', 'Deaths (+)']


def _state_totals(state, prefix):
    """Sum the metric columns over the rows of `state`; labels become '<prefix> <metric>'."""
    return df_table.loc[df_table.states == state, metrics].sum().add_prefix(f'{prefix} ')


s_mah = _state_totals('Maharashtra', 'MH')
s_del = _state_totals('Delhi', 'DEL')
s_guj = _state_totals('Gujarat', 'GUJ')

# Key order: dates, overall totals, then MH / DEL / GUJ totals.
summary = {
    'updated': dt_today,
    'since': dt_yday,
    **df_table[metrics].sum(),
    **s_mah,
    **s_del,
    **s_guj,
}

In [199]:
# New cases per state and date.
# Every column after the first one is treated as a date column, in file order
# (the first column is presumably 'states'; confirm against the CSV).
dt_cols = list(dft_cases.columns[1:])
state_totals = dft_cases.groupby('states')[dt_cols].sum()
# diff(axis=1) subtracts the preceding date column; the resulting NaNs (e.g. in the
# first date column, which has no predecessor) are replaced by 0 before the int cast.
dft_ct_new_cases = state_totals.diff(axis=1).fillna(0).astype(int)
# Displayed as the cell output only: states ordered by the latest date's new cases
dft_ct_new_cases.sort_values(by=dft_ct_new_cases.columns[-1], ascending=False)

In [214]:
# Constants passed to the overview template: the region column name, the headline
# label, and the title/prefix pairs of the highlighted states.
COL_REGION = 'states'
KPI_CASE = 'India'
kpis_info = [
    dict(title=title, prefix=prefix)
    for title, prefix in (('Maharashtra', 'MH'), ('Delhi', 'DEL'), ('Gujarat', 'GUJ'))
]

In [217]:
from jinja2 import Template
from IPython.display import HTML

In [223]:
#Import template
#hide_input
# Earlier approach, kept for reference: load the template straight from GitHub.
# template = Template(get_template('https://raw.githubusercontent.com/hargun3045/dashboards/master/overview.tpl'))
# Read with an explicit encoding so the result does not depend on the platform default.
with open('overview.tpl', 'r', encoding='utf-8') as f:
    temp = f.read()
template = Template(temp)
html = template.render(
    D=summary,
    table=df_table.head(10),  # remove .head(10) to pass every row
    KPI_CASE=KPI_CASE, COL_REGION=COL_REGION,
    KPIS_INFO=kpis_info,
    # Date columns from the first to the last entry of dt_cols (label slices include both ends)
    newcases=dft_ct_new_cases.loc[:, dt_cols[0]:dt_cols[-1]],
    np=np, pd=pd, enumerate=enumerate)
# Last expression of the cell: render the generated markup as the cell output
HTML(f'<div>{html}</div>')

Unnamed: 0_level_0,10  100  1000,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0
Location,New Cases,Total Cases,Total Cases,Deaths,Deaths,Fatality,Unnamed: 7_level_1,Unnamed: 8_level_1
,Apr. 20  Apr. 21,,"(+NEW) since Apr, 20","(+NEW) since Apr, 20","(+NEW) since Apr, 20","(+NEW) since Apr, 20",,
Maharashtra,,4669.0,(+466),232,(+9),4.97%,,
Delhi,,2081.0,(+78),47,(+2),2.26%,,
Gujarat,,2066.0,(+215),77,(+10),3.73%,,
Rajasthan,,1576.0,(+98),25,(+11),1.59%,,
Madhya Pradesh,,1540.0,(+55),76,(+2),4.94%,,
Tamil Nadu,,1520.0,(+43),17,(+2),1.12%,,
Uttar Pradesh,,1294.0,(+118),20,(+3),1.55%,,
Telengana,,919.0,(+46),23,(+2),2.5%,,
Andhra Pradesh,,757.0,(+35),22,(+2),2.91%,,
