In [1]:
# import the necessary libraries
import numpy as np 
import pandas as pd 
import databricks.koalas as ks

import requests
import json
from datetime import date, timedelta, datetime

# Visualisation libraries
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
# apply seaborn's default theme to the matplotlib figures drawn below
sns.set()

import warnings
# NOTE(review): this silences *every* warning category for the rest of the
# session, so deprecation and data-quality warnings will not be shown either.
warnings.filterwarnings('ignore')

In [2]:
# read the data
# The payload is fetched once: the original cell downloaded and parsed the same
# JSON twice. A timeout keeps a stalled connection from hanging the kernel, and
# raise_for_status() surfaces HTTP errors instead of a confusing JSON/KeyError
# further down.
res = requests.get("https://api.covid19india.org/raw_data.json", timeout=60)
res.raise_for_status()

j = res.json()

# one record per reported case
kdf = ks.DataFrame(j["raw_data"])

# `pdf` is not used by the cells visible here; it is kept as an independent
# copy of the raw frame so any later reference to it still resolves.
pdf = kdf.copy()

# (rows, columns) of the raw data set
kdf.shape

In [3]:
# drop records that have no announcement date
has_date = kdf["dateannounced"] != ''
kdf = kdf[has_date]

# parse the day/month/year strings into timestamps
kdf['dateannounced'] = ks.to_datetime(kdf['dateannounced'], format='%d/%m/%Y')

# keep only cases announced strictly after the cut-off date
sdate = datetime.strptime('2020-03-01', '%Y-%m-%d')
cutoff = sdate.strftime('%Y-%m-%d')
after_cutoff = kdf['dateannounced'] > cutoff
kdf = kdf[after_cutoff]

# (rows, columns) remaining after filtering
kdf.shape

In [4]:
# headline count of records with a status, followed by the per-status breakdown
status_series = kdf['currentstatus']
print(f"Total Detected  {status_series.count()}")
print('-------------------')
print(status_series.value_counts())

In [5]:
col = 'currentstatus'

# charts of current status
#
# Each chart is drawn on the axes created by plt.subplots(). Without an
# explicit `ax=`, the plot call opens its own figure and the one created here
# is left blank.

# donut (pie with a white centre) of the share of each status
fig1, ax1 = plt.subplots(figsize=(10,5))
kdf[col].value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax1)
ax1.axis('equal')  # keep the pie circular

# white disc over the centre turns the pie into a donut
centre_circle = plt.Circle((0,0),0.40,fc='white')
ax1.add_artist(centre_circle)

# horizontal bar chart of the same counts, smallest at the bottom
fig2, ax2 = plt.subplots(figsize=(10,5))
kdf[col].value_counts(ascending=True).plot(kind='barh', ax=ax2)
ax2.set_title('Current Status', fontsize=14)

plt.show()

In [6]:
# reduce the timestamps to plain calendar dates so counts group per day
kdf["dateannounced"] = ks.to_datetime(kdf["dateannounced"]).dt.date

col = 'dateannounced'

# charts of diagnosed date
#
# Each chart is drawn on the axes created by plt.subplots(). Without an
# explicit `ax=`, the plot call opens its own figure and the one created here
# is left blank.

# donut (pie with a white centre) of the share of cases per date
fig1, ax1 = plt.subplots(figsize=(10,5))
kdf[col].value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax1)
ax1.axis('equal')  # keep the pie circular

# white disc over the centre turns the pie into a donut
centre_circle = plt.Circle((0,0),0.40,fc='white')
ax1.add_artist(centre_circle)

# horizontal bar chart of cases per date, in date order
fig2, ax2 = plt.subplots(figsize=(10,5))
kdf[col].value_counts().sort_index().plot(kind='barh', ax=ax2)
ax2.set_title('Diagnosed On', fontsize=14)
plt.show()

In [7]:
col = 'detectedstate'

# charts of detected state
#
# Each chart is drawn on the axes created by plt.subplots(). Without an
# explicit `ax=`, the plot call opens its own figure and the one created here
# is left blank.

# donut (pie with a white centre) of the share of cases per state
fig1, ax1 = plt.subplots(figsize=(10,5))
kdf[col].value_counts().plot(kind='pie', autopct='%1.1f%%', ax=ax1)
ax1.axis('equal')  # keep the pie circular

# white disc over the centre turns the pie into a donut
centre_circle = plt.Circle((0,0),0.40,fc='white')
ax1.add_artist(centre_circle)

# horizontal bar chart of cases per state, smallest at the bottom
fig2, ax2 = plt.subplots(figsize=(10,5))
kdf[col].value_counts(ascending=True).plot(kind='barh', ax=ax2)
ax2.set_title('Detected State', fontsize=14)
plt.show()

In [8]:
# status date-wise data prep
# Build one single-column frame of per-date counts for all cases ('Overall'),
# then one per status, and line them up side by side on the date index.
cols = ['Hospitalized','Recovered','Deceased']

status_frames = [
    ks.DataFrame(kdf['dateannounced'].value_counts().sort_index()).rename(columns={'dateannounced': 'Overall'})
]
for status in cols:
    per_status_counts = kdf[kdf['currentstatus'] == status]['dateannounced'].value_counts().sort_index()
    status_frames.append(ks.DataFrame(per_status_counts).rename(columns={'dateannounced': status}))

# missing entries after the column-wise concat are filled with 0
tmp = ks.concat(status_frames, axis=1).fillna(0).astype('int64')

# only the per-status columns are plotted
tmp = tmp[cols]

# status date-wise
tmp.plot.area(stacked=False, figsize=(10,5))

plt.xticks(tmp.index, fontsize=12, rotation=45)
plt.title('Status Date-wise', fontsize=14)
plt.show()

In [9]:
# cumulative sum data prep
# sort by date first so the running totals accumulate in date order
tmp = tmp.sort_index()

cumsum_cols = []
for status in ['Hospitalized', 'Recovered', 'Deceased']:
    running_col = status + '_CumSum'
    tmp[running_col] = tmp[status].cumsum()
    cumsum_cols.append(running_col)

tmp_cumsum = tmp[cumsum_cols]

# cumulative counts per status, date-wise
tmp_cumsum.plot.area(stacked=False, figsize=(10,5))

plt.xticks(tmp_cumsum.index, fontsize=12, rotation=45)
plt.title('Cumulative Sum Status Date-wise', fontsize=14)
plt.show()

In [10]:
# state-wise data prep
# Build one single-column frame of per-date counts for all cases ('Overall'),
# then one per selected state, and line them up side by side on the date index.
cols = ['Maharashtra', 'Tamil Nadu', 'Delhi', 'Telangana', 'Kerala', 'Uttar Pradesh']

state_frames = [
    ks.DataFrame(kdf['dateannounced'].value_counts().sort_index()).rename(columns={'dateannounced': 'Overall'})
]
for state in cols:
    per_state_counts = kdf[kdf['detectedstate'] == state]['dateannounced'].value_counts().sort_index()
    state_frames.append(ks.DataFrame(per_state_counts).rename(columns={'dateannounced': state}))

# missing entries after the column-wise concat are filled with 0
tmp = ks.concat(state_frames, axis=1).fillna(0).astype('int64')

# only the per-state columns are plotted
tmp = tmp[cols]

# Detection State-wise
tmp.plot.area(stacked=False, figsize=(10,5))

plt.xticks(tmp.index, fontsize=12, rotation=45)
plt.title('Detection State-wise', fontsize=14)
plt.show()

In [11]:
# cumulative sum data prep
# sort by date first so the running totals accumulate in date order
tmp = tmp.sort_index()

state_cumsum_cols = []
for state in ['Maharashtra', 'Tamil Nadu', 'Delhi', 'Telangana', 'Kerala', 'Uttar Pradesh']:
    # column name: spaces in the state name become underscores, plus '_CumSum'
    running_col = state.replace(' ', '_') + '_CumSum'
    tmp[running_col] = tmp[state].cumsum()
    state_cumsum_cols.append(running_col)

tmp_cumsum = tmp[state_cumsum_cols]

# Cumulative Sum Detected by States
tmp_cumsum.plot.area(stacked=False, figsize=(10,5))

plt.xticks(tmp.index, fontsize=12, rotation=45)
plt.title('Cumulative Sum Detected by States', fontsize=14)
plt.show()