In [1]:
from pathlib import Path

import pandas as pd

In [2]:
# Load the three JHU CSSE global time-series tables (confirmed, deaths,
# recovered) from the repository's master branch, each indexed by country name.
df_confirmed, df_deaths, df_recovered = (
    pd.read_csv(source_url, index_col='Country/Region')
    for source_url in (
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv',
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_deaths_global.csv',
        'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_recovered_global.csv',
    )
)

In [3]:
# Give every table a short 'Province' column and rename its last column
# (the final entry of the time series) to a metric-specific total name.
for _frame, _total_name in (
    (df_confirmed, 'Confirmed_Cases'),
    (df_deaths, 'Total_Deaths'),
    (df_recovered, 'Total_Recovered'),
):
    _frame.rename(
        columns={'Province/State': 'Province', _frame.columns[-1]: _total_name},
        inplace=True,
    )

In [4]:
# Keep only the province label and the renamed total column of each table.
Confirmed = df_confirmed.loc[:, ['Province', 'Confirmed_Cases']]
Deaths = df_deaths.loc[:, ['Province', 'Total_Deaths']]
Recovered = df_recovered.loc[:, ['Province', 'Total_Recovered']]

In [5]:
# Boolean masks marking the rows that carry no province label.
filt = Confirmed['Province'].isna()
filt1 = Deaths['Province'].isna()
filt2 = Recovered['Province'].isna()

In [6]:
# Restrict each table to the rows without a province label.
Confirmed = Confirmed[filt]
Deaths = Deaths[filt1]
Recovered = Recovered[filt2]

In [7]:
# The UK has stopped publishing recovered-case data, so its recovered total is 0.
Recovered.loc['United Kingdom']

Province           NaN
Total_Recovered      0
Name: United Kingdom, dtype: object

In [8]:
# Line the three per-country tables up side by side, matching rows on the index.
new = pd.concat([Confirmed, Deaths, Recovered], axis=1)

In [9]:
# Remove every 'Province' column (one was carried over from each source table).
new.drop(columns=['Province'], inplace=True)

In [10]:
# Show the ten rows with the highest confirmed-case totals.
new.nlargest(n=10, columns='Confirmed_Cases')

Unnamed: 0,Confirmed_Cases,Total_Deaths,Total_Recovered
US,2163290.0,117717.0,592191
Brazil,955377.0,46510.0,521046
Russia,552549.0,7468.0,303800
India,366946.0,12237.0,194325
United Kingdom,299251.0,42153.0,0
Spain,244683.0,27136.0,150376
Peru,240908.0,7257.0,128622
Italy,237828.0,34448.0,179455
Chile,220628.0,3615.0,181931
Iran,195051.0,9185.0,154812


In [11]:
# Inspect the combined totals for the US row.
new.loc['US', :]

Confirmed_Cases    2163290.0
Total_Deaths        117717.0
Total_Recovered     592191.0
Name: US, dtype: float64

In [12]:
# Total the confirmed cases over all rows of each selected country.
# Group by the bare label rather than a one-element list: after a length-1
# list grouper, newer pandas expects get_group() to receive a tuple key, so
# get_group('China') warns (pandas 2.2) and is slated to stop working.
grpC = df_confirmed.groupby('Country/Region')
chinaC = grpC.get_group('China')['Confirmed_Cases'].sum()
canadaC = grpC.get_group('Canada')['Confirmed_Cases'].sum()
australiaC = grpC.get_group('Australia')['Confirmed_Cases'].sum()

In [13]:
# Total the deaths over all rows of each selected country.
# Group by the bare label rather than a one-element list: after a length-1
# list grouper, newer pandas expects get_group() to receive a tuple key, so
# get_group('China') warns (pandas 2.2) and is slated to stop working.
grpD = df_deaths.groupby('Country/Region')
chinaD = grpD.get_group('China')['Total_Deaths'].sum()
canadaD = grpD.get_group('Canada')['Total_Deaths'].sum()
australiaD = grpD.get_group('Australia')['Total_Deaths'].sum()

In [14]:
# Total the recovered cases over all rows of each selected country.
# Group by the bare label rather than a one-element list: after a length-1
# list grouper, newer pandas expects get_group() to receive a tuple key, so
# get_group('China') warns (pandas 2.2) and is slated to stop working.
grpR = df_recovered.groupby('Country/Region')
australiaR = grpR.get_group('Australia')['Total_Recovered'].sum()
# Canada is read straight from the province-less Recovered table instead of
# being summed -- presumably the recovered file has a single country-level
# Canada row; confirm against the source data.
canadaR = Recovered.loc['Canada', 'Total_Recovered']
chinaR = grpR.get_group('China')['Total_Recovered'].sum()

In [15]:
# Add the summed Australia totals as a new row labelled 'Australia'.
Arow = pd.Series(data={'Confirmed_Cases':australiaC, 'Total_Deaths':australiaD, 'Total_Recovered':australiaR}, name= 'Australia')
# DataFrame.append was removed in pandas 2.0; concatenating the Series as a
# one-row frame (its name becomes the index label) gives the same result.
new = pd.concat([new, Arow.to_frame().T])

In [16]:
# Add the summed China totals as a new row labelled 'China'.
Chrow = pd.Series(data={'Confirmed_Cases':chinaC, 'Total_Deaths':chinaD, 'Total_Recovered':chinaR}, name= 'China')
# DataFrame.append was removed in pandas 2.0; concatenating the Series as a
# one-row frame (its name becomes the index label) gives the same result.
new = pd.concat([new, Chrow.to_frame().T])

In [17]:
# Remove the existing Canada row from the combined table.
new.drop(index=['Canada'], inplace=True)

In [18]:
# Add the Canada totals as a new row labelled 'Canada'.
Carow = pd.Series(data={'Confirmed_Cases':canadaC, 'Total_Deaths':canadaD, 'Total_Recovered':canadaR}, name= 'Canada')
# DataFrame.append was removed in pandas 2.0; concatenating the Series as a
# one-row frame (its name becomes the index label) gives the same result.
new = pd.concat([new, Carow.to_frame().T])

In [19]:
# Show the last five rows, which include the three rows added above.
new.tail(5)

Unnamed: 0,Confirmed_Cases,Total_Deaths,Total_Recovered
Tajikistan,5221.0,51.0,3700
Lesotho,4.0,0.0,2
Australia,7391.0,102.0,6877
China,84458.0,4638.0,79510
Canada,101491.0,8312.0,63280


In [20]:
# Active cases = confirmed cases minus deaths minus recoveries.
new['Active_Cases'] = (
    new['Confirmed_Cases']
    .sub(new['Total_Deaths'])
    .sub(new['Total_Recovered'])
)

In [21]:
# Convert every column to integers, then list the twenty rows with the most
# active cases.
new = new.astype(dtype=int)
new.nlargest(n=20, columns='Active_Cases')

Unnamed: 0,Confirmed_Cases,Total_Deaths,Total_Recovered,Active_Cases
US,2163290,117717,592191,1453382
Brazil,955377,46510,521046,387821
United Kingdom,299251,42153,0,257098
Russia,552549,7468,303800,241281
India,366946,12237,194325,160384
Peru,240908,7257,128622,105029
Pakistan,160118,3093,59215,97810
France,189906,29512,70223,90171
Spain,244683,27136,150376,67171
Bangladesh,98489,1305,38189,58995


In [22]:
# Export the summary table as indented JSON.
# The path is built with pathlib because a hard-coded '\\' separator only
# names a directory on Windows; elsewhere it becomes part of the file name.
# The target folder is created first so the write cannot fail on a missing
# directory.
out_path = Path('COVID-19') / 'JHU_6-18-20.json'
out_path.parent.mkdir(parents=True, exist_ok=True)
new.to_json(out_path, indent=2)