# Count covid deaths and confirmed cases by country and put in an output file

###### import library

In [1]:
from pathlib import Path

import pandas as pd

###### read data from main dataset to find out death numbers

In [2]:
# Load the COVID-19 deaths dataset and preview its first rows.
death_file = "Resources/covid_19_deaths.csv"
death_df = pd.read_csv(filepath_or_buffer=death_file, encoding="utf-8")
death_df.head(n=5)

Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,8/03/2020,8/04/2020,8/05/2020,8/06/2020,8/07/2020,8/08/2020,8/09/2020,8/10/2020,8/11/2020,8/12/2020
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,1288,1288,1294,1298,1307,1312,1312,1328,1344,1354
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,172,176,182,188,189,193,199,200,205,208
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1239,1248,1261,1273,1282,1293,1302,1312,1322,1333
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,52,52,52,52,52,52,52,52,52,53
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,58,59,62,64,67,70,75,78,80,80


###### change column names to make them more meaningful 

In [3]:
# Give the key columns meaningful names. "8/12/2020" is the last date column in
# the loaded frame, so its values become the death count used from here on.
death_df = death_df.rename(
    columns={"Country/Region": "country", "8/12/2020": "covid_deaths"}
)

# rename() skips labels it cannot find instead of raising, so verify the result
# here; otherwise a changed CSV header only shows up as a KeyError in a later cell.
# Checking the renamed labels (not the source labels) keeps this cell safe to re-run.
missing_columns = {"country", "covid_deaths"} - set(death_df.columns)
assert not missing_columns, (
    f"expected columns not found after rename: {sorted(missing_columns)}"
)
death_df.head()

Unnamed: 0,Province/State,country,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,8/03/2020,8/04/2020,8/05/2020,8/06/2020,8/07/2020,8/08/2020,8/09/2020,8/10/2020,8/11/2020,covid_deaths
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,1288,1288,1294,1298,1307,1312,1312,1328,1344,1354
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,172,176,182,188,189,193,199,200,205,208
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,1239,1248,1261,1273,1282,1293,1302,1312,1322,1333
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,52,52,52,52,52,52,52,52,52,53
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,58,59,62,64,67,70,75,78,80,80


###### keep only the necessary columns

In [4]:
# Narrow the frame to the country label and its death count; every other
# column is dropped from this point on.
death_df = death_df.loc[:, ["country", "covid_deaths"]]
death_df.head(n=5)

Unnamed: 0,country,covid_deaths
0,Afghanistan,1354
1,Albania,208
2,Algeria,1333
3,Andorra,53
4,Angola,80


In [5]:
# Collapse rows that share a country into one total per country
# (presumably the separate Province/State entries of the source file).
death_grouped = death_df.groupby(by="country").sum()
death_grouped.head(n=5)

Unnamed: 0_level_0,covid_deaths
country,Unnamed: 1_level_1
Afghanistan,1354
Albania,208
Algeria,1333
Andorra,53
Angola,80


###### read data from main dataset to find out confirmed case numbers

In [6]:
# Load the COVID-19 confirmed-cases dataset and preview its first rows.
covid_file = "Resources/covid_19_confirmed.csv"
confirmdf = pd.read_csv(filepath_or_buffer=covid_file, encoding="utf-8")
confirmdf.head(n=5)


Unnamed: 0,Province/State,Country/Region,Lat,Long,1/22/20,1/23/20,1/24/20,1/25/20,1/26/20,1/27/20,...,8/3/20,8/4/20,8/5/20,8/6/20,8/7/20,8/8/20,8/9/20,8/10/20,8/11/20,8/12/20
0,,Afghanistan,33.93911,67.709953,0,0,0,0,0,0,...,36747,36782,36829,36896,37015,37054,37054,37162,37269,37345
1,,Albania,41.1533,20.1683,0,0,0,0,0,0,...,5620,5750,5889,6016,6151,6275,6411,6536,6676,6817
2,,Algeria,28.0339,1.6596,0,0,0,0,0,0,...,31972,32504,33055,33626,34155,34693,35160,35712,36204,36699
3,,Andorra,42.5063,1.5218,0,0,0,0,0,0,...,937,939,939,944,955,955,955,963,963,977
4,,Angola,-11.2027,17.8739,0,0,0,0,0,0,...,1280,1344,1395,1483,1538,1572,1672,1679,1735,1762


In [7]:
# Same preparation as for the deaths data, done in one chain:
# rename the key columns, then keep only country and the confirmed count.
confirmdf = (
    confirmdf
    .rename(columns={"Country/Region": "country", "8/12/20": "covid_confirmed"})
    .loc[:, ["country", "covid_confirmed"]]
)

# One total per country.
confirmgroupdf = confirmdf.groupby(by="country").sum()
confirmgroupdf.head(n=5)

Unnamed: 0_level_0,covid_confirmed
country,Unnamed: 1_level_1
Afghanistan,37345
Albania,6817
Algeria,36699
Andorra,977
Angola,1762


###### merge both into one dataframe

In [8]:
# Combine deaths and confirmed cases per country. Both frames are keyed by the
# "country" index level; the inner join keeps only countries present in both.
coviddf = death_grouped.merge(confirmgroupdf, how="inner", on="country")
coviddf.head(n=5)

Unnamed: 0_level_0,covid_deaths,covid_confirmed
country,Unnamed: 1_level_1,Unnamed: 2_level_1
Afghanistan,1354,37345
Albania,208,6817
Algeria,1333,36699
Andorra,53,977
Angola,80,1762


###### create an output file to upload into the database schema

In [9]:
# Write the merged per-country table to disk; the country index is kept as the
# first CSV column and the header row is included.
output_path = Path("Data/Covid_stats.csv")
# to_csv() does not create missing folders, so make sure the target directory
# exists first (a fresh checkout may not have Data/ yet).
output_path.parent.mkdir(parents=True, exist_ok=True)
coviddf.to_csv(output_path, index=True, header=True)