## Part 1. Compairing the number of covid confirmed cases across 5 continents

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
covid_confirmed_cases = pd.read_csv("total-cases-covid-19.csv")
covid_confirmed_cases

Unnamed: 0,Entity,Code,Date,Total confirmed cases,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,Afghanistan,AFG,25-Feb-20,1,,,,,
1,Afghanistan,AFG,26-Feb-20,1,,,,,
2,Afghanistan,AFG,27-Feb-20,1,,,,,
3,Afghanistan,AFG,28-Feb-20,1,,,,,
4,Afghanistan,AFG,29-Feb-20,1,,,,,
...,...,...,...,...,...,...,...,...,...
12282,Zimbabwe,ZWE,25-Apr-20,29,,,,,
12283,Zimbabwe,ZWE,26-Apr-20,31,,,,,
12284,Zimbabwe,ZWE,27-Apr-20,31,,,,,
12285,Zimbabwe,ZWE,28-Apr-20,32,,,,,


### Change column name "Total confirmed cases" to "Total_confirmed_cases"

In [3]:
covid_confirmed_cases = covid_confirmed_cases.rename(columns = {'Total confirmed cases ': 'Total_confirmed_cases'})
covid_confirmed_cases.head()

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,Afghanistan,AFG,25-Feb-20,1,,,,,
1,Afghanistan,AFG,26-Feb-20,1,,,,,
2,Afghanistan,AFG,27-Feb-20,1,,,,,
3,Afghanistan,AFG,28-Feb-20,1,,,,,
4,Afghanistan,AFG,29-Feb-20,1,,,,,


### Remove unwanted columns

In [4]:
covid_confirmed_cases.columns

Index(['Entity', 'Code', 'Date', 'Total_confirmed_cases', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8'],
      dtype='object')

In [5]:
covid_confirmed_cases = covid_confirmed_cases[['Entity', 'Code', 'Date', 'Total_confirmed_cases']]
covid_confirmed_cases.head()

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
0,Afghanistan,AFG,25-Feb-20,1
1,Afghanistan,AFG,26-Feb-20,1
2,Afghanistan,AFG,27-Feb-20,1
3,Afghanistan,AFG,28-Feb-20,1
4,Afghanistan,AFG,29-Feb-20,1


### Create a separeate dataframe of confirmed cases of six continents

In [22]:
continents = ["Asia", "Europe", "North America", "South America", "Oceania", "Africa"]
covid_continents= pd.DataFrame()

for continent in continents:
    covid_continents = covid_continents.append(covid_confirmed_cases.loc[covid_confirmed_cases.Entity == continent])

covid_continents

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
545,Asia,,31-Dec-19,27
546,Asia,,1-Jan-20,27
547,Asia,,2-Jan-20,27
548,Asia,,3-Jan-20,44
549,Asia,,4-Jan-20,44
...,...,...,...,...
125,Africa,,25-Apr-20,29075
126,Africa,,26-Apr-20,30316
127,Africa,,27-Apr-20,31748
128,Africa,,28-Apr-20,33164


- display a plot with 5 legends(continents) showing Date vs. Total_confirmed_cases  
- claculate the log (rate of increase) for each line


## Part 2. Comparing across top 3 continents with the highest total confirmed cases

### Remove rows whose Code is NaN (non countries)

In [7]:
covid_confirmed_cases.Code.unique()

array(['AFG', nan, 'ALB', 'DZA', 'AND', 'AGO', 'AIA', 'ATG', 'ARG', 'ARM',
       'ABW', 'AUS', 'AUT', 'AZE', 'BHS', 'BHR', 'BGD', 'BRB', 'BLR',
       'BEL', 'BLZ', 'BEN', 'BMU', 'BTN', 'BOL', 'BIH', 'BWA', 'BRA',
       'VGB', 'BRN', 'BGR', 'BFA', 'BDI', 'KHM', 'CMR', 'CAN', 'CPV',
       'CYM', 'CAF', 'TCD', 'CHL', 'CHN', 'COL', 'COG', 'CRI', 'CIV',
       'HRV', 'CUB', 'CUW', 'CYP', 'CZE', 'COD', 'DNK', 'DJI', 'DMA',
       'DOM', 'ECU', 'EGY', 'SLV', 'GNQ', 'ERI', 'EST', 'ETH', 'FRO',
       'FLK', 'FJI', 'FIN', 'FRA', 'PYF', 'GAB', 'GMB', 'GEO', 'DEU',
       'GHA', 'GIB', 'GRC', 'GRL', 'GRD', 'GUM', 'GTM', 'GGY', 'GIN',
       'GNB', 'GUY', 'HTI', 'HND', 'HUN', 'ISL', 'IND', 'IDN', 'IRN',
       'IRQ', 'IRL', 'IMN', 'ISR', 'ITA', 'JAM', 'JPN', 'JEY', 'JOR',
       'KAZ', 'KEN', 'OWID_KOS', 'KWT', 'KGZ', 'LAO', 'LVA', 'LBN', 'LBR',
       'LBY', 'LIE', 'LTU', 'LUX', 'MKD', 'MDG', 'MWI', 'MYS', 'MDV',
       'MLI', 'MLT', 'MRT', 'MUS', 'MEX', 'MDA', 'MCO', 'MNG', 'MNE',
       'MS

In [8]:
bool_crit = (pd.isnull(covid_confirmed_cases.Code))
print("Number of NaN entries identified: ", sum(bool_crit))
covid_confirmed_cases[bool_crit]

Number of NaN entries identified:  1528


Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
55,Africa,,15-Feb-20,1
56,Africa,,16-Feb-20,1
57,Africa,,17-Feb-20,1
58,Africa,,18-Feb-20,1
59,Africa,,19-Feb-20,1
...,...,...,...,...
12180,"World excl. China, South Korea, Japan and Sing...",,25-Apr-20,2612056
12181,"World excl. China, South Korea, Japan and Sing...",,26-Apr-20,2711959
12182,"World excl. China, South Korea, Japan and Sing...",,27-Apr-20,2794347
12183,"World excl. China, South Korea, Japan and Sing...",,28-Apr-20,2858738


In [9]:
covid_confirmed_cases_countries = covid_confirmed_cases[~bool_crit]
covid_confirmed_cases_countries

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
0,Afghanistan,AFG,25-Feb-20,1
1,Afghanistan,AFG,26-Feb-20,1
2,Afghanistan,AFG,27-Feb-20,1
3,Afghanistan,AFG,28-Feb-20,1
4,Afghanistan,AFG,29-Feb-20,1
...,...,...,...,...
12282,Zimbabwe,ZWE,25-Apr-20,29
12283,Zimbabwe,ZWE,26-Apr-20,31
12284,Zimbabwe,ZWE,27-Apr-20,31
12285,Zimbabwe,ZWE,28-Apr-20,32


### Select data entries on 29-Apr-20

In [10]:
countries_0429 = covid_confirmed_cases_countries.loc[covid_confirmed_cases_countries.Date == "29-Apr-20"]
countries_0429

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
54,Afghanistan,AFG,29-Apr-20,1827
181,Albania,ALB,29-Apr-20,750
240,Algeria,DZA,29-Apr-20,3649
287,Andorra,AND,29-Apr-20,748
326,Angola,AGO,29-Apr-20,27
...,...,...,...,...
11739,Vietnam,VNM,29-Apr-20,270
11860,World,OWID_WRL,29-Apr-20,3052370
12204,Yemen,YEM,29-Apr-20,1
12246,Zambia,ZMB,29-Apr-20,95


### Create a dictionary of all countries in the world

In [11]:
pip install pycountry-convert

Note: you may need to restart the kernel to use updated packages.


In [12]:
import pycountry
countries = list(pycountry.countries)
countries_alpha2 = []
countries_alpha3 = []

for i in range(len(countries)):
    countries_alpha2.append(countries[i].alpha_2)
    countries_alpha3.append(countries[i].alpha_3)

countries_alpha3
countries_alpha2
countries

[Country(alpha_2='AW', alpha_3='ABW', name='Aruba', numeric='533'),
 Country(alpha_2='AF', alpha_3='AFG', name='Afghanistan', numeric='004', official_name='Islamic Republic of Afghanistan'),
 Country(alpha_2='AO', alpha_3='AGO', name='Angola', numeric='024', official_name='Republic of Angola'),
 Country(alpha_2='AI', alpha_3='AIA', name='Anguilla', numeric='660'),
 Country(alpha_2='AX', alpha_3='ALA', name='Åland Islands', numeric='248'),
 Country(alpha_2='AL', alpha_3='ALB', name='Albania', numeric='008', official_name='Republic of Albania'),
 Country(alpha_2='AD', alpha_3='AND', name='Andorra', numeric='020', official_name='Principality of Andorra'),
 Country(alpha_2='AE', alpha_3='ARE', name='United Arab Emirates', numeric='784'),
 Country(alpha_2='AR', alpha_3='ARG', name='Argentina', numeric='032', official_name='Argentine Republic'),
 Country(alpha_2='AM', alpha_3='ARM', name='Armenia', numeric='051', official_name='Republic of Armenia'),
 Country(alpha_2='AS', alpha_3='ASM', nam

### Convert country_alpha3 to continent code

In [15]:
from pycountry_convert import country_alpha2_to_continent_code, country_alpha3_to_country_alpha2

for i in range(len(countries_0429.index)):
    country_alpha3 = countries_0429.Code.iloc[i]
    if country_alpha3 not in countries_alpha3:
        continents.append("invalid")
        print(country_alpha3)
    else:
        country_alpha2 = country_alpha3_to_country_alpha2(country_alpha3)
        continents.append(country_alpha2_to_continent_code(country_alpha2) )

continents

OWID_KOS
OWID_WRL


['AS',
 'EU',
 'AF',
 'EU',
 'AF',
 'NA',
 'NA',
 'SA',
 'AS',
 'NA',
 'OC',
 'EU',
 'AS',
 'NA',
 'AS',
 'AS',
 'NA',
 'EU',
 'EU',
 'NA',
 'AF',
 'NA',
 'AS',
 'SA',
 'EU',
 'AF',
 'SA',
 'NA',
 'AS',
 'EU',
 'AF',
 'AF',
 'AS',
 'AF',
 'NA',
 'AF',
 'NA',
 'AF',
 'AF',
 'SA',
 'AS',
 'SA',
 'AF',
 'NA',
 'AF',
 'EU',
 'NA',
 'NA',
 'AS',
 'EU',
 'AF',
 'EU',
 'AF',
 'NA',
 'NA',
 'SA',
 'AF',
 'NA',
 'AF',
 'AF',
 'EU',
 'AF',
 'EU',
 'SA',
 'OC',
 'EU',
 'EU',
 'OC',
 'AF',
 'AF',
 'AS',
 'EU',
 'AF',
 'EU',
 'EU',
 'NA',
 'NA',
 'OC',
 'NA',
 'EU',
 'AF',
 'AF',
 'SA',
 'NA',
 'NA',
 'EU',
 'EU',
 'AS',
 'AS',
 'AS',
 'AS',
 'EU',
 'EU',
 'AS',
 'EU',
 'NA',
 'AS',
 'EU',
 'AS',
 'AS',
 'AF',
 'invalid',
 'AS',
 'AS',
 'AS',
 'EU',
 'AS',
 'AF',
 'AF',
 'EU',
 'EU',
 'EU',
 'EU',
 'AF',
 'AF',
 'AS',
 'AS',
 'AF',
 'EU',
 'AF',
 'AF',
 'NA',
 'EU',
 'EU',
 'AS',
 'EU',
 'NA',
 'AF',
 'AF',
 'AS',
 'AF',
 'AS',
 'EU',
 'OC',
 'OC',
 'NA',
 'AF',
 'AF',
 'OC',
 'EU',
 'AS',
 'AS',
 

### Add column Continent to the dataframe countries_0429

In [19]:
countries_0429 = countries_0429.copy()
countries_0429["Continent"] = continents

countries_0429

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases,Continent
54,Afghanistan,AFG,29-Apr-20,1827,AS
181,Albania,ALB,29-Apr-20,750,EU
240,Algeria,DZA,29-Apr-20,3649,AF
287,Andorra,AND,29-Apr-20,748,EU
326,Angola,AGO,29-Apr-20,27,AF
...,...,...,...,...,...
11739,Vietnam,VNM,29-Apr-20,270,AS
11860,World,OWID_WRL,29-Apr-20,3052370,invalid
12204,Yemen,YEM,29-Apr-20,1,AS
12246,Zambia,ZMB,29-Apr-20,95,AF
