## Part 1. Comparing the number of COVID-19 confirmed cases across six continents

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
covid_confirmed_cases = pd.read_csv("total-cases-covid-19.csv")
covid_confirmed_cases

Unnamed: 0,Entity,Code,Date,Total confirmed cases,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,Afghanistan,AFG,25-Feb-20,1,,,,,
1,Afghanistan,AFG,26-Feb-20,1,,,,,
2,Afghanistan,AFG,27-Feb-20,1,,,,,
3,Afghanistan,AFG,28-Feb-20,1,,,,,
4,Afghanistan,AFG,29-Feb-20,1,,,,,
5,Afghanistan,AFG,1-Mar-20,1,,,,,
6,Afghanistan,AFG,2-Mar-20,1,,,,,
7,Afghanistan,AFG,8-Mar-20,4,,,,,
8,Afghanistan,AFG,11-Mar-20,7,,,,,
9,Afghanistan,AFG,15-Mar-20,10,,,,,


### Change column name "Total confirmed cases" to "Total_confirmed_cases"

In [3]:
# Normalise header whitespace before renaming. `DataFrame.rename` silently ignores
# keys that do not match, and the previous key ('Total confirmed cases ') only
# worked because of an invisible trailing space in the column label.
covid_confirmed_cases = (
    covid_confirmed_cases
    .rename(columns=str.strip)
    .rename(columns={'Total confirmed cases': 'Total_confirmed_cases'})
)
covid_confirmed_cases.head()

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,Afghanistan,AFG,25-Feb-20,1,,,,,
1,Afghanistan,AFG,26-Feb-20,1,,,,,
2,Afghanistan,AFG,27-Feb-20,1,,,,,
3,Afghanistan,AFG,28-Feb-20,1,,,,,
4,Afghanistan,AFG,29-Feb-20,1,,,,,


### Remove unwanted columns

In [4]:
covid_confirmed_cases.columns

Index(['Entity', 'Code', 'Date', 'Total_confirmed_cases', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7', 'Unnamed: 8'],
      dtype='object')

In [5]:
# Keep only the four named columns; the trailing "Unnamed: N" columns are dropped.
covid_confirmed_cases = covid_confirmed_cases.loc[
    :, ['Entity', 'Code', 'Date', 'Total_confirmed_cases']
]
covid_confirmed_cases.head()

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
0,Afghanistan,AFG,25-Feb-20,1
1,Afghanistan,AFG,26-Feb-20,1
2,Afghanistan,AFG,27-Feb-20,1
3,Afghanistan,AFG,28-Feb-20,1
4,Afghanistan,AFG,29-Feb-20,1


### Create a separate dataframe of confirmed cases for the six continents

In [6]:
continents = ["Asia", "Europe", "North America", "South America", "Oceania", "Africa"]

# `DataFrame.append` was deprecated in pandas 1.4 and removed in 2.0, and growing a
# frame inside a loop re-copies it on every iteration. Collect the per-continent
# slices and concatenate once; rows stay grouped in the order of `continents`.
covid_continents = pd.concat(
    [
        covid_confirmed_cases.loc[covid_confirmed_cases.Entity == continent]
        for continent in continents
    ]
)

covid_continents

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
545,Asia,,31-Dec-19,27
546,Asia,,1-Jan-20,27
547,Asia,,2-Jan-20,27
548,Asia,,3-Jan-20,44
549,Asia,,4-Jan-20,44
550,Asia,,5-Jan-20,59
551,Asia,,6-Jan-20,59
552,Asia,,7-Jan-20,59
553,Asia,,8-Jan-20,59
554,Asia,,9-Jan-20,59


- display a plot with six legend entries (one per continent) showing Date vs. Total_confirmed_cases  
- calculate the log (rate of increase) for each line


## Part 2. Comparing across top 3 continents with the highest total confirmed cases

### Remove rows whose Code is NaN (non countries)

In [7]:
covid_confirmed_cases.Code.unique()

array(['AFG', nan, 'ALB', 'DZA', 'AND', 'AGO', 'AIA', 'ATG', 'ARG', 'ARM',
       'ABW', 'AUS', 'AUT', 'AZE', 'BHS', 'BHR', 'BGD', 'BRB', 'BLR',
       'BEL', 'BLZ', 'BEN', 'BMU', 'BTN', 'BOL', 'BIH', 'BWA', 'BRA',
       'VGB', 'BRN', 'BGR', 'BFA', 'BDI', 'KHM', 'CMR', 'CAN', 'CPV',
       'CYM', 'CAF', 'TCD', 'CHL', 'CHN', 'COL', 'COG', 'CRI', 'CIV',
       'HRV', 'CUB', 'CUW', 'CYP', 'CZE', 'COD', 'DNK', 'DJI', 'DMA',
       'DOM', 'ECU', 'EGY', 'SLV', 'GNQ', 'ERI', 'EST', 'ETH', 'FRO',
       'FLK', 'FJI', 'FIN', 'FRA', 'PYF', 'GAB', 'GMB', 'GEO', 'DEU',
       'GHA', 'GIB', 'GRC', 'GRL', 'GRD', 'GUM', 'GTM', 'GGY', 'GIN',
       'GNB', 'GUY', 'HTI', 'HND', 'HUN', 'ISL', 'IND', 'IDN', 'IRN',
       'IRQ', 'IRL', 'IMN', 'ISR', 'ITA', 'JAM', 'JPN', 'JEY', 'JOR',
       'KAZ', 'KEN', 'OWID_KOS', 'KWT', 'KGZ', 'LAO', 'LVA', 'LBN', 'LBR',
       'LBY', 'LIE', 'LTU', 'LUX', 'MKD', 'MDG', 'MWI', 'MYS', 'MDV',
       'MLI', 'MLT', 'MRT', 'MUS', 'MEX', 'MDA', 'MCO', 'MNG', 'MNE',
       'MS

In [8]:
# Boolean mask flagging the rows whose Code is missing.
bool_crit = covid_confirmed_cases["Code"].isna()
print("Number of NaN entries identified: ", bool_crit.sum())
covid_confirmed_cases[bool_crit]

Number of NaN entries identified:  1528


Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
55,Africa,,15-Feb-20,1
56,Africa,,16-Feb-20,1
57,Africa,,17-Feb-20,1
58,Africa,,18-Feb-20,1
59,Africa,,19-Feb-20,1
60,Africa,,20-Feb-20,1
61,Africa,,21-Feb-20,1
62,Africa,,22-Feb-20,1
63,Africa,,23-Feb-20,1
64,Africa,,24-Feb-20,1


In [9]:
# Invert the NaN mask to retain only the rows that carry a Code.
covid_confirmed_cases_countries = covid_confirmed_cases.loc[~bool_crit]
covid_confirmed_cases_countries

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
0,Afghanistan,AFG,25-Feb-20,1
1,Afghanistan,AFG,26-Feb-20,1
2,Afghanistan,AFG,27-Feb-20,1
3,Afghanistan,AFG,28-Feb-20,1
4,Afghanistan,AFG,29-Feb-20,1
5,Afghanistan,AFG,1-Mar-20,1
6,Afghanistan,AFG,2-Mar-20,1
7,Afghanistan,AFG,8-Mar-20,4
8,Afghanistan,AFG,11-Mar-20,7
9,Afghanistan,AFG,15-Mar-20,10


### Select data entries on 29-Apr-20

In [10]:
# Keep only the rows dated 29-Apr-20 (Date is compared as a plain string).
countries_0429 = covid_confirmed_cases_countries[
    covid_confirmed_cases_countries["Date"].eq("29-Apr-20")
]
countries_0429

Unnamed: 0,Entity,Code,Date,Total_confirmed_cases
54,Afghanistan,AFG,29-Apr-20,1827
181,Albania,ALB,29-Apr-20,750
240,Algeria,DZA,29-Apr-20,3649
287,Andorra,AND,29-Apr-20,748
326,Angola,AGO,29-Apr-20,27
360,Anguilla,AIA,29-Apr-20,3
401,Antigua and Barbuda,ATG,29-Apr-20,24
455,Argentina,ARG,29-Apr-20,4114
506,Armenia,ARM,29-Apr-20,1932
544,Aruba,ABW,29-Apr-20,100


### Create a dictionary of all countries in the world

In [14]:
pip install pycountry-convert


The following command must be run outside of the IPython shell:

    $ pip install pycountry-convert

The Python package manager (pip) can only be used from outside of IPython.
Please reissue the `pip` command in a separate terminal or command prompt.

See the Python documentation for more information on how to install packages:

    https://docs.python.org/3/installing/


In [12]:

import pycountry

# Materialise the pycountry registry once, then project out the alpha-2 and
# alpha-3 code of every entry (both lists share the order of `countries`).
countries = list(pycountry.countries)
countries_alpha2 = [entry.alpha_2 for entry in countries]
countries_alpha3 = [entry.alpha_3 for entry in countries]

countries_alpha3
countries_alpha2
countries

ModuleNotFoundError: No module named 'pycountry'

### Convert country_alpha3 to continent code

In [None]:
from pycountry_convert import country_alpha2_to_continent_code, country_alpha3_to_country_alpha2

continents = []

# Walk the Code column in row order. Codes missing from countries_alpha3 are printed
# and recorded as "invalid", so the list keeps one entry per row of countries_0429.
for country_alpha3 in countries_0429.Code:
    if country_alpha3 in countries_alpha3:
        country_alpha2 = country_alpha3_to_country_alpha2(country_alpha3)
        continents.append(country_alpha2_to_continent_code(country_alpha2))
    else:
        continents.append("invalid")
        print(country_alpha3)

continents

### Add column Continent to the dataframe countries_0429

In [None]:
# assign() returns a new frame with the extra column, so the filtered slice
# produced earlier is never written to in place.
countries_0429 = countries_0429.assign(Continent=continents)

countries_0429

## Age Demographics - all countries

### Median Age by countries

In [None]:
median_age= pd.read_csv('median-age.csv')

In [None]:
# Replace the verbose source header with the shorter 'median age' label.
median_age = median_age.rename(
    columns={'UN Population Division (Median Age) (2017) (years)': 'median age'}
)

In [None]:
median_age

In [None]:
median_age_2020 = median_age[median_age['Year'] == 2020]

In [None]:
median_age_2020

#### drop rows that contain a particular string 

In [None]:
median_age_2020.Code.unique()

In [None]:
# Remove the four listed OWID_* codes in a single pass; rows whose Code is
# missing are not matched by isin() and therefore stay in the frame.
median_age_2020 = median_age_2020[
    ~median_age_2020['Code'].isin(['OWID_WRL', 'OWID_MNS', 'OWID_PYA', 'OWID_CIS'])
]

In [None]:
median_age_2020.Code.unique()

In [None]:
# Boolean mask flagging the rows whose Code is missing.
bool_crit_2 = median_age_2020['Code'].isna()
print('Number of NaN entries identified:', bool_crit_2.sum())
median_age_2020[bool_crit_2]

In [None]:
median_age_2020_countries = median_age_2020[~bool_crit_2]
median_age_2020_countries

### Age Structure/breakdown by countries

In [None]:
age_breakdown = pd.read_csv('population-by-broad-age-group.csv')
age_breakdown

In [None]:
age_breakdown.Year.unique()

In [None]:
age_breakdown_2015 = age_breakdown[age_breakdown['Year'] == 2015]
age_breakdown_2015

In [None]:
age_breakdown_2015.Code.unique()

In [None]:
# Remove the four listed OWID_* codes in a single pass; rows whose Code is
# missing are not matched by isin() and therefore stay in the frame.
age_breakdown_2015 = age_breakdown_2015[
    ~age_breakdown_2015['Code'].isin(['OWID_WRL', 'OWID_MNS', 'OWID_PYA', 'OWID_CIS'])
]
age_breakdown_2015.Code.unique()

In [None]:
# Boolean mask flagging the rows whose Code is missing.
bool_crit_3 = age_breakdown_2015['Code'].isna()
print('Number of NaN entries identified:', bool_crit_3.sum())
age_breakdown_2015[bool_crit_3]

In [None]:
age_breakdown_2015_countries = age_breakdown_2015[~bool_crit_3]

In [None]:
age_breakdown_2015_countries.Code.unique()

In [None]:
age_breakdown_2015_countries

### Reorder columns

In [None]:
age_breakdown_2015_countries = age_breakdown_2015_countries[['Entity','Code','Year','Under-5s','5-14 years','15-24 years','25-64 years','65+ years']]

In [None]:
age_breakdown_2015_countries

In [None]:
#age_data = age_breakdown_2015_countries.merge(median_age_2020_countries, how = 'inner', on = ['Entity'])

In [None]:
#age_data

## Economic Indicators - all countries

### GDP per capita, PPP

In [None]:
GDP = pd.read_csv('API_NY.GDP.PCAP.PP.CD_DS2_en_csv_v2_988619.csv', skiprows = 1, header = 1)
GDP

In [None]:
GDP_2018 = GDP[['Country Name','Country Code','2018']]
GDP_2018

In [None]:
# Boolean mask flagging the rows with no 2018 value.
bool_crit_4 = GDP_2018['2018'].isna()
print('Number of NaN entries identified:', bool_crit_4.sum())
GDP_2018[bool_crit_4]

In [None]:
GDP_2018_countries = GDP_2018[~bool_crit_4]
GDP_2018_countries

### unemployment rate

In [46]:
unemployment = pd.read_csv('unemployment.csv',skiprows = 1, header = 1 )
unemployment.head()

Unnamed: 0,Country Name,Country Code,Indicator Name,Indicator Code,1960,1961,1962,1963,1964,1965,...,2011,2012,2013,2014,2015,2016,2017,2018,2019,Unnamed: 64
0,Aruba,ABW,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,...,,,,,,,,,,
1,Afghanistan,AFG,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,...,11.488,11.508,11.534,11.448,11.387,11.313,11.184,11.057,11.118,
2,Angola,AGO,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,...,7.362,7.379,7.4,7.331,7.282,7.223,7.119,7.019,6.886,
3,Albania,ALB,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,...,13.481,13.376,15.866,17.49,17.08,15.22,13.75,12.34,12.331,
4,Andorra,AND,"Unemployment, total (% of total labor force) (...",SL.UEM.TOTL.ZS,,,,,,,...,,,,,,,,,,


In [47]:
unemployment_rate = unemployment[['Country Name', 'Country Code','2019']] 
unemployment_rate.head()

Unnamed: 0,Country Name,Country Code,2019
0,Aruba,ABW,
1,Afghanistan,AFG,11.118
2,Angola,AGO,6.886
3,Albania,ALB,12.331
4,Andorra,AND,


In [49]:
# Boolean mask flagging the rows with no 2019 value.
bool_crit_5 = unemployment_rate['2019'].isna()
print('Number of NaN entries identified:', bool_crit_5.sum())
unemployment_rate[bool_crit_5].head()

Number of NaN entries identified: 31


Unnamed: 0,Country Name,Country Code,2019
0,Aruba,ABW,
4,Andorra,AND,
9,American Samoa,ASM,
10,Antigua and Barbuda,ATG,
25,Bermuda,BMU,


In [50]:
unemployment_rate_2019_countries = unemployment_rate[~bool_crit_5]
unemployment_rate_2019_countries.head()

Unnamed: 0,Country Name,Country Code,2019
1,Afghanistan,AFG,11.118
2,Angola,AGO,6.886
3,Albania,ALB,12.331
5,Arab World,ARB,10.337095
6,United Arab Emirates,ARE,2.348


## Health System Indicators

In [94]:
def only_year(data, year):
    """Return the 'Country Name' and `year` columns, dropping rows with no value.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing a 'Country Name' column and a column labelled `year`.
    year : str or int
        Label of the year column to keep. It is converted with ``str()`` so
        that ``2017`` and ``'2017'`` select the same column.

    Returns
    -------
    pandas.DataFrame
        New two-column frame with the original index labels, holding only the
        rows whose `year` value is not NaN. `data` itself is left untouched.

    Raises
    ------
    KeyError
        If 'Country Name' or the requested year column is not present.
    """
    year = str(year)
    return data[['Country Name', year]].dropna(subset=[year])


def only2017(data):
    """Keep 'Country Name' and '2017', dropping rows where '2017' is NaN."""
    return only_year(data, '2017')


def only2015(data):
    """Keep 'Country Name' and '2015', dropping rows where '2015' is NaN."""
    return only_year(data, '2015')
                    
            

### Hospital beds

In [97]:
#hospitalbeds=pd.read_csv('hospitalbeds.csv')
#hospitalbeds_2017=only2017(hospitalbeds)
#hospitalbeds_2017.head()



In [95]:
# Read the file and immediately reduce it to 'Country Name' + '2015' via only2015().
hospitalbed2 = only2015(
    pd.read_csv('hospitalbed2.csv', skiprows=1, header=1)
)
hospitalbed2.head()


Unnamed: 0,Country Name,2015
1,Afghanistan,0.5
8,Armenia,4.2
18,Bangladesh,0.8
29,Brunei Darussalam,2.7
46,Costa Rica,1.2


### Health expenditure per capita

In [98]:
health_expenditure_percap=pd.read_csv('health_exp_cap.csv',skiprows = 1, header = 1)
health_expenditure_percap_2017=only2017(health_expenditure_percap)
health_expenditure_percap_2017.head()

Unnamed: 0,Country Name,2017
1,Afghanistan,67.12265
2,Angola,114.459641
4,Andorra,4040.786621
6,United Arab Emirates,1357.017456
7,Argentina,1324.603516


### Health expenditure out of pocket per capita

In [99]:
# Read the file and immediately reduce it to 'Country Name' + '2017' via only2017().
out_pocket = only2017(
    pd.read_csv('outofpocket.csv', skiprows=1, header=1)
)
out_pocket.head()

Unnamed: 0,Country Name,2017
1,Afghanistan,75.482577
2,Angola,34.121013
4,Andorra,41.777053
6,United Arab Emirates,18.867441
7,Argentina,15.018527


### Physicians per 1000 people

In [83]:
physicians= pd.read_csv('physicians.csv',skiprows = 1, header = 1)
physicians_2017=only2017(physicians)
physicians_2017.head()

Unnamed: 0,Country Name,2017
2,Angola,0.2149
7,Argentina,3.96
10,Antigua and Barbuda,2.7647
18,Bangladesh,0.5268
21,"Bahamas, The",1.9373


### People with basic handwashing facilities including soap and water (% of population)

In [104]:
handsoap=pd.read_csv('handsoap%.csv',skiprows = 1, header = 1)
handsoap_2017=only2017(handsoap)
handsoap_2017.head()


Unnamed: 0,Country Name,2017
1,Afghanistan,37.746032
2,Angola,26.664183
5,Arab World,70.171735
8,Armenia,94.042941
13,Azerbaijan,83.2413


### above 65 population %

In [102]:
above65=pd.read_csv('above65.csv',skiprows = 1, header = 1)
above65_2017=only2017(above65)
above65_2017.head()


Unnamed: 0,Country Name,2017
0,Aruba,13.073477
1,Afghanistan,2.554251
2,Angola,2.239197
3,Albania,13.329073
5,Arab World,4.46156


In [None]:
#keys=health_expenditure_percap['Country Name']
#define 
  #for i in keys:
       #data.loc[i]=