Independent Variables:
    GDP per capita
    Human Development Index (HDI)
    Total Population
    Transparency Index

Dependent Variables:
    Total Cases per million
    Total Deaths per million
    People fully vaccinated per hundred
    
Other Data Columns:
    ISO Code
    Continent
    Location
    Date

In [7]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import scipy.stats as st
import numpy as np
from scipy.stats import linregress
from scipy.stats import pearsonr

import seaborn as sns; sns.set(style='white')


In [8]:
# Study data files: each path is kept in its own variable next to the read that uses it.

# Per-location, per-date COVID metrics (owid-covid-data.csv).
owidCovidData = "Resources/owid-covid-data.csv"
CovidData = pd.read_csv(filepath_or_buffer=owidCovidData)

# Transparency Index table; it is joined to the COVID data on "Country" later on.
transparencyData = "Resources/Transparency Index.csv"
Transparency = pd.read_csv(filepath_or_buffer=transparencyData)

In [17]:
# Build the working snapshot used by the rest of the notebook.
#
# The frame is derived from CovidData in a single chain (no in-place mutation), so
# re-running this cell always produces the same CovidClean from the same raw data.

# Date of the snapshot analysed (the original comment calls this the "max date").
snapshotDate = "2021-04-27"

CovidClean = (
    CovidData
    # eliminate extra columns
    .drop(columns=[
        'total_cases', 'new_cases', 'new_cases_smoothed', 'total_deaths', 'new_deaths', 'new_deaths_smoothed',
        'new_cases_per_million', 'new_cases_smoothed_per_million', 'new_deaths_per_million',
        'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
        'icu_patients_per_million', 'hosp_patients', 'hosp_patients_per_million', 'weekly_icu_admissions',
        'weekly_icu_admissions_per_million', 'weekly_hosp_admissions', 'weekly_hosp_admissions_per_million',
        'new_tests', 'total_tests', 'total_tests_per_thousand',
        'new_tests_per_thousand', 'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'positive_rate',
        'tests_per_case', 'tests_units',
        'total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
        'new_vaccinations_smoothed', 'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
        'new_vaccinations_smoothed_per_million', 'stringency_index',
        'median_age', 'aged_65_older', 'aged_70_older', 'extreme_poverty', 'cardiovasc_death_rate',
        'diabetes_prevalence', 'female_smokers', 'male_smokers', 'handwashing_facilities',
        'hospital_beds_per_thousand', 'life_expectancy',
    ])
    # keep only the rows for the snapshot date
    .loc[lambda frame: frame['date'] == snapshotDate]
    # rename "location" to "Country" (the merge key below) and shorten everything else for ease
    .rename(columns={
        "location": "Country",
        "total_cases_per_million": "Cases",
        "total_deaths_per_million": "Deaths",
        "people_fully_vaccinated_per_hundred": "Vaxxed",
        "population": "Population",
        "population_density": "Density",
        "gdp_per_capita": "GDP",
        "human_development_index": "HDI",
    })
)

# merge with Transparency (inner join): only rows whose "Country" appears in both tables survive
CovidAll = pd.merge(CovidClean, Transparency, on="Country", how="inner")

# Persist the merged snapshot.
CovidAll.to_csv("Resources/CovidWorkingData.csv", index=False)

# Only the last expression of a cell is displayed, so the preview goes at the very end.
CovidAll.head()


In [16]:
# Inspect the cleaned snapshot before the Transparency merge; it still contains rows
# with no continent value (e.g. the OWID_AFR and OWID_WRL rows in the output below).
CovidClean

Unnamed: 0,iso_code,continent,Country,date,Cases,Deaths,Vaxxed,Population,Density,GDP,HDI
428,AFG,Asia,Afghanistan,2021-04-27,1521.385,66.738,,3.892834e+07,54.422,1803.987,0.511
868,OWID_AFR,,Africa,2021-04-27,3373.910,90.104,0.36,1.340598e+09,,,
1296,ALB,Europe,Albania,2021-04-27,45429.147,828.063,,2.877800e+06,104.871,11803.431,0.795
1724,DZA,Africa,Algeria,2021-04-27,2767.186,73.544,,4.385104e+07,17.348,13913.839,0.748
2146,AND,Europe,Andorra,2021-04-27,169818.158,1604.866,,7.726500e+04,163.755,,0.868
...,...,...,...,...,...,...,...,...,...,...,...
82874,VNM,Asia,Vietnam,2021-04-27,29.351,0.360,,9.733858e+07,308.127,6171.884,0.704
83336,OWID_WRL,,World,2021-04-27,19078.988,402.186,3.19,7.794799e+09,58.045,15469.207,0.737
83719,YEM,Asia,Yemen,2021-04-27,208.543,40.468,,2.982597e+07,53.508,1479.147,0.470
84125,ZMB,Africa,Zambia,2021-04-27,4972.706,67.940,,1.838396e+07,22.995,3689.251,0.584


In [None]:
# Work on a copy so CovidClean itself is left untouched.
CovidContinent = CovidClean.copy()

# Keep only the rows whose `continent` value is missing. The NaN mask is computed on
# that single column rather than on the whole frame.
Continent = CovidContinent.loc[CovidContinent['continent'].isna()]
Continent

In [None]:
# Inspect the snapshot after the inner join with the Transparency table.
CovidAll

In [13]:
# See if there is substantial difference looking at only the first half of the pandemic
#
# Same preparation as the main snapshot, but taken at an earlier date. The frame is
# derived from CovidData in a single chain (no in-place mutation), so re-running this
# cell always produces the same CovidAug20 from the same raw data.

# Earlier comparison date (not the latest date in the file).
midpointDate = "2020-08-31"

CovidAug20 = (
    CovidData
    # eliminate extra columns
    .drop(columns=[
        'total_cases', 'new_cases', 'new_cases_smoothed', 'total_deaths', 'new_deaths', 'new_deaths_smoothed',
        'new_cases_per_million', 'new_cases_smoothed_per_million', 'new_deaths_per_million',
        'new_deaths_smoothed_per_million', 'reproduction_rate', 'icu_patients',
        'icu_patients_per_million', 'hosp_patients', 'hosp_patients_per_million', 'weekly_icu_admissions',
        'weekly_icu_admissions_per_million', 'weekly_hosp_admissions', 'weekly_hosp_admissions_per_million',
        'new_tests', 'total_tests', 'total_tests_per_thousand',
        'new_tests_per_thousand', 'new_tests_smoothed', 'new_tests_smoothed_per_thousand', 'positive_rate',
        'tests_per_case', 'tests_units',
        'total_vaccinations', 'people_vaccinated', 'people_fully_vaccinated', 'new_vaccinations',
        'new_vaccinations_smoothed', 'total_vaccinations_per_hundred', 'people_vaccinated_per_hundred',
        'new_vaccinations_smoothed_per_million', 'stringency_index',
        'median_age', 'aged_65_older', 'aged_70_older', 'extreme_poverty', 'cardiovasc_death_rate',
        'diabetes_prevalence', 'female_smokers', 'male_smokers', 'handwashing_facilities',
        'hospital_beds_per_thousand', 'life_expectancy',
    ])
    # keep only the rows for the comparison date
    .loc[lambda frame: frame['date'] == midpointDate]
    # rename "location" to "Country" (the merge key below) and shorten everything else for ease
    .rename(columns={
        "location": "Country",
        "total_cases_per_million": "Cases",
        "total_deaths_per_million": "Deaths",
        "people_fully_vaccinated_per_hundred": "Vaxxed",
        "population": "Population",
        "population_density": "Density",
        "gdp_per_capita": "GDP",
        "human_development_index": "HDI",
    })
)

# merge with Transparency (inner join): only rows whose "Country" appears in both tables survive
CovidAll820 = pd.merge(CovidAug20, Transparency, on="Country", how="inner")

# Persist the merged comparison snapshot.
CovidAll820.to_csv("Resources/CovidWorkingData820.csv", index=False)

# Only the last expression of a cell is displayed, so the preview goes at the very end.
CovidAll820.head()

In [14]:
# Inspect the 2020-08-31 snapshot after the inner join with the Transparency table
# (the join contributes the Score and Rank columns seen in the output).
CovidAll820

Unnamed: 0,iso_code,continent,Country,date,Cases,Deaths,Vaxxed,Population,Density,GDP,HDI,Score,Rank
0,AFG,Asia,Afghanistan,2020-08-31,980.237,36.041,,38928341.0,54.422,1803.987,0.511,19.0,165.0
1,ALB,Europe,Albania,2020-08-31,3305.650,98.686,,2877800.0,104.871,11803.431,0.795,36.0,104.0
2,DZA,Africa,Algeria,2020-08-31,1014.662,34.435,,43851043.0,17.348,13913.839,0.748,36.0,104.0
3,AGO,Africa,Angola,2020-08-31,80.751,3.286,,32866268.0,23.890,5819.495,0.581,27.0,142.0
4,ARG,South America,Argentina,2020-08-31,9242.788,191.611,,45195777.0,16.177,18933.907,0.845,42.0,78.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
168,VEN,South America,Venezuela,2020-08-31,1643.272,13.574,,28435943.0,36.253,16745.022,0.711,15.0,176.0
169,VNM,Asia,Vietnam,2020-08-31,10.725,0.349,,97338583.0,308.127,6171.884,0.704,36.0,104.0
170,YEM,Asia,Yemen,2020-08-31,65.647,18.977,,29825968.0,53.508,1479.147,0.470,15.0,176.0
171,ZMB,Africa,Zambia,2020-08-31,658.019,15.666,,18383956.0,22.995,3689.251,0.584,33.0,117.0


In [None]:
# Pair Plots as of 4/27/2021

# Use Seaborn to create a PairGrid
def corrfunc(x, y, ax=None, **kws):
    """Plot the correlation coefficient in the top left hand corner of a plot.

    Pearson's r is computed on the observations where both ``x`` and ``y`` are
    finite. The frames plotted in this notebook contain missing values, and
    passing NaN straight to ``pearsonr`` either raises or yields ``nan``
    depending on the SciPy version.

    Parameters
    ----------
    x, y : array-like of numbers
        Paired observations of equal length.
    ax : matplotlib Axes, optional
        Axes to annotate; the current axes are used when omitted.
    **kws
        Extra keyword arguments supplied by the caller (seaborn passes styling
        options such as ``color``); accepted and ignored.

    Returns
    -------
    None
        The axes are annotated with ``coor = <r>`` (two decimals); ``nan`` is
        shown when fewer than two complete pairs exist.
    """
    x_vals = np.asarray(x, dtype=float)
    y_vals = np.asarray(y, dtype=float)

    # Pairwise deletion: an observation counts only if both coordinates are usable.
    paired = np.isfinite(x_vals) & np.isfinite(y_vals)

    if paired.sum() >= 2:
        r, _ = pearsonr(x_vals[paired], y_vals[paired])
    else:
        # pearsonr needs at least two points; report "no estimate" instead of failing.
        r = float("nan")

    if ax is None:
        ax = plt.gca()
    ax.annotate(f'coor = {r:.2f}', xy=(.1, .9), xycoords=ax.transAxes)

# corner=True draws only the lower triangle of the pair grid.
g = sns.pairplot(data=CovidAll, corner=True)
# Stamp each lower-triangle panel with its correlation coefficient.
g.map_lower(corrfunc)
plt.show()

In [None]:
# Pair Plots as of 8/31/2020

# Use Seaborn to create a PairGrid
def corrfunc(x, y, ax=None, **kws):
    """Plot the correlation coefficient in the top left hand corner of a plot.

    Pearson's r is computed on the observations where both ``x`` and ``y`` are
    finite. The frames plotted in this notebook contain missing values, and
    passing NaN straight to ``pearsonr`` either raises or yields ``nan``
    depending on the SciPy version.

    Parameters
    ----------
    x, y : array-like of numbers
        Paired observations of equal length.
    ax : matplotlib Axes, optional
        Axes to annotate; the current axes are used when omitted.
    **kws
        Extra keyword arguments supplied by the caller (seaborn passes styling
        options such as ``color``); accepted and ignored.

    Returns
    -------
    None
        The axes are annotated with ``coor = <r>`` (two decimals); ``nan`` is
        shown when fewer than two complete pairs exist.
    """
    x_vals = np.asarray(x, dtype=float)
    y_vals = np.asarray(y, dtype=float)

    # Pairwise deletion: an observation counts only if both coordinates are usable.
    paired = np.isfinite(x_vals) & np.isfinite(y_vals)

    if paired.sum() >= 2:
        r, _ = pearsonr(x_vals[paired], y_vals[paired])
    else:
        # pearsonr needs at least two points; report "no estimate" instead of failing.
        r = float("nan")

    if ax is None:
        ax = plt.gca()
    ax.annotate(f'coor = {r:.2f}', xy=(.1, .9), xycoords=ax.transAxes)

# corner=True draws only the lower triangle of the pair grid.
g = sns.pairplot(data=CovidAll820, corner=True)
# Stamp each lower-triangle panel with its correlation coefficient.
g.map_lower(corrfunc)
plt.show()

In [None]:
# Pair Plots by continent
# Use Seaborn to create a PairGrid
def corrfunc(x, y, ax=None, **kws):
    """Plot the correlation coefficient in the top left hand corner of a plot.

    Pearson's r is computed on the observations where both ``x`` and ``y`` are
    finite. The frames plotted in this notebook contain missing values, and
    passing NaN straight to ``pearsonr`` either raises or yields ``nan``
    depending on the SciPy version.

    Parameters
    ----------
    x, y : array-like of numbers
        Paired observations of equal length.
    ax : matplotlib Axes, optional
        Axes to annotate; the current axes are used when omitted.
    **kws
        Extra keyword arguments supplied by the caller (seaborn passes styling
        options such as ``color``); accepted and ignored.

    Returns
    -------
    None
        The axes are annotated with ``coor = <r>`` (two decimals); ``nan`` is
        shown when fewer than two complete pairs exist.
    """
    x_vals = np.asarray(x, dtype=float)
    y_vals = np.asarray(y, dtype=float)

    # Pairwise deletion: an observation counts only if both coordinates are usable.
    paired = np.isfinite(x_vals) & np.isfinite(y_vals)

    if paired.sum() >= 2:
        r, _ = pearsonr(x_vals[paired], y_vals[paired])
    else:
        # pearsonr needs at least two points; report "no estimate" instead of failing.
        r = float("nan")

    if ax is None:
        ax = plt.gca()
    ax.annotate(f'coor = {r:.2f}', xy=(.1, .9), xycoords=ax.transAxes)

# corner=True draws only the lower triangle of the pair grid.
g = sns.pairplot(data=Continent, corner=True)
# Stamp each lower-triangle panel with its correlation coefficient.
g.map_lower(corrfunc)
plt.show()

### Playing with Maps

In [4]:
# Separate copy of the cleaned snapshot for the map experiments, so changes made to
# MapsFrame do not affect CovidClean.
MapsFrame = CovidClean.copy()
MapsFrame

Unnamed: 0,iso_code,continent,Country,date,Cases,Deaths,Vaxxed,Population,Density,GDP,HDI
428,AFG,Asia,Afghanistan,2021-04-27,1521.385,66.738,,3.892834e+07,54.422,1803.987,0.511
868,OWID_AFR,,Africa,2021-04-27,3373.910,90.104,0.36,1.340598e+09,,,
1296,ALB,Europe,Albania,2021-04-27,45429.147,828.063,,2.877800e+06,104.871,11803.431,0.795
1724,DZA,Africa,Algeria,2021-04-27,2767.186,73.544,,4.385104e+07,17.348,13913.839,0.748
2146,AND,Europe,Andorra,2021-04-27,169818.158,1604.866,,7.726500e+04,163.755,,0.868
...,...,...,...,...,...,...,...,...,...,...,...
82874,VNM,Asia,Vietnam,2021-04-27,29.351,0.360,,9.733858e+07,308.127,6171.884,0.704
83336,OWID_WRL,,World,2021-04-27,19078.988,402.186,3.19,7.794799e+09,58.045,15469.207,0.737
83719,YEM,Asia,Yemen,2021-04-27,208.543,40.468,,2.982597e+07,53.508,1479.147,0.470
84125,ZMB,Africa,Zambia,2021-04-27,4972.706,67.940,,1.838396e+07,22.995,3689.251,0.584
