<a href="https://colab.research.google.com/github/henry8695/Henry-Woodward/blob/master/Coronavirus_16_03_2020.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setting up Environment

In [0]:
#Importing packages
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor

# Importing Files

In [1]:
# Prompt for the confirmed-cases CSV through Colab's file-upload helper.
# The recorded cell output shows it being saved as "confirmed.csv".
from google.colab import files
upload = files.upload()

Saving confirmed.csv to confirmed.csv


In [2]:
# Prompt for the deaths CSV through Colab's file-upload helper.
# The recorded cell output shows it being saved as "deaths.csv".
from google.colab import files
upload = files.upload()

Saving deaths.csv to deaths.csv


In [3]:
# Prompt for the total-population CSV through Colab's file-upload helper.
# The recorded cell output shows it being saved as
# "API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv".
from google.colab import files
upload = files.upload()

Saving API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv to API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv


In [4]:
# Prompt for the population-density CSV through Colab's file-upload helper.
# The recorded cell output shows it being saved as "population density.csv".
from google.colab import files
upload = files.upload()

Saving population density.csv to population density.csv


In [5]:
# Prompt for the urban-population CSV through Colab's file-upload helper.
# The recorded cell output shows it being saved as
# "API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_821361.csv".
from google.colab import files
upload = files.upload()

Saving API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_821361.csv to API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_821361.csv


In [87]:
# Prompt for the temperature CSV through Colab's file-upload helper.
# The recorded cell output shows it being saved as "temperature.csv".
# NOTE(review): no cell visible in this notebook reads temperature.csv; the
# temperature table used later is typed in by hand - confirm whether this
# upload is still needed.
from google.colab import files
upload = files.upload()

Saving temperature.csv to temperature.csv


# Setting up Dataframes

In [0]:
# Load each uploaded CSV into its own DataFrame (file names match the upload outputs)
covid_cases, covid_deaths, population, density, urban = (
    pd.read_csv(csv_name)
    for csv_name in (
        "confirmed.csv",
        "deaths.csv",
        "API_SP.POP.TOTL_DS2_en_csv_v2_821007.csv",
        "population density.csv",
        "API_SP.URB.TOTL.IN.ZS_DS2_en_csv_v2_821361.csv",
    )
)

# Pre-processing Covid-19 Cases for Original Model

In [0]:
# Preview the first rows of the confirmed-cases table as loaded from confirmed.csv
covid_cases.head()

In [0]:
# Keep only rows whose 03/04/2020 confirmed count is above 30
above_case_floor = covid_cases["03/04/2020"] > 30
covid_cases = covid_cases.loc[above_case_floor]
covid_cases

In [0]:
# Shorten the "Country/Region" header to "Country" (the key used by every later merge)
covid_cases = covid_cases.rename(columns={"Country/Region": "Country"})
covid_cases

In [0]:
# Remove rows for China, the US, Taiwan and the non-country entries
# ("Others", "Cruise Ship") in a single filter
excluded_countries = [
    "Mainland China",
    "US",
    "Others",
    "Taipei and environs",
    "China",
    "Cruise Ship",
    "Taiwan*",
]
covid_cases = covid_cases[~covid_cases["Country"].isin(excluded_countries)]
covid_cases

In [0]:
# List the column labels so the ones to drop in the next step can be picked out
covid_cases.columns

In [0]:
# Drop the location metadata and the listed daily columns from 1/22/20 through 03/03/2020
# (labels are copied exactly as they appear in the CSV header, mixed formats included)
columns_to_remove = [
    'Province/State', 'Lat', 'Long',
    '1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
    '1/28/20', '1/29/20', '1/30/20', '1/31/20',
    '02/01/2020', '02/02/2020', '02/03/2020', '02/04/2020', '02/05/2020',
    '02/06/2020', '02/07/2020', '02/08/2020', '02/09/2020', '02/10/2020',
    '02/11/2020', '02/12/2020',
    '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20',
    '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20',
    '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20',
    '03/01/2020', '03/02/2020', '03/03/2020',
]
covid_cases = covid_cases.drop(columns=columns_to_remove)
covid_cases

In [0]:
# Growth rate = confirmed cases on 03/11/2020 divided by confirmed cases on 03/04/2020
seven_day_ratio = covid_cases['03/11/2020'].div(covid_cases['03/04/2020'])
covid_cases = covid_cases.assign(**{'Growth Rate': seven_day_ratio})
covid_cases

In [0]:

# Distinct country names remaining in covid_cases (reference list for matching the other datasets)
covid_cases["Country"].unique()

# Pre-processing Population Density Data

In [0]:
# Display the population-density table as loaded from "population density.csv"
density

In [0]:
# Keep only the country name and the 2018 value
density = density[["Country Name", "2018"]]
density

In [0]:
# Give the two retained columns the names used in the rest of the notebook
density = density.rename(
    columns={"Country Name": "Country", "2018": "Population Density"}
)
density

In [0]:
# List the distinct country names in density so they can be compared with
# the names used in covid_cases before merging on "Country"
density["Country"].unique()

In [0]:
#Countries in covid_cases: 'Thailand', 'Japan', 'Singapore', 'Malaysia', 'Germany', 'Italy','Sweden', 'Spain', 'Iraq', 'Bahrain', 'Kuwait', 'Switzerland','Norway', 'Netherlands', 'Iran', 'Korea, South', 'France','Taiwan*', 'United Kingdom'
#TODO: automate this check instead of comparing names by hand, e.g. set(covid_cases["Country"]) - set(density["Country"])
#Names that need to change in density: 'Iran, Islamic Rep.' to 'Iran'; 'Korea, Rep.' to 'Korea, South'; 'UK' to 'United Kingdom'

In [0]:
# Align density's country spellings with the ones used in covid_cases
density_name_fixes = {
    "UK": "United Kingdom",
    "Iran, Islamic Rep.": "Iran",
    "Korea, Rep.": "Korea, South",
}
density["Country"] = density["Country"].replace(density_name_fixes)
density["Country"].unique()

In [0]:
# Treat densities of 1000 or more as outliers and drop them.
# Rows with a missing density are dropped too, because NaN < 1000 evaluates to False.
below_density_cap = density["Population Density"] < 1000
density = density.loc[below_density_cap]
density

In [0]:
# Attach population density to each country (default inner join on "Country":
# countries absent from density are dropped from covid_cases)
covid_cases = covid_cases.merge(density, on="Country")
covid_cases

# Pre-processing Population Data

In [0]:
# Display the total-population table as loaded from its CSV
population

In [0]:
# Keep only the country name and the 2018 value
population = population[["Country Name", "2018"]]

In [0]:
# Give the two retained columns the names used in the rest of the notebook
population = population.rename(
    columns={"Country Name": "Country", "2018": "Population"}
)

In [0]:
# Align population's country spellings with the ones used in covid_cases
population_name_fixes = {
    "UK": "United Kingdom",
    "Iran, Islamic Rep.": "Iran",
    "Korea, Rep.": "Korea, South",
}
population["Country"] = population["Country"].replace(population_name_fixes)
population["Country"].unique()

In [0]:
# Attach total population to each country (default inner join on "Country")
covid_cases = covid_cases.merge(population, on="Country")
covid_cases

# Pre-processing Urban Population Data

In [0]:
# Display the urban-population table as loaded from its CSV
urban

In [0]:
# Keep only the country name and the 2018 value
urban = urban[["Country Name", "2018"]]

In [0]:
# Give the two retained columns the names used in the rest of the notebook
urban = urban.rename(columns={"Country Name": "Country", "2018": "Urban"})

In [0]:
# Align urban's country spellings with the ones used in covid_cases
urban_name_fixes = {
    "UK": "United Kingdom",
    "Iran, Islamic Rep.": "Iran",
    "Korea, Rep.": "Korea, South",
}
urban["Country"] = urban["Country"].replace(urban_name_fixes)
urban["Country"].unique()

In [0]:
# Attach the urban-population figure to each country (default inner join on "Country")
covid_cases = covid_cases.merge(urban, on="Country")
covid_cases

# Creating Temperature Dataframe

In [0]:
# Show the merged table (cases plus density, population and urban columns)
covid_cases

In [250]:
# Countries that survived all merges; the hand-typed temperature table below uses this list
covid_cases["Country"].unique()

array(['Thailand', 'Japan', 'Malaysia', 'Germany', 'Italy', 'Sweden',
       'Spain', 'Iraq', 'Kuwait', 'Switzerland', 'Norway', 'Netherlands',
       'Iran', 'Korea, South', 'France', 'United Kingdom'], dtype=object)

In [0]:
# Hand-entered temperature per country, one (country, value) pair per row.
# NOTE(review): values look like kelvin - confirm the unit and the source.
country_temperatures = [
    ('Thailand', 301),
    ('Japan', 282),
    ('Malaysia', 300),
    ('Germany', 275),
    ('Italy', 282),
    ('Sweden', 270),
    ('Spain', 280),
    ('Iraq', 286),
    ('Kuwait', 288),
    ('Switzerland', 275),
    ('Norway', 274),
    ('Netherlands', 276),
    ('Iran', 275),
    ('Korea, South', 272),
    ('France', 277),
    ('United Kingdom', 278),
]
temperature = pd.DataFrame(country_temperatures, columns=['Country', 'Temperature'])
temperature

In [0]:
# Attach the temperature value to each country (default inner join on "Country")
covid_cases = covid_cases.merge(temperature, on="Country")
covid_cases

# Country Comparison Graph

In [0]:
# Plot each country's confirmed-case counts across the retained date columns.
# The date columns are found by excluding the known derived/merged columns by
# name instead of slicing positions [1:-5]; the positional slice silently
# includes or drops the wrong rows once later cells append more columns to
# covid_cases and this cell is re-run.
non_date_columns = [
    "Growth Rate", "Population Density", "Population", "Urban",
    "Temperature", "Death Rate", "Predicted Cases",
]
# Setting "Country" as the index before transposing keeps the counts numeric
# (transposing with the string column included makes everything object dtype).
graph = (
    covid_cases
    .set_index("Country")
    .drop(columns=non_date_columns, errors="ignore")
    .T
)
ax = graph.plot()
ax.set_xlabel("Date")
ax.set_ylabel("Confirmed cases")
ax.set_title("Confirmed Cases by Country");

# Government Performance

In [0]:
# Build the deaths table for the countries kept in covid_cases
covid_deaths = covid_deaths.rename(columns={"Country/Region": "Country"})

# Inner join on "Country": every deaths row whose country appears in covid_cases
country_keys = covid_cases[["Country"]]
cases_deaths = country_keys.merge(covid_deaths, on="Country")

# Keep only rows with more than 20 deaths in the 3/14/20 column
cases_deaths = cases_deaths.loc[cases_deaths["3/14/20"] > 20]

# Drop the location metadata and the listed daily columns from 1/22/20 through 03/03/2020
early_columns = [
    'Province/State', 'Lat', 'Long',
    '1/22/20', '1/23/20', '1/24/20', '1/25/20', '1/26/20', '1/27/20',
    '1/28/20', '1/29/20', '1/30/20', '1/31/20',
    '02/01/2020', '02/02/2020', '02/03/2020', '02/04/2020', '02/05/2020',
    '02/06/2020', '02/07/2020', '02/08/2020', '02/09/2020', '02/10/2020',
    '02/11/2020', '02/12/2020',
    '2/13/20', '2/14/20', '2/15/20', '2/16/20', '2/17/20', '2/18/20',
    '2/19/20', '2/20/20', '2/21/20', '2/22/20', '2/23/20', '2/24/20',
    '2/25/20', '2/26/20', '2/27/20', '2/28/20', '2/29/20',
    '03/01/2020', '03/02/2020', '03/03/2020',
]
cases_deaths = cases_deaths.drop(columns=early_columns)
cases_deaths

In [0]:
# Death rate = deaths on 3/14/20 divided by confirmed cases on 3/14/20, per country.
#
# cases_deaths and covid_cases are produced by different merges and filters, so
# their row indexes do not identify the same countries. Dividing the two columns
# directly aligns on those index labels and can pair one country's deaths with
# another country's cases. Look the deaths up by country name instead.
deaths_by_country = cases_deaths.groupby("Country")["3/14/20"].sum()
deaths_on_14th = covid_cases["Country"].map(deaths_by_country)

# Countries with no row in cases_deaths (those filtered out for having
# 20 or fewer deaths) get NaN.
covid_cases["Death Rate"] = deaths_on_14th / covid_cases["3/14/20"]
covid_cases["Death Rate"]

In [0]:
# Project the 03/11/2020 count forward by raising the growth rate to the power 3/7
# (the growth rate is the 03/04 -> 03/11 ratio, so 3/7 of it covers three days)
three_day_factor = covid_cases["Growth Rate"].pow(3 / 7)
covid_cases["Predicted Cases"] = covid_cases["03/11/2020"].mul(three_day_factor)

In [0]:
# Collect the columns needed for the government-impact comparison.
# Selecting them directly keeps the numeric dtypes; building the frame from a
# tuple of Series and transposing it turns every column into object dtype.
# .copy() makes Gov_impact independent of covid_cases, so adding columns to it
# later does not touch (or warn about) the source frame.
Gov_impact = covid_cases[
    ["Country", "3/14/20", "Predicted Cases", "Death Rate"]
].copy()
Gov_impact

In [0]:
# Proxy for government impact: reported 3/14/20 cases minus the predicted cases
# (a negative value means fewer cases were reported than predicted)
case_gap = Gov_impact["3/14/20"].sub(Gov_impact["Predicted Cases"])
Gov_impact["Government Impact"] = case_gap
Gov_impact["Government Impact"]

In [0]:
# Bar chart: government-impact proxy as a fraction of each country's reported 3/14/20 cases
countries = Gov_impact["Country"]
relative_case_gap = Gov_impact["Government Impact"].div(Gov_impact["3/14/20"])
plt.bar(countries, relative_case_gap)
plt.xlabel('Country', fontsize=10)
plt.ylabel('Government Impact', fontsize=10)
plt.xticks(countries, fontsize=10, rotation=45)
plt.title('Government Impact on Confirmed Cases')

In [0]:
# Bar chart: the case gap (reported minus predicted) converted to deaths via the
# death rate, scaled per 1,000 reported cases on 3/14/20.
# Negative bars mean fewer deaths than the projection implied; countries with a
# missing death rate have no bar.
x=Gov_impact["Country"]
y=Gov_impact["Government Impact"]*Gov_impact["Death Rate"]/Gov_impact['3/14/20']*1000
plt.bar(x,y)
plt.xlabel('Country', fontsize=10)
# The y-axis label previously repeated the first chart's 'Government Impact',
# which does not describe the quantity plotted here.
plt.ylabel('Excess deaths per 1,000 confirmed cases', fontsize=10)
plt.xticks(x, fontsize=10, rotation=45)
plt.title('Government Impact on Lives Saved');