In [380]:
##Import Key Libraries for use
import datetime as dt
import math
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import explained_variance_score, mean_absolute_error, r2_score, mean_squared_error

In [381]:
#Import CSV
# The smoothed variants are dropped from both data files, so the list is defined once.
SMOOTHED_COLUMNS = [
    'new_cases_smoothed',
    'new_deaths_smoothed',
    'new_cases_smoothed_per_million',
    'new_deaths_smoothed_per_million',
    'new_tests_smoothed',
    'new_tests_smoothed_per_thousand',
    'new_vaccinations_smoothed',
    'new_vaccinations_smoothed_per_million',
]

covid19Dataframe = pd.read_csv("https://raw.githubusercontent.com/WhipSnake23/Python-Class-Project/main/Data/owid-covid-data.csv")
#Filter Dataset to Canada
# Exact comparison on the ISO code: str.contains("CAN") is a regex/substring match and
# produces NaN (not usable as a boolean mask) for any row whose iso_code is missing.
covid19Dataframe = covid19Dataframe[covid19Dataframe["iso_code"] == "CAN"]
#Remove Smoothed Columns from dataframe
covid19Dataframe = covid19Dataframe.drop(columns=SMOOTHED_COLUMNS)
#Reset Index
covid19Dataframe = covid19Dataframe.reset_index(drop=True)
# Drop only the rows that lack the target value; the later test cells remove rows
# missing their own extra feature columns themselves.
covid19Dataframe = covid19Dataframe.dropna(subset=['total_deaths'])


# March data used as the prediction input; it gets the same column clean-up.
canadaMarchCovidData = pd.read_csv("https://raw.githubusercontent.com/WhipSnake23/Python-Class-Project/main/Data/Canada-MarchData.csv")
canadaMarchCovidData = canadaMarchCovidData.drop(columns=SMOOTHED_COLUMNS)
#Reset Index
canadaMarchCovidData = canadaMarchCovidData.reset_index(drop=True)

In [382]:
##Functions
def printMetrics(test, predictions):
    """Print four regression metrics comparing ``predictions`` with ``test``.

    Each figure is written to stdout with two decimals, in this order:
    explained variance (labelled "Score"), mean absolute error, root mean
    squared error and R^2.

    Parameters:
        test: the true target values.
        predictions: the model output for the same rows.

    Returns:
        None; the function only prints.
    """
    explainedVariance = explained_variance_score(test, predictions)
    print(f"Score: {explainedVariance:.2f}")

    absoluteError = mean_absolute_error(test, predictions)
    print(f"MAE: {absoluteError:.2f}")

    # RMSE is the square root of the mean squared error.
    rootSquaredError = math.sqrt(mean_squared_error(test, predictions))
    print(f"RMSE: {rootSquaredError:.2f}")

    rSquared = r2_score(test, predictions)
    print(f"r2: {rSquared:.2f}")

def calculateScore(F_Features, F_Target):
    """Fit an ElasticNet on a train split and print its metrics on the test split.

    The data is split 75% / 25% with a fixed ``random_state`` of 1, so repeated
    calls with the same inputs use the same split.

    Parameters:
        F_Features: the model inputs.
        F_Target: the values to predict, aligned with ``F_Features``.

    Returns:
        None; the metrics are printed by ``printMetrics``.
    """
    trainInputs, heldOutInputs, trainTarget, heldOutTarget = train_test_split(
        F_Features,
        F_Target,
        test_size=0.25,
        random_state=1,
    )

    # Default hyper-parameters; fit() returns the estimator itself.
    model = ElasticNet().fit(trainInputs, trainTarget)

    printMetrics(heldOutTarget, model.predict(heldOutInputs))

def predictValue(F_Features, F_Target, predict_df):
    """Fit an ElasticNet and return its predictions for ``predict_df``.

    The model is trained on the 75% training portion of the same fixed split
    (``test_size=0.25``, ``random_state=1``) that ``calculateScore`` uses; the
    held-out 25% is not used here.

    Parameters:
        F_Features: the model inputs used for training.
        F_Target: the values to predict, aligned with ``F_Features``.
        predict_df: the rows to predict for, holding the same feature columns.

    Returns:
        The predictions produced by ``ElasticNet.predict`` for ``predict_df``.
    """
    # Only the training portion is needed; the test portion is discarded.
    trainInputs, _, trainTarget, _ = train_test_split(
        F_Features,
        F_Target,
        test_size=0.25,
        random_state=1,
    )

    model = ElasticNet()
    model.fit(trainInputs, trainTarget)
    return model.predict(predict_df)

#### Set Target Column

In [383]:
# Name of the column the models predict; the test cells read it to build Target.
targetColumn = "total_deaths"
# Module-level estimator instance. calculateScore and predictValue each create their
# own local ElasticNet, so they do not use this object.
# NOTE(review): appears unused in the visible cells — confirm before removing.
reg = ElasticNet()

### Test 1
- New Cases
- Median Age
- Population Density
- Population
- Total Cases

In [384]:
# Test 1 inputs: the five columns listed in the heading above.
featureColumns = [
    "new_cases",
    "median_age",
    "population_density",
    "population",
    "total_cases",
]

# Features and Target stay as notebook-level names because the prediction cell reuses them.
Target = covid19Dataframe[targetColumn]
Features = covid19Dataframe.loc[:, featureColumns]

calculateScore(Features, Target)

Score: 0.89
MAE: 1548.75
RMSE: 1921.68
r2: 0.89


In [385]:
##Prediction Methods
featureColumns = ["new_cases", "median_age","population_density","population",'total_cases']
predictionDataFrame = canadaMarchCovidData[featureColumns]
predictedValues = predictValue(Features,Target,predictionDataFrame)
predictedValues = np.around(predictedValues)
predictedValuesFrame = predictionDataFrame.copy()
predictedValuesFrame['Predicted_Death'] = predictedValues
predictedValuesFrame


Unnamed: 0,new_cases,median_age,population_density,population,total_cases,Predicted_Death
0,3694,41.4,4.037,37742157,875388,23683.0
1,2663,41.4,4.037,37742157,878051,24062.0
2,2784,41.4,4.037,37742157,880835,24088.0
3,2980,41.4,4.037,37742157,883815,24096.0
4,2967,41.4,4.037,37742157,886782,24167.0
5,2769,41.4,4.037,37742157,889551,24292.0
6,2648,41.4,4.037,37742157,892199,24390.0
7,4048,41.4,4.037,37742157,896247,24051.0
8,2928,41.4,4.037,37742157,899175,24463.0
9,3059,41.4,4.037,37742157,902234,24492.0


### Test 2
- New Cases
- Median Age
- Population Density
- Population

In [386]:
##Missing Total Cases
featureColumns = ["new_cases", "median_age","population_density","population"]

Features=covid19Dataframe[featureColumns]
Target=covid19Dataframe[targetColumn]

calculateScore(Features,Target)

Score: 0.35
MAE: 3503.82
RMSE: 4831.17
r2: 0.30


### Test 3
- New Cases
- Median Age
- Population Density
- Population
- Hospital Patients

In [387]:
# Discard the rows that have no hosp_patients value. The notebook-level frame itself
# is narrowed, so every cell executed after this one works on the reduced rows.
covid19Dataframe = covid19Dataframe.dropna(subset=["hosp_patients"])

featureColumns = [
    "new_cases",
    "median_age",
    "population_density",
    "population",
    "hosp_patients",
]

Target = covid19Dataframe[targetColumn]
Features = covid19Dataframe.loc[:, featureColumns]

calculateScore(Features, Target)

Score: 0.45
MAE: 2792.94
RMSE: 3672.82
r2: 0.45


### Test 4
- New Cases
- Median Age
- Population Density
- Population
- Total Cases
- Hospital Patients

In [388]:
featureColumns = ["new_cases", "median_age","population_density","population","hosp_patients","total_cases"]

# An earlier cell removes the rows lacking hosp_patients from covid19Dataframe. Repeat
# that filter here on a local copy (covid19Dataframe is not modified) so this cell does
# not depend on that cell having been executed first; when it has, this is a no-op.
hospRows = covid19Dataframe.dropna(subset=["hosp_patients"])

Features = hospRows[featureColumns]
Target = hospRows[targetColumn]

calculateScore(Features, Target)


Score: 0.85
MAE: 1431.59
RMSE: 1941.16
r2: 0.85


### Test 5
- New Cases
- Median Age
- Population Density
- Population
- Total Cases
- Hospital Patients
- Positive Rate

In [389]:
# Discard the rows that have no positive_rate value. The notebook-level frame itself
# is narrowed, so every cell executed after this one works on the reduced rows.
covid19Dataframe = covid19Dataframe.dropna(subset=["positive_rate"])

featureColumns = [
    "new_cases",
    "median_age",
    "population_density",
    "population",
    "hosp_patients",
    "total_cases",
    "positive_rate",
]

Target = covid19Dataframe[targetColumn]
Features = covid19Dataframe.loc[:, featureColumns]

calculateScore(Features, Target)

Score: 0.85
MAE: 1197.78
RMSE: 1784.20
r2: 0.85


### Test 6
- New Cases
- Median Age
- Population Density
- Population
- Total Cases
- Hospital Patients
- Positive Rate (multiplied by 100 before fitting)

In [390]:
# Scale positive_rate by 100 (presumably fraction -> percent; confirm against the data source).
# The unscaled values are kept in a helper column and the scaling always starts from
# them, so running this cell more than once does not multiply by 100 repeatedly.
if 'positive_rate_raw' not in covid19Dataframe.columns:
    covid19Dataframe['positive_rate_raw'] = covid19Dataframe['positive_rate']
covid19Dataframe['positive_rate'] = covid19Dataframe['positive_rate_raw'] * 100

featureColumns = ["new_cases", "median_age","population_density","population","hosp_patients","total_cases","positive_rate"]

Features=covid19Dataframe[featureColumns]
Target=covid19Dataframe[targetColumn]

calculateScore(Features,Target)

Score: 0.89
MAE: 923.87
RMSE: 1485.52
r2: 0.89


### Predicting using recent Covid Data in March

