## Linear Regression Analysis of COVID-19 Data for Canada, Italy, India, and the United States
#### By: Ian

### Import needed Libraries

In [68]:
##Import Key Libraries for use
import datetime as dt
import math
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import ElasticNet
from sklearn.metrics import explained_variance_score, mean_absolute_error, r2_score, mean_squared_error

### Import and Scrub Data from Github

In [69]:
#Import CSV
covid19Dataframe = pd.read_csv("https://raw.githubusercontent.com/WhipSnake23/Python-Class-Project/main/Data/owid-covid-data.csv")

#Keep only the rows for Canada. An exact match is used instead of str.contains, which does
#substring/regex matching and produces NaN (an invalid boolean mask) for rows with a missing iso_code.
covid19Dataframe = covid19Dataframe[covid19Dataframe["iso_code"] == "CAN"]

#Remove Smoothed Columns from dataframe
covid19Dataframe = covid19Dataframe.drop(columns=['new_cases_smoothed','new_deaths_smoothed','new_cases_smoothed_per_million','new_deaths_smoothed_per_million','new_tests_smoothed','new_tests_smoothed_per_thousand','new_vaccinations_smoothed','new_vaccinations_smoothed_per_million'])

#Drop rows where any column the model depends on is blank. This is required for the ML
covid19Dataframe = covid19Dataframe.dropna(subset=['total_cases_per_million','total_deaths','hosp_patients'])

#Reset Index last, so the index is contiguous after the rows above have been removed
covid19Dataframe = covid19Dataframe.reset_index(drop=True)

#Import Comparison Data set containing data from March. The data set we are using stops in February
#canadaMarchCovidData = pd.read_csv("https://raw.githubusercontent.com/WhipSnake23/Python-Class-Project/main/Data/Canada-MarchData.csv")
#canadaMarchCovidData = canadaMarchCovidData.drop(columns=['new_cases_smoothed','new_deaths_smoothed','new_cases_smoothed_per_million','new_deaths_smoothed_per_million','new_tests_smoothed','new_tests_smoothed_per_thousand','new_vaccinations_smoothed','new_vaccinations_smoothed_per_million'])
#Reset Index
#canadaMarchCovidData.reset_index(drop=True, inplace=True)
#Drop rows where hosp_patients has not been reported
#canadaMarchCovidData.dropna(subset=['hosp_patients'],inplace=True)

### Define Notebook Functions

In [70]:
#These functions make it easier to test configuration changes and different feature sets

#Prints the scores from a run of the algorithm against the data
def printMetrics(test, predictions):
    """Print four regression metrics comparing predictions with the true values.

    test        -- the true target values
    predictions -- the values predicted by the model for the same rows

    Each metric is printed on its own line, rounded to two decimal places:
    explained variance (labelled "Score"), mean absolute error, root mean
    squared error and r2.
    """
    variance_explained = explained_variance_score(test, predictions)
    print(f"Score: {variance_explained:.2f}")

    absolute_error = mean_absolute_error(test, predictions)
    print(f"MAE: {absolute_error:.2f}")

    #RMSE is the square root of the mean squared error
    squared_error = mean_squared_error(test, predictions)
    print(f"RMSE: {math.sqrt(squared_error):.2f}")

    r_squared = r2_score(test, predictions)
    print(f"r2: {r_squared:.2f}")

#Function to run the algorithm and call the metric function
def RunModel(f_Features, f_Target, f_execution, test_size=0.25, random_state=1):
    """Fit an ElasticNet model on a train/test split and act on the requested mode.

    f_Features   -- the model inputs, passed straight to train_test_split
    f_Target     -- the values to predict, passed straight to train_test_split
    f_execution  -- "score" prints the metrics for the held-out test split;
                    "predict" is currently a placeholder that only prints "Hello"
    test_size    -- fraction of the rows held out for testing (default 0.25)
    random_state -- seed used for both the split and the ElasticNet model (default 1)

    Raises ValueError when f_execution is not one of the supported modes, so a
    typo in the mode is reported instead of silently training a model and
    producing no output.
    """
    supported_modes = ("score", "predict")
    #Validate before splitting/fitting so a bad mode fails fast
    if f_execution not in supported_modes:
        raise ValueError(
            f"Unknown f_execution {f_execution!r}; expected one of {supported_modes}"
        )

    x_train, x_test, y_train, y_test = train_test_split(
        f_Features, f_Target, test_size=test_size, random_state=random_state
    )
    model = ElasticNet(random_state=random_state)
    model.fit(x_train, y_train)

    if f_execution == "score":
        predictions = model.predict(x_test)
        printMetrics(y_test, predictions)
    elif f_execution == "predict":
        #TODO: placeholder - the fitted model is not used to predict anything yet
        print("Hello")

In [71]:
#Name of the dataframe column the model is trained to predict
targetColumn = "total_cases_per_million"

In [72]:
#Columns of the scrubbed dataframe that are given to the model as inputs
featureColumns = [
    "new_cases",
    "median_age",
    "population_density",
    "population",
    "total_deaths",
    "hosp_patients",
]

#Separate the model inputs from the column being predicted
features = covid19Dataframe.loc[:, featureColumns]
target = covid19Dataframe.loc[:, targetColumn]

#Train the model and print its metrics for the held-out test split
RunModel(features, target, "score")

#RunModel(features,target,"predict")


Score: 0.90
MAE: 1465.54
RMSE: 1912.15
r2: 0.90
