In [111]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.ensemble import RandomForestRegressor


# Load the dataset
df = pd.read_csv('cleaned_assessment_centre_locations.csv')

In [104]:
X = df[['testing_enrolled', 'location_id', 'latitude', 'longitude', 'appointments', 
        'phone_appointments', 'symptomatic', 'drive_through', 'accessible', 
        'walk_ins', 'children_under_2', 'public_transit', 'french_language_services', 
        'free_parking', 'general_population', 'asl_interpretation', 'first_nations', 
        'community_lab', 'vaccine_enrolled', 'pharmacy']]

In [105]:
y = df['latitude']  # target column name

In [106]:
# Splitting data into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [107]:
# Standardize the features 
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [108]:
# 1. Linear Regression
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)
lr_pred = lr_model.predict(X_test_scaled)
lr_mse = mean_squared_error(y_test, lr_pred)
lr_r2 = r2_score(y_test, lr_pred)

print("Linear Regression MSE:", lr_mse)
print("Linear Regression R-squared:", lr_r2)


Linear Regression MSE: 5.048709793414476e-29
Linear Regression R-squared: 1.0


In [109]:
# 2. Decision Tree Regression
dt_model = DecisionTreeRegressor(random_state=42)
dt_model.fit(X_train, y_train)
dt_pred = dt_model.predict(X_test)
dt_mse = mean_squared_error(y_test, dt_pred)
dt_r2 = r2_score(y_test, dt_pred)

print("Decision Tree Regression MSE:", dt_mse)
print("Decision Tree Regression R-squared:", dt_r2)


Decision Tree Regression MSE: 3.1950510427741724e-05
Decision Tree Regression R-squared: 0.9999743867668272


In [112]:
# 3. Random Forest Regression
rf_model = RandomForestRegressor(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)
rf_mse = mean_squared_error(y_test, rf_pred)
rf_r2 = r2_score(y_test, rf_pred)

print("Random Forest Regression MSE:", rf_mse)
print("Random Forest Regression R-squared:", rf_r2)


Random Forest Regression MSE: 3.0713656750996857e-05
Random Forest Regression R-squared: 0.9999753782946994


the analyzing and comparing the capacity and utilization rates of COVID-19 assessment centers across different regions using regression models Linear Regression, Decision Tree Regression, and Random Forest Regression — using these numerical attributes,showing that the MSE and R-squared values for the **linear regression model**. Lower MSE values indicate better predictive performance, while higher R-squared values indicate a better fit of the model to the data.
