In [1]:
import numpy as np
import pandas as pd
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Load the dataset; the path is relative to the current working directory
df = pd.read_csv("cleaned_covid_data.csv")

# Predictor columns the model is trained on, and the column it predicts
expected_features = ['growth_rate', 'mortality_rate', 'total_cases']
target_column = "total_deaths"  # Adjust target variable if needed

# Ensure dataset has the correct columns (fail fast on the first missing one)
for feature in expected_features:
    if feature not in df.columns:
        raise ValueError(f"❌ Missing column: {feature} in dataset")

# Validate the target the same way so a missing label column is reported with
# the same clear message instead of surfacing as a bare KeyError below.
if target_column not in df.columns:
    raise ValueError(f"❌ Missing column: {target_column} in dataset")

# Extract features (X) and target variable (y)
X = df[expected_features]
y = df[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Linear Regression Model
lin_reg = LinearRegression()
lin_reg.fit(X_train, y_train)

# Predict on test set
y_pred_lin = lin_reg.predict(X_test)

# Evaluate performance.
# RMSE is computed explicitly as sqrt(MSE): the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so this form works on both old and current releases.
rmse_lin = np.sqrt(mean_squared_error(y_test, y_pred_lin))
r2_lin = r2_score(y_test, y_pred_lin)

print(f"✅ Linear Regression - RMSE: {rmse_lin:.2f}, R² Score: {r2_lin:.2f}")

# Persist the fitted estimator so it can be reloaded later with joblib.load
joblib.dump(lin_reg, "regression_model.pkl")

print("✅ Model saved successfully as 'regression_model.pkl'")

✅ Linear Regression - RMSE: 2148.06, R² Score: 0.92
✅ Model saved successfully as 'regression_model.pkl'


