# 0.0 - Imports

In [1]:
import warnings

import numpy as np
import pandas as pd
from sklearn import metrics as mt
from sklearn.linear_model import LinearRegression

In [2]:
# Install a blanket "ignore" filter: every warning category, from every
# module, is silenced for the rest of the session.
warnings.simplefilter("ignore")


def root_mean_squared_error(y_true, y_pred):
    """Return the root mean squared error between targets and predictions.

    Both arguments are forwarded unchanged to ``mt.mean_squared_error``
    (``y_true`` first, ``y_pred`` second); the square root of that value is
    returned.
    """
    mse = mt.mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# 1.0 - Load Data

In [3]:
# Each split is stored as two CSV files: a feature table (X) and a target
# table (y). Both are read as DataFrames here.

# Training split
X_train, y_train = (
    pd.read_csv("../data/regression_X_training.csv"),
    pd.read_csv("../data/regression_y_training.csv"),
)

# Validation split
X_val, y_val = (
    pd.read_csv("../data/regression_X_validation.csv"),
    pd.read_csv("../data/regression_y_validation.csv"),
)

# Test split
X_test, y_test = (
    pd.read_csv("../data/regression_X_test.csv"),
    pd.read_csv("../data/regression_y_test.csv"),
)

In [4]:
# Reduce each target table to its first column so the model and the metric
# functions receive a 1-D Series.
# The isinstance guard keeps this cell idempotent: once a target has been
# converted to a Series, re-running the cell leaves it untouched instead of
# failing on the two-axis `.iloc[:, 0]` lookup, which a Series does not support.
y_train, y_val, y_test = (
    target.iloc[:, 0] if isinstance(target, pd.DataFrame) else target
    for target in (y_train, y_val, y_test)
)

# 2.0 - Linear Regression

## 2.1 - Performance for Train Dataset

In [5]:
# Build the linear model and fit it on the training split in one step
# (`fit` returns the fitted estimator itself).
linear_reg = LinearRegression().fit(X_train, y_train)

# In-sample predictions on the data the model was fitted on
y_pred_train = linear_reg.predict(X_train)

# Training-split scores; every metric takes (true values, predictions)
mse_train = mt.mean_squared_error(y_train, y_pred_train)
rmse_train = root_mean_squared_error(y_train, y_pred_train)
mae_train = mt.mean_absolute_error(y_train, y_pred_train)
mape_train = mt.mean_absolute_percentage_error(y_train, y_pred_train)
r2_train = mt.r2_score(y_train, y_pred_train)

# One line per metric, in the order R2, MSE, RMSE, MAE, MAPE
print(
    f"Train R2: {r2_train}",
    f"Train MSE: {mse_train}",
    f"Train RMSE: {rmse_train}",
    f"Train MAE: {mae_train}",
    f"Train MAPE: {mape_train}",
    sep="\n",
)

Train R2: 0.04605830473391903
Train MSE: 455.99611182562677
Train RMSE: 21.35406546364478
Train MAE: 16.998249066011095
Train MAPE: 8.653185943804514


## 2.2 - Performance for Validation Dataset

In [6]:
# Re-create the linear model and fit it on the training split
# (`fit` returns the fitted estimator itself).
linear_reg = LinearRegression().fit(X_train, y_train)

# Predictions for the validation features, which were not used for fitting
y_pred_val = linear_reg.predict(X_val)

# Validation-split scores; every metric takes (true values, predictions)
mse_val = mt.mean_squared_error(y_val, y_pred_val)
rmse_val = root_mean_squared_error(y_val, y_pred_val)
mae_val = mt.mean_absolute_error(y_val, y_pred_val)
mape_val = mt.mean_absolute_percentage_error(y_val, y_pred_val)
r2_val = mt.r2_score(y_val, y_pred_val)

# One line per metric, in the order R2, MSE, RMSE, MAE, MAPE
print(
    f"Validation R2: {r2_val}",
    f"Validation MSE: {mse_val}",
    f"Validation RMSE: {rmse_val}",
    f"Validation MAE: {mae_val}",
    f"Validation MAPE: {mape_val}",
    sep="\n",
)

Validation R2: 0.0399248303815406
Validation MSE: 458.44704184393123
Validation RMSE: 21.41137645841414
Validation MAE: 17.039753759960327
Validation MAPE: 8.682541883735295


## 2.3 - Performance for Test Dataset

In [7]:
# Predict on the held-out test split with the model already fitted on the
# training split; the test data is only used for scoring, never for fitting.
# NOTE(review): if the final model is meant to be refit on train + validation
# before this evaluation, do that refit here instead - confirm with the author.
y_pred_test = linear_reg.predict(X_test)

# Performance Metrics (same five scores as the train and validation sections)
r2_test = mt.r2_score(y_test, y_pred_test)
mse_test = mt.mean_squared_error(y_test, y_pred_test)
rmse_test = root_mean_squared_error(y_test, y_pred_test)
mae_test = mt.mean_absolute_error(y_test, y_pred_test)
mape_test = mt.mean_absolute_percentage_error(y_test, y_pred_test)

print(f"Test R2: {r2_test}")
print(f"Test MSE: {mse_test}")
print(f"Test RMSE: {rmse_test}")
print(f"Test MAE: {mae_test}")
print(f"Test MAPE: {mape_test}")

# 3.0 - Save Results

In [8]:
def _round_metric(value, decimals=3):
    """Round a numeric metric; return any non-numeric value unchanged.

    A non-numeric value (for example an empty-string placeholder for a metric
    that was not computed) cannot be rounded, so it is written out as-is.
    """
    if isinstance(value, (int, float, np.number)):
        return np.round(value, decimals)
    return value


def _metrics_row(r2, mse, rmse, mae, mape):
    """Build the one-row record saved for a single dataset split."""
    return {
        "Algorithm": "Linear Regression",
        "R-Squared": _round_metric(r2),
        "MSE": _round_metric(mse),
        "RMSE": _round_metric(rmse),
        "MAE": _round_metric(mae),
        "MAPE": _round_metric(mape),
    }


# All three splits go through the same builder so their rows share the same
# columns and the same 3-decimal rounding.
train_metrics = _metrics_row(r2_train, mse_train, rmse_train, mae_train, mape_train)
validation_metrics = _metrics_row(r2_val, mse_val, rmse_val, mae_val, mape_val)
test_metrics = _metrics_row(r2_test, mse_test, rmse_test, mae_test, mape_test)

# mode="a" appends one row per run to each file (creating it if missing) and
# header=False writes no column names; the DataFrame index (0) is written as
# the first field. Re-running this cell therefore adds another row.
# NOTE(review): presumably these files are shared with other algorithm
# notebooks and the header is written elsewhere - confirm.
for split_metrics, csv_path in (
    (train_metrics, "./reg_train_metrics.csv"),
    (validation_metrics, "./reg_validation_metrics.csv"),
    (test_metrics, "./reg_test_metrics.csv"),
):
    pd.DataFrame(split_metrics, index=[0]).to_csv(
        csv_path, mode="a", header=False
    )