In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_absolute_percentage_error, mean_squared_error, r2_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

In [2]:
# Read the cleaned train and test splits from the local data directory
training_data, test_data = (
    pd.read_csv(csv_path)
    for csv_path in ("data/train_data.csv", "data/test_data.csv")
)

In [3]:
# Column names used as model inputs and as the regression target
features = ["Plant_Production_GWh", "Population_k", "tmax"]
target = ["Max_Demand_GW"]

# `target` is a one-element list, so the y_* selections below are
# single-column DataFrames rather than Series.
x_train, y_train = training_data[features], training_data[target]
x_test, y_test = test_data[features], test_data[target]

# Linear Regression

In [None]:
# Fit a linear regression on the training split; `fit` returns the estimator
# itself, so construction and fitting can be chained.
linear_model = LinearRegression().fit(x_train, y_train)
linear_model

In [None]:
# Predict the target for the test features with the fitted linear model
linear_model_prediction = linear_model.predict(x_test)

# Mean absolute error between observed and predicted test values
mae_test = mean_absolute_error(y_true=y_test, y_pred=linear_model_prediction)
print(f"Mean Absolute Error (MAE): {mae_test}")

def mean_absolute_percentage_error(actual, predictions):
    """Return the mean absolute percentage error (MAPE) expressed in percent.

    Computes ``mean(|actual - predictions| / |actual|) * 100``.

    Note: this definition shadows the function of the same name imported from
    ``sklearn.metrics`` at the top of the notebook. Unlike that function, the
    result here is scaled by 100.

    Parameters
    ----------
    actual : array-like
        Observed values. Entries must be non-zero; a zero yields ``inf``/``nan``
        for that sample because it is used as the divisor.
    predictions : array-like
        Predicted values, one per observed value.

    Returns
    -------
    float
        MAPE in percent.
    """
    # Flatten both inputs so that a flat vector paired with an (n, 1) column
    # (e.g. a Series against a 2-D model output) is compared element-wise
    # instead of silently broadcasting to an (n, n) matrix.
    actual = np.asarray(actual, dtype=float).ravel()
    predictions = np.asarray(predictions, dtype=float).ravel()
    return np.mean(np.abs((actual - predictions) / actual)) * 100

mape = mean_absolute_percentage_error(y_test, linear_model_prediction)
print(f'Mean Absolute Percentage (MAPE): {mape:.2f}%')

# Mean percentage error: the signed counterpart of MAPE (positive when the
# model under-predicts on average). It is computed on flattened NumPy arrays
# because `y_test` is a DataFrame; np.mean over a DataFrame expression defers
# to DataFrame.mean(axis=None), which, depending on the pandas version, yields
# a per-column Series instead of a scalar.
y_test_values = np.asarray(y_test, dtype=float).ravel()
linear_prediction_values = np.asarray(linear_model_prediction, dtype=float).ravel()
mpe = np.mean((y_test_values - linear_prediction_values) / y_test_values) * 100
print(f"Mean Percentage Error (MPE): {mpe}")

mse_test = mean_squared_error(y_test, linear_model_prediction)
print(f"Mean Squared Error (MSE): {mse_test}")

r2 = r2_score(y_test, linear_model_prediction)
print(f"R-squared (R2): {r2}")

In [None]:
# Scatter the linear model's predictions against the observed test values.
# The red y = x line marks where perfectly predicted points would fall.
fig, ax = plt.subplots()
ax.scatter(y_test, linear_model_prediction, label="Actual vs Predicted")
ax.plot(y_test, y_test, color="red", label="Ideal Line")
# The plotted data is the test split, so the title names it as such.
ax.set(
    title="Actual vs Predicted Values on Test Dataset",
    xlabel="Actual Values",
    ylabel="Predicted Values",
)
ax.legend()
plt.show()

In [None]:
# Overlay observed and predicted demand against the test-set dates
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_data["Date"], test_data["Max_Demand_GW"], color="green", label="True Values")
ax.plot(test_data["Date"], linear_model_prediction, color="orange", label="Predictions")
# Rotate the x tick labels so they are drawn vertically
plt.xticks(rotation=90)
ax.set_xlabel("Date")
ax.set_ylabel("Max_Demand_GW")
ax.legend()
plt.show()

# Polynomial Regression

In [None]:
# Degree-2 polynomial regression: expand the inputs into polynomial terms,
# then fit a linear regression on the expanded features. `fit` returns the
# pipeline itself, so it can be chained onto the constructor.
polynomial_model = make_pipeline(
    PolynomialFeatures(degree=2),
    LinearRegression(),
).fit(x_train, y_train)
polynomial_model

In [None]:
# Make predictions on the test dataset
polynomial_model_prediction = polynomial_model.predict(x_test)

# Model evaluation
mae_test = mean_absolute_error(y_test, polynomial_model_prediction)
print(f"Mean Absolute Error (MAE): {mae_test}")

mape = mean_absolute_percentage_error(y_test, polynomial_model_prediction)
print(f'Mean Absolute Percentage (MAPE): {mape:.2f}%')

# Mean percentage error: the signed counterpart of MAPE (positive when the
# model under-predicts on average). It is computed on flattened NumPy arrays
# because `y_test` is a DataFrame; np.mean over a DataFrame expression defers
# to DataFrame.mean(axis=None), which, depending on the pandas version, yields
# a per-column Series instead of a scalar.
y_test_values = np.asarray(y_test, dtype=float).ravel()
polynomial_prediction_values = np.asarray(polynomial_model_prediction, dtype=float).ravel()
mpe = np.mean((y_test_values - polynomial_prediction_values) / y_test_values) * 100
print(f"Mean Percentage Error (MPE): {mpe}")

mse_test = mean_squared_error(y_test, polynomial_model_prediction)
print(f"Mean Squared Error (MSE): {mse_test}")

r2 = r2_score(y_test, polynomial_model_prediction)
print(f"R-squared (R2): {r2}")

def calculate_wape(actual, prediction):
    """Return a residual-weighted mean absolute percentage error, in percent.

    Each sample's absolute percentage error
    ``|actual - prediction| / |actual| * 100`` is averaged with a weight equal
    to that sample's absolute residual divided by the largest absolute
    residual, so the samples with the biggest misses dominate the score.

    NOTE(review): this is not the conventional WAPE, which is usually defined
    as ``sum(|actual - prediction|) / sum(|actual|)``. Confirm which definition
    is intended before comparing against externally reported WAPE figures.

    Parameters
    ----------
    actual : array-like
        Observed values. Entries must be non-zero because they are used as the
        divisor of the percentage error.
    prediction : array-like
        Predicted values, one per observed value.

    Returns
    -------
    float
        The weighted percentage error; ``0.0`` when every prediction is exact.
    """
    # Flatten so a flat vector and an (n, 1) column are compared element-wise
    # rather than broadcast against each other.
    actual = np.asarray(actual, dtype=float).ravel()
    prediction = np.asarray(prediction, dtype=float).ravel()

    residuals = actual - prediction
    largest_residual = np.max(np.abs(residuals))
    if largest_residual == 0:
        # Perfect predictions: the normalisation below would be 0 / 0 and the
        # result NaN, so report a zero error directly.
        return 0.0

    weights = np.abs(residuals / largest_residual)
    abs_percentage_error = np.abs(residuals / actual) * 100

    return np.average(abs_percentage_error, weights=weights)

# Score the polynomial model's test predictions with the custom WAPE metric
wape = calculate_wape(actual=y_test, prediction=polynomial_model_prediction)
print(f"WAPE: {wape}")

In [None]:
# Scatter the polynomial model's predictions against the observed test values.
# The red y = x line marks where perfectly predicted points would fall.
fig, ax = plt.subplots()
ax.scatter(y_test, polynomial_model_prediction, label="Actual vs Predicted")
ax.plot(y_test, y_test, color="red", label="Ideal Line")
# The plotted data is the test split, so the title names it as such.
ax.set(
    title="Actual vs Predicted Values on Test Dataset",
    xlabel="Actual Values",
    ylabel="Predicted Values",
)
ax.legend()
plt.show()

In [None]:
# Overlay observed and predicted demand against the test-set dates
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_data["Date"], test_data["Max_Demand_GW"], color="green", label="True Values")
ax.plot(test_data["Date"], polynomial_model_prediction, color="orange", label="Predictions")
# Rotate the x tick labels so they are drawn vertically
plt.xticks(rotation=90)
ax.set_xlabel("Date")
ax.set_ylabel("Max_Demand_GW")
ax.legend()
plt.show()