# **0.0 - Importar Bibliotecas**

In [None]:
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import metrics as mt
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures

In [None]:
!pip install scikit-learn



In [None]:
# Install a blanket "ignore" filter so no warnings are shown from here on.
warnings.simplefilter("ignore")


def root_mean_squared_error(y_true, y_pred):
    """Return the square root of the mean squared error of the predictions.

    Args:
        y_true: Ground-truth target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        ``np.sqrt`` applied to ``mt.mean_squared_error(y_true, y_pred)``.
    """
    mse = mt.mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

# **1.0 - Baixar Datasets**

In [None]:
# Feature tables for the three splits, in train / validation / test order.
X_train, X_val, X_test = (
    pd.read_csv("/content/X_training.csv"),
    pd.read_csv("/content/X_validation.csv"),
    pd.read_csv("/content/X_test.csv"),
)

# Matching target tables, read in the same split order.
y_train, y_val, y_test = (
    pd.read_csv("/content/y_training.csv"),
    pd.read_csv("/content/y_validation.csv"),
    pd.read_csv("/content/y_test.csv"),
)

In [None]:
# Keep only the first column of each target table (DataFrame -> Series).
y_train, y_val, y_test = (
    frame.iloc[:, 0] for frame in (y_train, y_val, y_test)
)

# **2.0 - Performance do Dataset de Treinamento**

In [None]:
# Expand the training features with PolynomialFeatures in its default configuration.
poly = PolynomialFeatures()
X_poly_train = poly.fit_transform(X_train)

# Least-squares fit on the expanded features; fit() hands back the estimator itself.
poly_reg = LinearRegression().fit(X_poly_train, y_train)

# In-sample predictions: the model is scored on the same rows it was fitted on.
y_pred_train = poly_reg.predict(X_poly_train)

# Training-set metrics (these names are read again when the results are saved).
mape_train = mt.mean_absolute_percentage_error(y_train, y_pred_train)
mae_train = mt.mean_absolute_error(y_train, y_pred_train)
rmse_train = root_mean_squared_error(y_train, y_pred_train)
mse_train = mt.mean_squared_error(y_train, y_pred_train)
r2_train = mt.r2_score(y_train, y_pred_train)

# Report in the fixed order R2, MSE, RMSE, MAE, MAPE.
print(f"Train R2: {r2_train}")
print(f"Train MSE: {mse_train}")
print(f"Train RMSE: {rmse_train}")
print(f"Train MAE: {mae_train}")
print(f"Train MAPE: {mape_train}")

Train R2: 0.7510324665000363
Train MSE: 0.061137572484780946
Train RMSE: 0.24726013120756235
Train MAE: 0.1778171548041916
Train MAPE: 392079852874417.8


# **3.0 - Performance do Dataset de Validação**

In [None]:
# Define polynomial model; the feature expansion is fitted on the training split only.
poly = PolynomialFeatures()
X_poly_train = poly.fit_transform(X_train)
# Reuse the transformer fitted above for the validation data. Held-out splits
# must only be transformed, never used to refit a preprocessing step.
X_poly_val = poly.transform(X_val)

# Train and Fit Model
poly_reg = LinearRegression()
poly_reg.fit(X_poly_train, y_train)

# Predict on the validation split
y_pred_val = poly_reg.predict(X_poly_val)

# Performance Metrics (these names are read again when the results are saved)
r2_val = mt.r2_score(y_val, y_pred_val)
mse_val = mt.mean_squared_error(y_val, y_pred_val)
rmse_val = root_mean_squared_error(y_val, y_pred_val)
mae_val = mt.mean_absolute_error(y_val, y_pred_val)
mape_val = mt.mean_absolute_percentage_error(y_val, y_pred_val)

print(f"Validation R2: {r2_val}")
print(f"Validation MSE: {mse_val}")
print(f"Validation RMSE: {rmse_val}")
print(f"Validation MAE: {mae_val}")
print(f"Validation MAPE: {mape_val}")

Validation R2: 0.7476780880972926
Validation MSE: 0.06196059829833931
Validation RMSE: 0.24891885886436832
Validation MAE: 0.17898972021780238
Validation MAPE: 395305776272762.4


# **4.0 - Ajuste fino de hiperparâmetros**

In [None]:
# Candidate polynomial degrees to evaluate: 1 through 5.
degree = np.arange(1, 6)

# One independent, initially empty accumulator per validation metric.
r2_list, mse_list, rmse_list, mae_list, mape_list = [], [], [], [], []

In [None]:
# Fit one model per candidate degree and record its validation metrics.
for i in degree:
    # Define polynomial model for the current candidate degree
    poly = PolynomialFeatures(degree=i)
    X_poly_train = poly.fit_transform(X_train)
    # Validation features go through the transformer fitted on the training
    # split; a preprocessing step must not be refitted on held-out data.
    X_poly_val = poly.transform(X_val)

    # Train and Fit Model
    poly_reg = LinearRegression()
    poly_reg.fit(X_poly_train, y_train)

    # Predict on the validation split
    y_pred = poly_reg.predict(X_poly_val)

    # Performance Metrics
    r2 = mt.r2_score(y_val, y_pred)
    mse = mt.mean_squared_error(y_val, y_pred)
    rmse = root_mean_squared_error(y_val, y_pred)
    mae = mt.mean_absolute_error(y_val, y_pred)
    mape = mt.mean_absolute_percentage_error(y_val, y_pred)

    # Append in the same order as `degree` so index k maps to degree[k]
    r2_list.append(r2)
    mse_list.append(mse)
    rmse_list.append(rmse)
    mae_list.append(mae)
    mape_list.append(mape)

    print(f"Degree: {i}, RMSE: {rmse}")

Degree: 1, RMSE: 0.3332389856803182
Degree: 2, RMSE: 0.24891885886436832
Degree: 3, RMSE: 1.2047601530973902


In [None]:
# Draw each metric in its own panel with its own y-axis. The metrics live on
# very different scales (the recorded MAPE is ~1e14 while the others are ~1),
# so a single shared axis would flatten every curve except the largest one.
metric_series = [
    ("R-Squared", r2_list),
    ("MSE", mse_list),
    ("RMSE", rmse_list),
    ("MAE", mae_list),
    ("MAPE", mape_list),
]
fig, axes = plt.subplots(
    1, len(metric_series), figsize=(4 * len(metric_series), 3.5), sharex=True
)
for ax, (metric_name, metric_values) in zip(axes, metric_series):
    ax.plot(degree, metric_values, marker="o")
    ax.set_title(metric_name)
    ax.set_xlabel("Degree")
    ax.set_ylabel(metric_name)
# Keep the per-panel axis labels from overlapping their neighbours.
fig.tight_layout()

In [None]:
# Choose the candidate degree whose validation MSE is the smallest.
best_degree = degree[np.asarray(mse_list).argmin()]
best_degree

# **5.0 - Performance do Dataset de Teste**

In [None]:
# Define polynomial model using the degree selected on the validation split
poly = PolynomialFeatures(degree=best_degree)
X_poly_train = poly.fit_transform(X_train)
# The transformer is fitted on the training split only; validation and test
# data are just transformed with it.
X_poly_val = poly.transform(X_val)
X_poly_test = poly.transform(X_test)

# Train and Fit Model on train + validation rows, so the final model uses
# every labelled row that is not part of the test split
poly_reg = LinearRegression()
poly_reg.fit(
    np.concatenate((X_poly_train, X_poly_val)), np.concatenate((y_train, y_val))
)
# Predict on the held-out test split
y_pred_test = poly_reg.predict(X_poly_test)

# Performance Metrics (these names are read again when the results are saved)
r2_test = mt.r2_score(y_test, y_pred_test)
mse_test = mt.mean_squared_error(y_test, y_pred_test)
rmse_test = root_mean_squared_error(y_test, y_pred_test)
mae_test = mt.mean_absolute_error(y_test, y_pred_test)
mape_test = mt.mean_absolute_percentage_error(y_test, y_pred_test)

# These are test-set metrics, so label them "Test" rather than "Validation"
print(f"Test R2: {r2_test}")
print(f"Test MSE: {mse_test}")
print(f"Test RMSE: {rmse_test}")
print(f"Test MAE: {mae_test}")
print(f"Test MAPE: {mape_test}")

# **6.0 - Salvar Resultados**

In [None]:
def _metrics_row(r2, mse, rmse, mae, mape):
    """Build one result row: the algorithm label plus five metrics rounded to 3 decimals."""
    return {
        "Algorithm": "Polynomial Regression",
        "R-Squared": np.round(r2, 3),
        "MSE": np.round(mse, 3),
        "RMSE": np.round(rmse, 3),
        "MAE": np.round(mae, 3),
        "MAPE": np.round(mape, 3),
    }


# One row per data split, built from the metrics computed in the earlier cells.
train_metrics = _metrics_row(r2_train, mse_train, rmse_train, mae_train, mape_train)
validation_metrics = _metrics_row(r2_val, mse_val, rmse_val, mae_val, mape_val)
test_metrics = _metrics_row(r2_test, mse_test, rmse_test, mae_test, mape_test)

# Append each row (index label 0, no header line) to its per-split CSV file.
for metrics_row, csv_path in (
    (train_metrics, "./reg_train_metrics.csv"),
    (validation_metrics, "./reg_validation_metrics.csv"),
    (test_metrics, "./reg_test_metrics.csv"),
):
    pd.DataFrame(metrics_row, index=[0]).to_csv(csv_path, mode="a", header=False)