In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.metrics import mean_absolute_error, r2_score

In [2]:
# Seed NumPy's legacy global RNG so a fresh top-to-bottom run is repeatable.
SEED = 0
np.random.seed(SEED)

In [3]:
# Load data
# Parse the CSV a single time and slice by column position, instead of
# re-reading the same file once for the header, once for X and once for y.
file = '../dados/data.csv'
data = pd.read_csv(file)
cols = data.columns

# Features: columns at positions 2..13 (12 columns); target: column at position 1.
# The target is sliced as 1:2 (not [1]) so y stays 2-D with a single column.
X = data[cols[2:14]].to_numpy()
y = data[cols[1:2]].to_numpy()

In [4]:
# Split data into training and testing sets
# An explicit random_state makes this cell idempotent: without it the split is
# drawn from NumPy's global RNG, so re-running the cell (or running cells out of
# order) silently produces a different partition and different metrics.
# NOTE(review): 0 mirrors the global np.random.seed(0) set earlier, so a fresh
# top-to-bottom run should yield the same split as before — confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

In [5]:
# Baseline: fit a linear regression on the unscaled training features,
# then predict on the held-out test features.
model_raw = LinearRegression().fit(X_train, y_train)
y_pred_raw = model_raw.predict(X_test)

In [6]:
# MinMax scaling: learn the scaling parameters from the training split only,
# then apply that same fitted scaler to both splits (no test-set leakage).
scaler_minmax = MinMaxScaler().fit(X_train)
X_train_minmax = scaler_minmax.transform(X_train)
X_test_minmax = scaler_minmax.transform(X_test)

In [7]:
# Same linear model, trained and evaluated on the MinMax-scaled features.
model_minmax = LinearRegression().fit(X_train_minmax, y_train)
y_pred_minmax = model_minmax.predict(X_test_minmax)

In [8]:
# Standard scaling: learn the scaling parameters from the training split only,
# then apply that same fitted scaler to both splits (no test-set leakage).
scaler_standard = StandardScaler().fit(X_train)
X_train_standard = scaler_standard.transform(X_train)
X_test_standard = scaler_standard.transform(X_test)

In [9]:
# Same linear model, trained and evaluated on the standard-scaled features.
model_standard = LinearRegression().fit(X_train_standard, y_train)
y_pred_standard = model_standard.predict(X_test_standard)

In [10]:
# Mean absolute error on the test split for each feature-scaling variant.
# NOTE(review): a linear model with an intercept should be (numerically almost)
# invariant to per-feature affine rescaling, so near-identical scores across
# the three variants are presumably expected rather than a bug — confirm.
mae_raw, mae_minmax, mae_standard = (
    mean_absolute_error(y_test, y_hat)
    for y_hat in (y_pred_raw, y_pred_minmax, y_pred_standard)
)

print(
    f'MAE with Raw Data: {mae_raw}',
    f'MAE with MinMax Scaled Data: {mae_minmax}',
    f'MAE with Standard Scaled Data: {mae_standard}',
    sep='\n',
)

MAE with Raw Data: 163581.76716381576
MAE with MinMax Scaled Data: 163601.24622415472
MAE with Standard Scaled Data: 163581.76716381955


In [11]:
# Coefficient of determination (R^2) on the test split for each
# feature-scaling variant, using the same predictions scored for MAE.
r2_raw, r2_minmax, r2_standard = (
    r2_score(y_test, y_hat)
    for y_hat in (y_pred_raw, y_pred_minmax, y_pred_standard)
)

print(
    f'R2 with Raw Data: {r2_raw}',
    f'R2 with MinMax Scaled Data: {r2_minmax}',
    f'R2 with Standard Scaled Data: {r2_standard}',
    sep='\n',
)

R2 with Raw Data: 0.5553575270098776
R2 with MinMax Scaled Data: 0.5553367617511691
R2 with Standard Scaled Data: 0.5553575270099153
