# Polynomial Regression

### Import Libraries

In [1]:
import pandas as pd
import numpy  as np

from sklearn import metrics       as mt
from sklearn import preprocessing as pp
from sklearn import linear_model  as lm

### Load Dataset

In [2]:
# Each split (training / validation / test) lives in its own folder, with the
# feature matrix and the target stored as separate CSV files.
X_train, y_train = (
    pd.read_csv('Training/X_training.csv'),
    pd.read_csv('Training/y_training.csv'),
)
X_val, y_val = (
    pd.read_csv('Validation/X_val.csv'),
    pd.read_csv('Validation/y_val.csv'),
)
X_test, y_test = (
    pd.read_csv('Test/X_test.csv'),
    pd.read_csv('Test/y_test.csv'),
)

In [3]:
# Training performance: fit one polynomial regression per degree in `d` on the
# training split, score it on that same split, and keep the best value seen
# for each metric.
#
# Each "best" value is tracked independently, so the reported numbers do not
# necessarily all come from the same polynomial degree.
d = np.arange(1, 5)
rmse_list = []  # RMSE of every degree, in the order of `d`

# Running extremes. R2 can be negative, so the maximum starts at -inf rather
# than 0; otherwise a run where every model scores below zero would report 0.
max_r2 = float('-inf')
min_mse = float('inf')
min_rmse = float('inf')
min_mae = float('inf')
min_mape = float('inf')

for i in d:
    # Define: expand the features into polynomial terms of degree `i`
    poly = pp.PolynomialFeatures(degree=i)
    X_poly_train = poly.fit_transform(X_train)

    # Training
    model = lm.LinearRegression()
    model.fit(X_poly_train, y_train)

    # Predict on the data the model was fitted on
    yhat_train = model.predict(X_poly_train)

    r2 = mt.r2_score(y_train, yhat_train)
    max_r2 = max(max_r2, r2)

    mse = mt.mean_squared_error(y_train, yhat_train)
    min_mse = min(min_mse, mse)

    rmse = np.sqrt(mse)
    rmse_list.append(rmse)
    min_rmse = min(min_rmse, rmse)

    mae = mt.mean_absolute_error(y_train, yhat_train)
    min_mae = min(min_mae, mae)

    mape = mt.mean_absolute_percentage_error(y_train, yhat_train)
    min_mape = min(min_mape, mape)

print(f'Max R2: {max_r2:.3f}')
print(f'Min MSE: {min_mse:.3f}')
print(f'Min RMSE: {min_rmse:.3f}')
print(f'Min MAE: {min_mae:.3f}')
print(f'Min MAPE: {min_mape:.3f}')

Max R2: 0.334
Min MSE: 318.377
Min RMSE: 17.843
Min MAE: 13.614
Min MAPE: 5.913


### Validation

In [4]:
# Validation performance: for every polynomial degree in `d`, fit on the
# training split and report the metrics on the validation split.
d = np.arange(1, 5)

for i in d:
    # Define: the transformer is fitted on the training split only; the
    # validation split is then mapped with `transform`, so nothing is ever
    # re-fitted on held-out data.
    poly = pp.PolynomialFeatures(degree=i)
    X_poly_train = poly.fit_transform(X_train)
    X_poly_val = poly.transform(X_val)

    # Training
    model = lm.LinearRegression()
    model.fit(X_poly_train, y_train)

    # Predict
    yhat_val = model.predict(X_poly_val)

    r2 = mt.r2_score(y_val, yhat_val)

    mse = mt.mean_squared_error(y_val, yhat_val)

    rmse = np.sqrt(mse)

    mae = mt.mean_absolute_error(y_val, yhat_val)

    mape = mt.mean_absolute_percentage_error(y_val, yhat_val)

    # One report per degree, so the degrees can be compared side by side.
    print(f'Degreee: {i}\n'
          f'R2: {r2:.3f}\n'
          f'MSE: {mse:.3f}\n'
          f'RMSE: {rmse:.3f}\n'
          f'MAE: {mae:.3f}\n'
          f'MAPE: {mape:.3f}\n'
          )



Degreee: 1
R2: 0.040
MSE: 458.447
RMSE: 21.411
MAE: 17.040
MAPE: 8.683
Degreee: 2
R2: 0.066
MSE: 445.768
RMSE: 21.113
MAE: 16.750
MAPE: 8.548
Degreee: 3
R2: -0.048
MSE: 500.326
RMSE: 22.368
MAE: 17.087
MAPE: 8.678
Degreee: 4
R2: -102.924
MSE: 49624.741
RMSE: 222.766
MAE: 36.104
MAPE: 10.185


### Test

In [5]:
# Test performance: for every polynomial degree in `d`, refit on the training
# and validation splits combined and report the metrics on the test split.
#
# NOTE(review): scoring every degree on the test split is fine for reporting,
# but the degree itself should be picked from the validation results, not from
# the numbers printed here.
d = np.arange(1, 5)

for i in d:
    # Define: fit the transformer once (on the training split) and reuse it
    # for the other splits via `transform` instead of re-fitting it on
    # held-out data.
    poly = pp.PolynomialFeatures(degree=i)
    X_poly_train = poly.fit_transform(X_train)
    X_poly_val = poly.transform(X_val)
    X_poly_test = poly.transform(X_test)

    # Training: the final model sees training + validation rows, stacked in
    # the same order for features and targets.
    model = lm.LinearRegression()
    model.fit(np.concatenate((X_poly_train, X_poly_val)),
              np.concatenate((y_train, y_val)))

    # Predict
    y_pred = model.predict(X_poly_test)

    r2 = mt.r2_score(y_test, y_pred)

    mse = mt.mean_squared_error(y_test, y_pred)

    rmse = np.sqrt(mse)

    mae = mt.mean_absolute_error(y_test, y_pred)

    mape = mt.mean_absolute_percentage_error(y_test, y_pred)

    # One report per degree.
    print(f'Degreee: {i}\n'
          f'R2: {r2:.3f}\n'
          f'MSE: {mse:.3f}\n'
          f'RMSE: {rmse:.3f}\n'
          f'MAE: {mae:.3f}\n'
          f'MAPE: {mape:.3f}\n'
          )

Degreee: 1
R2: 0.051
MSE: 461.988
RMSE: 21.494
MAE: 17.144
MAPE: 8.531

Degreee: 2
R2: 0.091
MSE: 442.641
RMSE: 21.039
MAE: 16.736
MAPE: 8.277

Degreee: 3
R2: 0.021
MSE: 476.492
RMSE: 21.829
MAE: 16.858
MAPE: 7.976

Degreee: 4
R2: -124.595
MSE: 61152.314
RMSE: 247.290
MAE: 23.394
MAPE: 7.845

