In [65]:
from typing import Union

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error

In [66]:
def fit_and_validate_model(
    model: Union[LinearRegression, SVR, GradientBoostingRegressor],
    X: Union[np.ndarray, pd.DataFrame],
    y: Union[np.ndarray, pd.DataFrame, pd.Series],
) -> Union[LinearRegression, SVR, GradientBoostingRegressor]:
    """Fit ``model`` on a 75% split of ``(X, y)`` and print hold-out metrics.

    The data is split with ``train_test_split(test_size=0.25, random_state=0)``,
    the model is fitted on the training part, and the R2 score and MSE of its
    predictions on the validation part are printed.

    Parameters
    ----------
    model
        An estimator instance, or an estimator class. A class is instantiated
        with its default arguments before fitting, because several cells in
        this notebook pass the class itself (e.g. ``SVR`` instead of ``SVR()``).
    X
        Feature matrix; a DataFrame or an ndarray. It is converted with
        ``np.asarray`` so both kinds are handled the same way.
    y
        Target values; a DataFrame, Series or ndarray. A column vector of
        shape ``(n, 1)`` is flattened to ``(n,)``.

    Returns
    -------
    The fitted estimator (the object returned by ``model.fit``).
    """
    # Callers sometimes hand over the estimator class rather than an instance.
    if isinstance(model, type):
        model = model()

    # The previous body called ``.values`` on the inputs, which fails for the
    # ndarray inputs the signature advertises; np.asarray accepts both.
    X_arr = np.asarray(X)
    y_arr = np.asarray(y)
    # A single-column target makes scikit-learn emit a DataConversionWarning
    # (``y = column_or_1d(y, warn=True)``); flatten it up front instead.
    if y_arr.ndim == 2 and y_arr.shape[1] == 1:
        y_arr = y_arr.ravel()

    X_train, X_val, y_train, y_val = train_test_split(
        X_arr, y_arr, test_size=0.25, random_state=0
    )
    model = model.fit(X_train, y_train)
    val_preds = model.predict(X_val)
    print(f"Validation R2 score: {r2_score(y_val, val_preds)}")
    print(f"MSE: {mean_squared_error(y_val, val_preds)}")
    return model


def get_test_score(
    model: Union[LinearRegression, SVR, GradientBoostingRegressor],
    X: Union[np.ndarray, pd.DataFrame],
    y: Union[np.ndarray, pd.DataFrame, pd.Series],
) -> None:
    """Print the R2 score and MSE of an already fitted ``model`` on ``(X, y)``.

    Parameters
    ----------
    model
        A fitted estimator exposing ``predict``.
    X
        Feature matrix to predict on; converted with ``np.asarray`` because
        ``fit_and_validate_model`` in this notebook fits on plain arrays.
    y
        True target values, passed unchanged to the metric functions.

    Returns
    -------
    None. The metrics are only printed.
    """
    # Predict once and reuse the result for both metrics.
    test_preds = model.predict(np.asarray(X))
    # The label previously read "Validation R2 score", which mislabelled the
    # test-set result this function reports.
    print(f"Test R2 score: {r2_score(y, test_preds)}")
    print(f"MSE: {mean_squared_error(y, test_preds)}")
    return None

In [72]:
# SVR with a sigmoid kernel on the custom-feature training set.
X = pd.read_csv("../data/processed/custom_features_train_X.csv")
# squeeze("columns") turns a single-column target frame into a 1-D Series, so
# the estimator no longer has to convert a column vector (the source of the
# `y = column_or_1d(y, warn=True)` warning this cell used to emit).
y = pd.read_csv("../data/processed/train_y.csv").squeeze("columns")
svr_cf_sigmoid = fit_and_validate_model(SVR(kernel="sigmoid"), X, y)

Validation R2 score: -19.385413433382357
MSE: 15.779762375762305


  y = column_or_1d(y, warn=True)


In [64]:
# Linear regression baseline on the plain training features.
X = pd.read_csv("../data/processed/train_X.csv")
# squeeze("columns") yields a 1-D Series when the target file has one column.
y = pd.read_csv("../data/processed/train_y.csv").squeeze("columns")
# Pass an estimator instance: fit_and_validate_model calls model.fit(...)
# directly, which does not work on the bare LinearRegression class.
lr = fit_and_validate_model(LinearRegression(), X, y)

Validation R2 score: 0.7280755283112772
MSE: 0.21048891460676158


In [63]:
# Linear regression on the custom-feature training set.
X = pd.read_csv("../data/processed/custom_features_train_X.csv")
# squeeze("columns") yields a 1-D Series when the target file has one column.
y = pd.read_csv("../data/processed/train_y.csv").squeeze("columns")
# Pass an estimator instance: fit_and_validate_model calls model.fit(...)
# directly, which does not work on the bare LinearRegression class.
lr_cf = fit_and_validate_model(LinearRegression(), X, y)

Validation R2 score: 0.7861549322933281
MSE: 0.16553131800183454


In [62]:
# Gradient boosting on the plain training features.
X = pd.read_csv("../data/processed/train_X.csv")
# squeeze("columns") turns a single-column target frame into a 1-D Series, so
# the estimator no longer has to convert a column vector (the source of the
# `y = column_or_1d(y, warn=True)` warning this cell used to emit).
y = pd.read_csv("../data/processed/train_y.csv").squeeze("columns")
# Pass an instance rather than the class, and seed it (same seed as the
# train/validation split) so reruns build the same model.
gbr = fit_and_validate_model(GradientBoostingRegressor(random_state=0), X, y)

Validation R2 score: 0.8722415280536399
MSE: 0.09889415956130367


  y = column_or_1d(y, warn=True)


In [60]:
# SVR with default settings on the plain training features.
X = pd.read_csv("../data/processed/train_X.csv")
# squeeze("columns") turns a single-column target frame into a 1-D Series, so
# the estimator no longer has to convert a column vector (the source of the
# `y = column_or_1d(y, warn=True)` warning this cell used to emit).
y = pd.read_csv("../data/processed/train_y.csv").squeeze("columns")
# Pass an estimator instance: fit_and_validate_model calls model.fit(...)
# directly, which does not work on the bare SVR class.
svr_base = fit_and_validate_model(SVR(), X, y)

Validation R2 score: 0.8999449517629033
MSE: 0.0774497358533512


  y = column_or_1d(y, warn=True)


In [57]:
# SVR with default settings on the custom-feature training set.
X = pd.read_csv("../data/processed/custom_features_train_X.csv")
# squeeze("columns") turns a single-column target frame into a 1-D Series, so
# the estimator no longer has to convert a column vector (the source of the
# `y = column_or_1d(y, warn=True)` warning this cell used to emit).
y = pd.read_csv("../data/processed/train_y.csv").squeeze("columns")
# Pass an estimator instance: fit_and_validate_model calls model.fit(...)
# directly, which does not work on the bare SVR class.
svr_cf = fit_and_validate_model(SVR(), X, y)

Validation R2 score: 0.9207939455965473
MSE: 0.06131112921955631


  y = column_or_1d(y, warn=True)


In [67]:
# SVR with a polynomial kernel on the custom-feature training set.
X = pd.read_csv("../data/processed/custom_features_train_X.csv")
# squeeze("columns") turns a single-column target frame into a 1-D Series, so
# the estimator no longer has to convert a column vector (the source of the
# `y = column_or_1d(y, warn=True)` warning this cell used to emit).
y = pd.read_csv("../data/processed/train_y.csv").squeeze("columns")
# NOTE: this rebinds `svr_cf`, which the default-kernel SVR cell also assigns;
# whichever of the two cells ran last decides what `svr_cf` refers to.
svr_cf = fit_and_validate_model(SVR(kernel="poly"), X, y)

Validation R2 score: 0.9336881116598841
MSE: 0.05133012602931031


  y = column_or_1d(y, warn=True)
