In [6]:
from sklearn import svm, metrics, ensemble, linear_model, neighbors
from skopt.space import Real, Categorical, Integer
import numpy as np
import pandas as pd
import torch
from skopt import BayesSearchCV
import xgboost
from ray import tune
import torch
import torchmetrics

In [7]:
# Training split. squeeze(axis=2) drops axis 2 and raises if that axis does
# not have length 1.
train_features = np.load("Train(Features).npy").squeeze(axis=2)
# The training targets additionally drop axis 1.
train_targets = np.load("Train(Targets).npy").squeeze(axis=2).squeeze(axis=1)

# Validation split. Unlike the training targets, the validation targets only
# drop axis 2 and keep axis 1.
val_features = np.load("Val(Features).npy").squeeze(axis=2)
val_targets = np.load("Val(Targets).npy").squeeze(axis=2)

In [5]:
# Display the training feature array as the cell output for a visual check.
train_features

array([[ 9.48981429e+00,  0.00000000e+00,  1.20000000e+01,
        -9.61916667e-01,  8.35629466e+07],
       [ 9.56869668e+00,  0.00000000e+00,  1.20000000e+01,
        -1.15133333e+00,  8.34760314e+07],
       [ 9.64757908e+00,  0.00000000e+00,  1.20000000e+01,
        -4.10166667e-01,  8.33891163e+07],
       [ 9.72646148e+00,  1.00000000e+00,  1.20000000e+01,
        -7.48750000e-01,  8.33022011e+07],
       [ 9.80534387e+00,  1.00000000e+00,  1.20000000e+01,
        -2.03083333e-01,  8.32152859e+07],
       [ 9.88422627e+00,  1.00000000e+00,  1.20000000e+01,
        -5.55416667e-01,  8.31283707e+07],
       [ 9.96310866e+00,  0.00000000e+00,  1.20000000e+01,
        -3.06833333e-01,  8.30414556e+07],
       [ 1.00419911e+01,  1.00000000e+00,  1.20000000e+01,
        -1.30583333e-01,  8.29545404e+07],
       [ 1.01208735e+01,  0.00000000e+00,  1.20000000e+01,
        -2.19600000e+00,  8.28676252e+07],
       [ 1.01997558e+01,  0.00000000e+00,  1.20000000e+01,
        -1.18041667e+00

In [5]:
# Report the array shapes: targets first, then features.
for split_array in (train_targets, train_features):
    print(split_array.shape)

(200, 1)
(200, 5)


In [3]:
#Naive Benchmark (MAE 29.36, MAPE 1.0)
# Baseline that predicts 0 for every validation sample.
# The prediction array takes its shape from val_targets instead of a
# hard-coded (50, 1), so the cell keeps working if the validation set changes.
naivetarget = np.zeros(val_targets.shape)

naive_mae = metrics.mean_absolute_error(val_targets, naivetarget)
naive_mape = metrics.mean_absolute_percentage_error(val_targets, naivetarget)
print(f"MAE = {naive_mae}, MAPE = {naive_mape}")

MAE = 29.36, MAPE = 1.0


In [None]:
#sklearn SVR
# Bayesian hyper-parameter search over an SVR, scored by (negated) MAE with
# 5-fold cross-validation on the training split. The best parameters are
# written to SVRparams.txt and the tuned model is scored on the validation split.
# NOTE: no random_state is passed to BayesSearchCV, so the search is not seeded.

params_svr = {
    'kernel': Categorical(['linear', 'poly', 'rbf', 'sigmoid']),
    'degree': Integer(0, 20),
    'coef0': Real(0.001, 5, prior='log-uniform'),
    'gamma': Categorical(['scale', 'auto']),
    'C': Real(0.01, 20, prior="log-uniform"),
}

opt = BayesSearchCV(
    estimator=svm.SVR(),
    search_spaces=params_svr,
    n_iter=100,
    cv=5,
    scoring='neg_mean_absolute_error',
)
# Squeeze the targets, as the other model cells in this notebook do.
opt.fit(train_features, np.squeeze(train_targets))
svrbestparams = opt.best_params_

# Context manager guarantees the file is closed even if the write fails.
with open("SVRparams.txt", "w") as params_file:
    params_file.write(str(svrbestparams))

# Predict once and reuse the result for both metrics.
svr_val_pred = opt.predict(val_features)
svr_mae = metrics.mean_absolute_error(val_targets, svr_val_pred)
svr_mape = metrics.mean_absolute_percentage_error(val_targets, svr_val_pred)
print(f"MAE = {svr_mae}, MAPE = {svr_mape}")

In [4]:
#sklearn RandomForestRegressor (MAE 9.08, MAPE 0.279)
# Bayesian hyper-parameter search over a random forest, scored by (negated)
# MAE with 5-fold cross-validation on the training split, then evaluated on
# the validation split.
# NOTE: no random_state is passed to the search or the estimator, so the
# figures quoted above come from a single unseeded run.

params_rf = {
    'n_estimators': Integer(1, 1000),
    'criterion': Categorical(['squared_error', 'absolute_error', 'poisson']),
    'min_samples_split': Integer(2, 100),
    'max_depth': Integer(1, 50),
}

opt = BayesSearchCV(
    estimator=ensemble.RandomForestRegressor(),
    search_spaces=params_rf,
    n_iter=100,
    cv=5,
    scoring='neg_mean_absolute_error',
)

opt.fit(train_features, np.squeeze(train_targets))

# Predict once and reuse the result for both metrics.
rf_val_pred = opt.predict(val_features)
rf_mae = metrics.mean_absolute_error(val_targets, rf_val_pred)
rf_mape = metrics.mean_absolute_percentage_error(val_targets, rf_val_pred)
print(f"MAE = {rf_mae}, MAPE = {rf_mape}")

MAE = 9.083524504692388, MAPE = 0.27951729919866497


In [58]:
#sklearn AdaBoostRegressor (MAE 6.66, MAPE 0.209)
# Bayesian hyper-parameter search over AdaBoost, scored by (negated) MAE with
# 5-fold cross-validation on the training split, then evaluated on the
# validation split.
# NOTE: no random_state is passed to the search or the estimator, so the
# figures quoted above come from a single unseeded run.

params_ada = {
    'learning_rate': Real(0.005, 0.9, prior="log-uniform"),
    'n_estimators': Integer(1, 1000),
    'loss': Categorical(['linear', 'square', 'exponential']),
}

opt = BayesSearchCV(
    estimator=ensemble.AdaBoostRegressor(),
    search_spaces=params_ada,
    n_iter=100,
    cv=5,
    scoring='neg_mean_absolute_error',
)
opt.fit(train_features, np.squeeze(train_targets))

# Predict once and reuse the result for both metrics.
ada_val_pred = opt.predict(val_features)
ada_mae = metrics.mean_absolute_error(val_targets, ada_val_pred)
ada_mape = metrics.mean_absolute_percentage_error(val_targets, ada_val_pred)
print(f"MAE = {ada_mae}, MAPE = {ada_mape}")



MAE = 6.664000000000001, MAPE = 0.2092444217211552


In [63]:
#sklearn Multiple Linear Regression (MAE 43.54, MAPE 1.429)
# Ordinary least-squares fit on the training split with default settings
# (no hyper-parameter search), evaluated on the validation split.

regr = linear_model.LinearRegression()
regr.fit(train_features, np.squeeze(train_targets))

# Predict once and reuse the result for both metrics.
linreg_val_pred = regr.predict(val_features)
linreg_mae = metrics.mean_absolute_error(val_targets, linreg_val_pred)
linreg_mape = metrics.mean_absolute_percentage_error(val_targets, linreg_val_pred)
print(f"MAE = {linreg_mae}, MAPE = {linreg_mape}")

MAE = 43.53828384609981, MAPE = 1.429424849201543


In [4]:
#xgboost tree (MAE 8.44, MAPE 0.259)
# Bayesian hyper-parameter search over an XGBoost regressor, scored by
# (negated) MAE with 5-fold cross-validation on the training split, then
# evaluated on the validation split. The booster is pinned to 'gbtree' through
# a single-choice Categorical so it is set as part of the search space.
# NOTE: no random_state is passed to BayesSearchCV, so the search is not seeded.

params_xgb = {
    'booster': Categorical(['gbtree']),
    'eta': Real(0.001, 1, prior='log-uniform'),
    'gamma': Real(0.01, 10, prior='log-uniform'),
    'max_depth': Integer(1, 15),
    'subsample': Real(0.001, 1, prior='log-uniform'),
    'colsample_bytree': Real(0.001, 1, prior='log-uniform'),
    'lambda': Real(0.01, 50, prior='log-uniform'),
    'alpha': Real(0.01, 50, prior='log-uniform'),
    'num_parallel_tree': Integer(1, 10),
}

opt = BayesSearchCV(
    estimator=xgboost.XGBRegressor(),
    search_spaces=params_xgb,
    n_iter=100,
    cv=5,
    scoring='neg_mean_absolute_error',
)

opt.fit(train_features, np.squeeze(train_targets))

# Predict once and reuse the result for both metrics.
xgb_tree_val_pred = opt.predict(val_features)
xgb_tree_mae = metrics.mean_absolute_error(val_targets, xgb_tree_val_pred)
xgb_tree_mape = metrics.mean_absolute_percentage_error(val_targets, xgb_tree_val_pred)
print(f"MAE = {xgb_tree_mae}, MAPE = {xgb_tree_mape}")

MAE = 8.443783836364746, MAPE = 0.25900535189002083


In [8]:
#xgboost linear (MAE 14.57, MAPE 0.524)
# Bayesian hyper-parameter search over an XGBoost regressor, scored by
# (negated) MAE with 5-fold cross-validation on the training split, then
# evaluated on the validation split. The booster is pinned to 'gblinear'
# through a single-choice Categorical so it is set as part of the search space.
# NOTE: no random_state is passed to BayesSearchCV, so the search is not seeded.

params_xgbt = {
    'booster': Categorical(['gblinear']),
    'feature_selector': Categorical(['cyclic', 'shuffle']),
    'lambda': Real(0.01, 50, prior='log-uniform'),
    'alpha': Real(0.01, 50, prior='log-uniform'),
    'updater': Categorical(['shotgun', 'coord_descent']),
}

opt = BayesSearchCV(
    estimator=xgboost.XGBRegressor(),
    search_spaces=params_xgbt,
    n_iter=100,
    cv=5,
    scoring='neg_mean_absolute_error',
)

opt.fit(train_features, np.squeeze(train_targets))

# Predict once and reuse the result for both metrics.
xgb_linear_val_pred = opt.predict(val_features)
xgb_linear_mae = metrics.mean_absolute_error(val_targets, xgb_linear_val_pred)
xgb_linear_mape = metrics.mean_absolute_percentage_error(val_targets, xgb_linear_val_pred)
print(f"MAE = {xgb_linear_mae}, MAPE = {xgb_linear_mape}")

MAE = 14.571553535461426, MAPE = 0.5243719558194426


In [8]:
#sklearn KNN (MAE 10.92, MAPE 0.34)
# Bayesian hyper-parameter search over a k-nearest-neighbours regressor,
# scored by (negated) MAE with 5-fold cross-validation on the training split,
# then evaluated on the validation split. The metric is pinned to 'minkowski'
# and its exponent p is searched over 1..10.
# NOTE: no random_state is passed to BayesSearchCV, so the search is not seeded.

params_knn = {
    'n_neighbors': Integer(2, 10),
    'weights': Categorical(['uniform', 'distance']),
    'algorithm': Categorical(['auto', 'ball_tree', 'kd_tree', 'brute']),
    'metric': Categorical(['minkowski']),
    'p': Integer(1, 10),
}

opt = BayesSearchCV(
    estimator=neighbors.KNeighborsRegressor(),
    search_spaces=params_knn,
    n_iter=100,
    cv=5,
    scoring='neg_mean_absolute_error',
)

opt.fit(train_features, np.squeeze(train_targets))

# Predict once and reuse the result for both metrics.
knn_val_pred = opt.predict(val_features)
knn_mae = metrics.mean_absolute_error(val_targets, knn_val_pred)
knn_mape = metrics.mean_absolute_percentage_error(val_targets, knn_val_pred)
print(f"MAE = {knn_mae}, MAPE = {knn_mape}")



MAE = 10.92, MAPE = 0.34072649045038483
