In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn import metrics 
from sklearn.svm import SVR
from sklearn.model_selection import RepeatedKFold
from sklearn.base import clone
from sklearn.model_selection import train_test_split
from sklearn.metrics import make_scorer

plt.rcParams.update({'font.size': 12})

In [None]:
# Load the labelled ML-CUP23 file. The first 7 rows are skipped and every cell
# is read as text; the numeric conversion happens after the split below.
path=r'/home/ludovico/ML-project/data/cup/ML-CUP23-'
train_set = pd.read_csv(
    path+'TR.csv',
    skiprows=7,
    header=None,
    delimiter=',',
    dtype=str,
)

# Column 0 is left out, the last three columns are the targets and everything
# in between is the model input.
feature_columns = train_set.columns[1:-3]
target_columns = train_set.columns[-3:]
input = train_set[feature_columns]
target = train_set[target_columns]

# Hold out 20% of the rows as an internal test set (used only for the final
# model assessment); the remaining 80% is the design set. The seed is fixed so
# the split is reproducible.
split_parts = train_test_split(
    input,
    target,
    test_size=0.2,
    random_state=0,
    shuffle=True,
)

# Cast each of the four parts from text to float64 in one pass.
x_train, x_test, y_train, y_test = (
    part.astype(np.float64) for part in split_parts
)


#we add this metric (Mean euclidean error) to evaluate the performance of the model 
def MEE(x, y):
    """Return the Mean Euclidean Error between two 2-D collections of rows.

    For every row the Euclidean (L2) norm of ``x - y`` is taken along axis 1,
    and the resulting per-row distances are averaged into a single value.

    Parameters
    ----------
    x, y : array-like with two dimensions
        Operands of the subtraction ``x - y``; whatever the ``-`` operator
        does for the given types (e.g. NumPy broadcasting) applies.

    Returns
    -------
    numpy.float64
        Mean of the row-wise Euclidean distances.
    """
    residuals = x - y
    row_distances = np.linalg.norm(residuals, ord=2, axis=1)
    return row_distances.mean()


## Best model is SVM sigmoid with the following hyperparameters

In [None]:
# Hyperparameters of the selected sigmoid-kernel SVR.
best_model = {
    'C': 50000,
    'coef0': -4.0,
    'epsilon': 0.1,
    'gamma': 0.3,
    'kernel': 'sigmoid',
}

# SVR is wrapped in MultiOutputRegressor so it can be fitted on the
# multi-column target.
base_svr = SVR(**best_model)
estimator = MultiOutputRegressor(base_svr)

# Fit on the whole design set; fit() returns the fitted estimator itself.
SVM = estimator.fit(x_train, y_train)

## Model assessment (MEE test error)

In [None]:
# Predict the held-out internal test inputs with the model fitted on the design set.
y_pred = SVM.predict(x_test)

# Mean Euclidean Error of those predictions against the held-out targets.
test_mee = MEE(y_test, y_pred)
print('Test-error: MEE =', test_mee)

## Blind test prediction

### Final refit on the whole labelled dataset (design set + internal test set)

In [None]:
# Re-read the full labelled file (same 7 skipped rows, every cell as text)
# so the final model can be refitted on all of its rows.
path=r'/home/ludovico/ML-project/data/cup/ML-CUP23-'
data_set = pd.read_csv(
    path+'TR.csv',
    skiprows=7,
    header=None,
    delimiter=',',
    dtype=str,
)

# Column 0 is left out; the last three columns are the targets and the
# columns in between are the inputs.
refit_feature_columns = data_set.columns[1:-3]
refit_target_columns = data_set.columns[-3:]
input = data_set[refit_feature_columns]
target = data_set[refit_target_columns]

In [None]:
# Build a fresh, unfitted estimator with the selected hyperparameters.
refit_template = MultiOutputRegressor(SVR(**best_model))
estimator_refit = clone(refit_template)

# Refit on every row loaded into `input` / `target`, cast from text to float64.
refit_inputs = input.astype(np.float64)
refit_targets = target.astype(np.float64)
SVM_refit = estimator_refit.fit(refit_inputs, refit_targets)

## Importing data from the blind test file

In [None]:
# Load the blind test file: 7 rows skipped, no header row, every cell as text.
blind_test_set = pd.read_csv(
    path+'TS.csv',
    skiprows=7,
    header=None,
    delimiter=',',
    dtype=str,
)

# Every column except column 0 is used as model input.
blind_feature_columns = blind_test_set.columns[1:]
input_blind = blind_test_set[blind_feature_columns]

# Cast from text to float64 before prediction.
x_blind = input_blind.astype(np.float64)

# Predict the blind inputs with the refitted model.
y_blind_pred = SVM_refit.predict(x_blind)

## Creation of the csv file for the cup competition

In [None]:
# Wrap the blind-test predictions in a DataFrame and shift the index so that
# the first CSV column is a 1-based row id.
df_blind = pd.DataFrame(y_blind_pred)
df_blind.index += 1

# The header and the predictions must land in the SAME file. Previously the
# header went to 'NikolaTeslaPigeon_ML-CUP23-TS.csv' while the predictions were
# appended to 'NikolaTesla_ML-CUP23-TS.csv', leaving one header-only file and
# one header-less file that grew on every re-run. The name below matches the
# team name written in the header.
output_file = 'NikolaTesla_ML-CUP23-TS.csv'

# Opening with 'w' truncates any previous content, so re-running the cell
# regenerates the file instead of accumulating duplicate rows.
with open(output_file, 'w') as file:
    file.write('# Ludovico Iannello, Federico Fattorini' + '\n')
    file.write('# NikolaTesla' + '\n')
    file.write('# ML-CUP23' + '\n')
    file.write('# 14/01/2024' + '\n')

# Append the predictions (row id + predicted columns, no column header) right
# below the comment header written above.
df_blind.to_csv(output_file, mode='a', header=False)
