# KNN models - Machine Learning CUP

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.model_selection import RepeatedKFold, GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import (
    make_scorer,
    mean_squared_error,
    mean_absolute_error
)

In [None]:
def mee_f(Y_true, Y_pred):
    """Return the Mean Euclidean Error (MEE) between targets and predictions.

    The MEE is the average, over the samples, of the Euclidean norm of the
    per-sample error vector. Any number of output columns is supported; a
    1-D input is treated as a single-output target.

    Parameters
    ----------
    Y_true : array-like of shape (n_samples, n_outputs) or (n_samples,)
        Ground-truth target values.
    Y_pred : array-like with the same shape as ``Y_true``
        Predicted target values.

    Returns
    -------
    numpy.float64
        Mean of the row-wise Euclidean distances.

    Raises
    ------
    ValueError
        If an input is empty or the two shapes do not match.
    """
    true = np.atleast_1d(np.asarray(Y_true, dtype=float))
    pred = np.atleast_1d(np.asarray(Y_pred, dtype=float))
    if true.size == 0 or pred.size == 0:
        raise ValueError("mee_f requires at least one sample")

    # One row per sample, one column per output
    true = true.reshape(true.shape[0], -1)
    pred = pred.reshape(pred.shape[0], -1)
    if true.shape != pred.shape:
        raise ValueError(
            f"shape mismatch: Y_true {true.shape} vs Y_pred {pred.shape}"
        )

    return np.linalg.norm(true - pred, axis=1).mean()


def neg_mee_f(Y_true, Y_pred):
    """Return the negated Mean Euclidean Error (MEE).

    The value is minus the average, over the samples, of the Euclidean norm
    of the per-sample error vector, so that a larger value means a smaller
    error. Any number of output columns is supported; a 1-D input is treated
    as a single-output target.

    Parameters
    ----------
    Y_true : array-like of shape (n_samples, n_outputs) or (n_samples,)
        Ground-truth target values.
    Y_pred : array-like with the same shape as ``Y_true``
        Predicted target values.

    Returns
    -------
    numpy.float64
        Minus the mean of the row-wise Euclidean distances.

    Raises
    ------
    ValueError
        If an input is empty or the two shapes do not match.
    """
    true = np.atleast_1d(np.asarray(Y_true, dtype=float))
    pred = np.atleast_1d(np.asarray(Y_pred, dtype=float))
    if true.size == 0 or pred.size == 0:
        raise ValueError("neg_mee_f requires at least one sample")

    # One row per sample, one column per output
    true = true.reshape(true.shape[0], -1)
    pred = pred.reshape(pred.shape[0], -1)
    if true.shape != pred.shape:
        raise ValueError(
            f"shape mismatch: Y_true {true.shape} vs Y_pred {pred.shape}"
        )

    return -np.linalg.norm(true - pred, axis=1).mean()


def selecting_results(df, param_metric_value, param_weights_value):
    """
    Filter a grid-search results table down to one (metric, weights) pair.

    Keeps only the rows of ``df`` whose "param_metric" column equals
    ``param_metric_value`` and whose "param_weights" column equals
    ``param_weights_value``. No other column is constrained, so the number
    of neighbours k stays free.
    """
    same_metric = df["param_metric"] == param_metric_value
    same_weights = df["param_weights"] == param_weights_value
    return df[same_metric & same_weights]


# Importing datasets "ML-CUP22-TR" and "ML-CUP22-TS" (Blind Test)

In [None]:
# Load the training file and the blind-test file; the first 7 lines of each
# file are skipped and no header row is parsed.
tr = pd.read_csv('ML-CUP22-TR.csv', skiprows=7, header=None)
# Drop the first column and shift the remaining integer labels down by one
# so that they start from 0 again.
tr = tr.drop(columns=[0]).rename(columns=lambda label: label - 1)
blind_ts = pd.read_csv('ML-CUP22-TS.csv', skiprows=7, header=None)
blind_ts = blind_ts.drop(columns=[0]).rename(columns=lambda label: label - 1)

# The first 9 remaining columns of the training file are the inputs,
# the columns after them are the targets.
X_blind = blind_ts.to_numpy()
X = tr.iloc[:, :9].to_numpy()
Y = tr.iloc[:, 9:].to_numpy()

# Hold-out split of the training data: 70% design set, 30% internal test set
X_tr, X_tt, Y_tr, Y_tt = train_test_split(
    X,
    Y,
    test_size=0.3,
    shuffle=True,
    random_state=0,
)

# Per-target columns, used by the single-output models
y1_tr, y1_tt = Y_tr[:, 0], Y_tt[:, 0]
y2_tr, y2_tt = Y_tr[:, 1], Y_tt[:, 1]


# Data Preprocessing
- Data Normalization: ON

In [None]:
# Standardize every feature to zero mean and unit variance with StandardScaler.
# The scaler is fitted on the design set only and then applied to all inputs.
scaler = StandardScaler().fit(X_tr)
X_tr, X_tt = scaler.transform(X_tr), scaler.transform(X_tt)
# NOTE: from here on ``blind_ts`` holds the scaled blind-test inputs
blind_ts = scaler.transform(X_blind)

# KNN model with 2 outputs (Double KNN)

- Approach:
-1) Initial Hold-out of "ML-CUP22-TR" dataset: Design set (X_tr, Y_tr) + Test set (X_tt, Y_tt)  
-2) Grid Search through a RepeatedKFold (Repeated: 10 times, #folds: 5) over the Design Set
-3) Find the best hyperparameters (basing the decision on the Mean Euclidean Error (MEE))
-4) Compute the MEE over the Test set (X_tt, Y_tt) to give an estimation of the future performances of the model found


## Model Selection 


In [None]:
%%time

# Scorer driving model selection: the negated Mean Euclidean Error, so that
# a larger score means a better model
mee_scorer = make_scorer(neg_mee_f, greater_is_better=True)

# Candidate numbers of neighbours: k = 1, ..., 50
n_neighbors_v = np.arange(1, 51)

param_grid = dict(
    n_neighbors=n_neighbors_v,
    weights=["distance", "uniform"],
    metric=["cityblock", "euclidean", "cosine"],
)

# 5-fold cross-validation repeated 10 times with a fixed seed
repeated_cv = RepeatedKFold(n_splits=5, n_repeats=10, random_state=0)

grid_double_knn = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=param_grid,
    scoring=mee_scorer,
    cv=repeated_cv,
    refit=True,
    return_train_score=True,
    n_jobs=-1,
)

# A single multi-output model is fitted: it predicts both targets at once
grid_double_knn.fit(X_tr, Y_tr)

cv_results = pd.DataFrame(grid_double_knn.cv_results_)

In [None]:
# Save the grid-search results of the double-output KNN to disk
cv_results.to_csv("grid_double_knn.csv")

In [None]:
# Show the 10 best-ranked configurations of the grid search
results_sorted = cv_results.sort_values(by="rank_test_score")
results_sorted.head(10)

### Selecting the best model 

In [None]:
# Rows ranked first by the grid search, i.e. with the highest mean negated MEE
# over the validation folds
is_top_ranked = cv_results["rank_test_score"] == 1
best_model_table = cv_results[is_top_ranked]
best_model_table

### Extracting the hyperparameters of the best model

In [None]:
# Read the hyperparameters from the first top-ranked row: number of
# neighbours, "weights" setting and distance metric
best_k, best_weigths, best_metric = (
    best_model_table[column].iloc[0]
    for column in ("param_n_neighbors", "param_weights", "param_metric")
)

### From the results of grid_double_knn select the rows that have the same "metric" and "weights" hyperparameters as the best one
- This is done with the function selecting_results (implemented in the first part of the notebook)

In [None]:
# Keep only the configurations sharing the best model's metric and weights,
# so that the number of neighbours is the only hyperparameter that varies
models = selecting_results(
    df=cv_results,
    param_metric_value=best_metric,
    param_weights_value=best_weigths,
)

## Plotting MEE vs n_neighbors (k) for each model of "models" table

In [None]:
# Validation MEE (mean and std over the CV folds) as a function of k.
# The grid search stores negated MEEs, hence the sign flip on the mean.
x_val = np.array(models["param_n_neighbors"], dtype = int)
y_val = np.array(-models["mean_test_score"], dtype = float)
std_y_val = np.array(models["std_test_score"], dtype = float)

fig, ax1 = plt.subplots()
ax1.plot(x_val, y_val, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Validation MEE")
# Raw string: "\p" is not a valid escape sequence in a regular string literal
ax1.fill_between(x_val, y_val-std_y_val, y_val+std_y_val, alpha = 0.4, label = r"$\pm$ std(Validation MEE)")
ax1.grid()
ax1.set_ylabel("MEE")
ax1.set_xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best_weigths}, metric = {best_metric}, KNN two outputs "
ax1.set_title(title_str)
best_k_sting = "best n_neighbors = " + str(best_k)
# Mark the selected k itself; a row position + 1 is only correct when the
# grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best_k)
ax1.axvline(x=x_best, color='g', label = best_k_sting)
ax1.legend()
plt.savefig("MEE_vs_k.pdf")

In [None]:
# Standard deviation of the validation score over the CV folds, as a function of k
plt.scatter(models["param_n_neighbors"], models["std_test_score"], marker='.', label = "Validation error (std value)")
plt.grid()
plt.ylabel("MEE std.")
plt.xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best_weigths}, metric = {best_metric}, KNN two outputs "
plt.title(title_str)
best_k_sting = "best n_neighbors = " + str(best_k)
# Mark the selected k itself; a row position + 1 is only correct when the
# grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best_k)
plt.axvline(x=x_best, color='g', label = best_k_sting)
plt.legend()
plt.savefig("MEE_std_vs_k.pdf")

## Mean and std MEE computed over TR vs n_neighbors (k) for each model of "models" table


In [None]:
# Training MEE (mean and std over the CV folds) as a function of k.
# The grid search stores negated MEEs, hence the sign flip on the mean.
x_train = np.array(models["param_n_neighbors"], dtype = int)
y_train = np.array(-models["mean_train_score"], dtype = float)
std_y_train = np.array(models["std_train_score"], dtype = float)


fig, ax2 = plt.subplots()
ax2.plot(x_train,y_train, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Training MEE")
# Raw string: "\p" is not a valid escape sequence in a regular string literal
ax2.fill_between(x_train, y_train-std_y_train, y_train+std_y_train, alpha = 0.4, label = r"$\pm$ std(Training MEE)")
ax2.grid()
ax2.set_ylabel("MEE")
ax2.set_xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best_weigths}, metric = {best_metric}, KNN two outputs "
ax2.set_title(title_str)
best_k_sting = "best n_neighbors = " + str(best_k)
# Mark the k selected on the validation score; a row position + 1 is only
# correct when the grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best_k)
ax2.axvline(x=x_best, color='g', label = best_k_sting)
ax2.legend()

### From the results of grid_double_knn select the rows that have the same "metric" hyperparameters as the best one and weights = uniform

In [None]:
# Same metric as the best model, but with the "uniform" weighting scheme
models_uniform = selecting_results(
    df=cv_results,
    param_metric_value=best_metric,
    param_weights_value="uniform",
)

## Plotting MEE vs n_neighbors (k) for each model of "models_uniform" table

In [None]:
# Validation MEE (mean and std over the CV folds) as a function of k for the
# uniform-weights models. Stored scores are negated MEEs, hence the sign flip.
x_val = np.array(models_uniform["param_n_neighbors"], dtype = int)
y_val = np.array(-models_uniform["mean_test_score"], dtype = float)
std_y_val = np.array(models_uniform["std_test_score"], dtype = float)

fig, ax1 = plt.subplots()
ax1.plot(x_val, y_val, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Validation MEE")
# Raw string: "\p" is not a valid escape sequence in a regular string literal
ax1.fill_between(x_val, y_val-std_y_val, y_val+std_y_val, alpha = 0.4, label = r"$\pm$ std(Validation MEE)")
ax1.grid()
ax1.set_ylabel("MEE")
ax1.set_xlabel("n_neighbors")
# A space follows the comma, matching the title of the training plot
title_str = f"ML CUP - Weights = Uniform, metric = {best_metric}, KNN two outputs "
ax1.set_title(title_str)
ax1.legend()

## Mean and std MEE computed over TR vs n_neighbors (k) for each model of "models_uniform" table

In [None]:
# Training MEE (mean and std over the CV folds) as a function of k for the
# uniform-weights models. Stored scores are negated MEEs, hence the sign flip.
x_train = np.array(models_uniform["param_n_neighbors"], dtype = int)
y_train = np.array(-models_uniform["mean_train_score"], dtype = float)
std_y_train = np.array(models_uniform["std_train_score"], dtype = float)


fig, ax2 = plt.subplots()
ax2.plot(x_train,y_train, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Training MEE")
# Raw string: "\p" is not a valid escape sequence in a regular string literal
ax2.fill_between(x_train, y_train-std_y_train, y_train+std_y_train, alpha = 0.4, label = r"$\pm$ std(Training MEE)")
ax2.grid()
ax2.set_ylabel("MEE")
ax2.set_xlabel("n_neighbors")
title_str = f"ML CUP - Weights = Uniform, metric = {best_metric}, KNN two outputs "
ax2.set_title(title_str)
ax2.legend()

- The grid search results show that the "weights" hyperparameter of the best model is "Uniform"
- The grid search results show that the "metric" hyperparameter of the best model is "Euclidean"

# Model Assessment - KNN - Double Output (Target 1 and Target 2)

- Mean Euclidean Error of the model trained over the design set (X_tr, Y_tr)


In [None]:
# Best configuration, as refitted by the grid search on the data passed to fit()
final_fitted_double_knn = grid_double_knn.best_estimator_

# Cross-validation statistics of the selected configuration; the stored means
# are negated MEEs, so they are sign-flipped back
mean_train_mee, mean_val_mee = (
    -best_model_table[column].iloc[0]
    for column in ("mean_train_score", "mean_test_score")
)
std_train_mee, std_val_mee = (
    best_model_table[column].iloc[0]
    for column in ("std_train_score", "std_test_score")
)

# MEE on the internal test set
Y_tt_pred = final_fitted_double_knn.predict(X_tt)
test_mee = mee_f(Y_tt, Y_tt_pred)

print(f"Train MEE: {mean_train_mee} +- {std_train_mee} ")
print(f"Validation MEE: {mean_val_mee} +- {std_val_mee} ")
print(f"Test MEE: {test_mee}")

- Final Test MEE when "weights" = distance is excluded from the grid search: 1.4627
- Final Test MEE when "weights" = distance is included in the grid search: 1.44
- For this reason, it was decided to perform the grid search with "weights" = distance included as well.


##  Predicting Target 1 and Target 2 of blind test using "final_fitted_double_knn"

In [None]:
# Predict both targets of the blind test set with the KNN trained on
# target 1 and target 2 together (AKA double KNN)
Y_blind_pred = final_fitted_double_knn.predict(blind_ts)

# First column: predictions for target 1; second column: predictions for target 2
y1_blind_double_knn, y2_blind_double_knn = Y_blind_pred[:, 0], Y_blind_pred[:, 1]

# KNN model with target 1 as output

- Approach:
-1) Initial Hold-out of "ML-CUP22-TR" dataset: Design set (X_tr, y1_tr) + Test set (X_tt, y1_tt)  
-2) Grid Search through a RepeatedKFold (Repeated: 10 times, #folds: 5) over the Design Set
-3) Find the best hyperparameters (basing the decision on the Mean Squared Error (MSE), the score used by the grid search)
-4) Compute the Mean Absolute Error (MAE) over the Test set (X_tt, y1_tt) to give an estimate of the future performance of the model found


## Model Selection - Target 1 

In [None]:
%%time

# Candidate numbers of neighbours: k = 1, ..., 50
n_neighbors_v = np.arange(1, 51)

param_grid = dict(
    n_neighbors=n_neighbors_v,
    weights=["distance", "uniform"],
    metric=["cityblock", "euclidean", "cosine"],
)

# 5-fold cross-validation repeated 10 times with a fixed seed
repeated_cv = RepeatedKFold(n_splits=5, n_repeats=10, random_state=0)

# Single-output KNN for target 1: configurations are scored by negated MSE
# and the best one is refitted
grid_knn1 = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=param_grid,
    scoring=["neg_mean_squared_error"],
    cv=repeated_cv,
    refit="neg_mean_squared_error",
    return_train_score=True,
    n_jobs=-1,
)
grid_knn1.fit(X_tr, y1_tr)

cv_results_1 = pd.DataFrame(grid_knn1.cv_results_)

In [None]:
# Save the grid-search results of the target-1 KNN to disk
cv_results_1.to_csv("grid_knn1.csv")

### Selecting the best model for target 1

In [None]:
# Rows ranked first by the grid search for target 1 (ranking on negated MSE)
is_top_ranked_1 = cv_results_1["rank_test_neg_mean_squared_error"] == 1
best_model1_table = cv_results_1[is_top_ranked_1]
best_model1_table

### Extracting the hyperparameters of the best model 1

In [None]:
# Read the hyperparameters from the first top-ranked row: number of
# neighbours, "weights" setting and distance metric
best1_k, best1_weigths, best1_metric = (
    best_model1_table[column].iloc[0]
    for column in ("param_n_neighbors", "param_weights", "param_metric")
)

### From the results of "grid_knn1" select the rows that have the same "metric" and "weights" hyperparameters as the best one
- This is done with the function selecting_results (implemented in the first part of the notebook)

In [None]:
# Keep only the configurations sharing the best target-1 model's metric and
# weights, so that the number of neighbours is the only hyperparameter that varies
models1 = selecting_results(
    df=cv_results_1,
    param_metric_value=best1_metric,
    param_weights_value=best1_weigths,
)

### Plotting MSE vs n_neighbors (k) for each model of "models1" table

In [None]:
# Validation MSE (mean and std over the CV folds) as a function of k.
# The grid search stores negated MSEs, hence the sign flip on the mean.
x_val = np.array(models1["param_n_neighbors"], dtype = int)
y_val = np.array(-models1["mean_test_neg_mean_squared_error"], dtype = float)
std_y_val = np.array(models1["std_test_neg_mean_squared_error"], dtype = float)

fig, ax1 = plt.subplots()
# The plotted quantity is the MSE used as grid-search score, so it is labelled as such
ax1.plot(x_val, y_val, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Validation MSE")
# Raw string: "\p" is not a valid escape sequence in a regular string literal
ax1.fill_between(x_val, y_val-std_y_val, y_val+std_y_val, alpha = 0.4, label = r"$\pm$ std(Validation MSE)")
ax1.grid()
ax1.set_ylabel("MSE")
ax1.set_xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best1_weigths}, metric = {best1_metric}, KNN target 1 "
ax1.set_title(title_str)
best_k_sting = "best n_neighbors = " + str(best1_k)
# Mark the selected k itself; a row position + 1 is only correct when the
# grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best1_k)
ax1.axvline(x=x_best, color='g', label = best_k_sting)
ax1.legend()

In [None]:
# Standard deviation of the validation MSE over the CV folds, as a function of k
plt.scatter(models1["param_n_neighbors"], models1["std_test_neg_mean_squared_error"], marker='.', label = "Validation std(MSE)")
plt.grid()
plt.ylabel("MSE std.")
plt.xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best1_weigths}, metric = {best1_metric}, KNN target 1 "
plt.title(title_str)
best_k_sting = "best n_neighbors = " + str(best1_k)
# Mark the selected k itself; a row position + 1 is only correct when the
# grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best1_k)
plt.axvline(x=x_best, color='g', label = best_k_sting)
plt.legend()

### Mean and std MSE computed over TR vs n_neighbors (k) for each model of "models1" table

In [None]:
# Training MSE (mean and std over the CV folds) as a function of k.
# The grid search stores negated MSEs, hence the sign flip on the mean.
x_train = np.array(models1["param_n_neighbors"], dtype = int)
y_train = np.array(-models1["mean_train_neg_mean_squared_error"], dtype = float)
std_y_train = np.array(models1["std_train_neg_mean_squared_error"], dtype = float)


fig, ax2 = plt.subplots()
# The plotted quantity is the MSE used as grid-search score, so it is labelled as such
ax2.plot(x_train,y_train, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Training MSE")
# Raw string for "\pm"; the stray "\ " of the old label, which rendered a
# literal backslash, is removed
ax2.fill_between(x_train, y_train-std_y_train, y_train+std_y_train, alpha = 0.4, label = r"$\pm$ std(Training MSE)")
ax2.grid()
ax2.set_ylabel("MSE")
ax2.set_xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best1_weigths}, metric = {best1_metric}, KNN target 1 "
ax2.set_title(title_str)
best_k_sting = "best n_neighbors = " + str(best1_k)
# Mark the k selected on the validation score; a row position + 1 is only
# correct when the grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best1_k)
ax2.axvline(x=x_best, color='g', label = best_k_sting)
ax2.legend()

# Model Assessment - KNN - Output: target 1 

- Mean Absolute Error of the model trained over the design set (X_tr, y1_tr)
- Evaluating the MAE over the inner Test Set (X_tt, y1_tt)

In [None]:
# Best target-1 configuration, as refitted by the grid search
final_fitted_knn1 = grid_knn1.best_estimator_

# Predictions on the design set and on the internal test set
y1_pred_tr, y1_pred_tt = (
    final_fitted_knn1.predict(inputs) for inputs in (X_tr, X_tt)
)

# Mean Absolute Error on each of the two sets
train_MAE_target1 = mean_absolute_error(y1_tr, y1_pred_tr)
test_MAE_target1 = mean_absolute_error(y1_tt, y1_pred_tt)

print(f"Train MAE target 1: {train_MAE_target1}")
print(f"Test MAE target 1: {test_MAE_target1}")

# 

# KNN model with target 2 as output

- Approach:
-1) Initial Hold-out of "ML-CUP22-TR" dataset: Design set (X_tr, y2_tr) + Test set (X_tt, y2_tt)  
-2) Grid Search through a RepeatedKFold (Repeated: 10 times, #folds: 5) over the Design Set
-3) Find the best hyperparameters (basing the decision on the Mean Squared Error (MSE), the score used by the grid search)
-4) Compute the Mean Absolute Error (MAE) over the Test set (X_tt, y2_tt) to give an estimate of the future performance of the model found


## Model Selection - Target 2 

In [None]:
%%time

# Candidate numbers of neighbours: k = 1, ..., 50
n_neighbors_v = np.arange(1, 51)

param_grid = dict(
    n_neighbors=n_neighbors_v,
    weights=["distance", "uniform"],
    metric=["cityblock", "euclidean", "cosine"],
)

# 5-fold cross-validation repeated 10 times with a fixed seed
repeated_cv = RepeatedKFold(n_splits=5, n_repeats=10, random_state=0)

# Single-output KNN for target 2: configurations are scored by negated MSE
# and the best one is refitted
grid_knn2 = GridSearchCV(
    estimator=KNeighborsRegressor(),
    param_grid=param_grid,
    scoring=["neg_mean_squared_error"],
    cv=repeated_cv,
    refit="neg_mean_squared_error",
    return_train_score=True,
    n_jobs=-1,
)
grid_knn2.fit(X_tr, y2_tr)

cv_results_2 = pd.DataFrame(grid_knn2.cv_results_)


In [None]:
# Save the grid-search results of the target-2 KNN to disk
cv_results_2.to_csv("grid_knn2.csv")

### Selecting the best model for target 2

In [None]:
# Rows ranked first by the grid search for target 2 (ranking on negated MSE)
is_top_ranked_2 = cv_results_2["rank_test_neg_mean_squared_error"] == 1
best_model2_table = cv_results_2[is_top_ranked_2]
best_model2_table

### Extracting the hyperparameters of the best model for target 2

In [None]:
# Read the hyperparameters from the first top-ranked row: number of
# neighbours, "weights" setting and distance metric
best2_k, best2_weigths, best2_metric = (
    best_model2_table[column].iloc[0]
    for column in ("param_n_neighbors", "param_weights", "param_metric")
)

### From the results of "grid_knn2" select the rows that have the same "metric" and "weights" hyperparameters as the best one
- This is done with the function selecting_results (implemented in the first part of the notebook)

In [None]:
# Keep only the configurations sharing the best target-2 model's metric and
# weights, so that the number of neighbours is the only hyperparameter that varies
models2 = selecting_results(
    df=cv_results_2,
    param_metric_value=best2_metric,
    param_weights_value=best2_weigths,
)

### Plotting MSE vs n_neighbors (k) for each model of "models2" table

In [None]:
# Validation MSE (mean and std over the CV folds) as a function of k.
# The grid search stores negated MSEs, hence the sign flip on the mean.
x_val = np.array(models2["param_n_neighbors"], dtype = int)
y_val = np.array(-models2["mean_test_neg_mean_squared_error"], dtype = float)
std_y_val = np.array(models2["std_test_neg_mean_squared_error"], dtype = float)

fig, ax1 = plt.subplots()
# The plotted quantity is the MSE used as grid-search score, so it is labelled as such
ax1.plot(x_val, y_val, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Validation MSE")
# Raw string: "\p" is not a valid escape sequence in a regular string literal
ax1.fill_between(x_val, y_val-std_y_val, y_val+std_y_val, alpha = 0.4, label = r"$\pm$ std(Validation MSE)")
ax1.grid()
ax1.set_ylabel("MSE")
ax1.set_xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best2_weigths}, metric = {best2_metric}, KNN target 2 "
ax1.set_title(title_str)
best_k_sting = "best n_neighbors = " + str(best2_k)
# Mark the selected k itself; a row position + 1 is only correct when the
# grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best2_k)
ax1.axvline(x=x_best, color='g', label = best_k_sting)
ax1.legend()

In [None]:
# Standard deviation of the validation MSE over the CV folds, as a function of k
plt.scatter(models2["param_n_neighbors"], models2["std_test_neg_mean_squared_error"], marker='.', label = "Validation std(MSE)")
plt.grid()
plt.ylabel("MSE std.")
plt.xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best2_weigths}, metric = {best2_metric}, KNN target 2 "
plt.title(title_str)
best_k_sting = "best n_neighbors = " + str(best2_k)
# Mark the selected k itself; a row position + 1 is only correct when the
# grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best2_k)
plt.axvline(x=x_best, color='g', label = best_k_sting)
plt.legend()

### Mean and std MSE computed over TR vs n_neighbors (k) for each model of "models2" table

In [None]:
# Training MSE (mean and std over the CV folds) as a function of k.
# The grid search stores negated MSEs, hence the sign flip on the mean.
x_train = np.array(models2["param_n_neighbors"], dtype = int)
y_train = np.array(-models2["mean_train_neg_mean_squared_error"], dtype = float)
std_y_train = np.array(models2["std_train_neg_mean_squared_error"], dtype = float)


fig, ax2 = plt.subplots()
# The plotted quantity is the MSE used as grid-search score, so it is labelled as such
ax2.plot(x_train,y_train, color = "r", linestyle = "-", marker = ".", markersize = 8, linewidth = 1, label = "Training MSE")
# Raw string for "\pm"; the stray "\ " of the old label, which rendered a
# literal backslash, is removed
ax2.fill_between(x_train, y_train-std_y_train, y_train+std_y_train, alpha = 0.4, label = r"$\pm$ std(Training MSE)")
ax2.grid()
ax2.set_ylabel("MSE")
ax2.set_xlabel("n_neighbors")
title_str = f"ML CUP - Weights = {best2_weigths}, metric = {best2_metric}, KNN target 2 "
ax2.set_title(title_str)
best_k_sting = "best n_neighbors = " + str(best2_k)
# Mark the k selected on the validation score; a row position + 1 is only
# correct when the grid of n_neighbors is exactly 1, 2, 3, ...
x_best = int(best2_k)
ax2.axvline(x=x_best, color='g', label = best_k_sting)
ax2.legend()

# Model Assessment - KNN - Output: target 2 

- Mean Absolute Error of the model trained over the design set (X_tr, y2_tr)
- Evaluating the MAE over the inner Test Set (X_tt, y2_tt)

In [None]:
# Best target-2 configuration, as refitted by the grid search
final_fitted_knn2 = grid_knn2.best_estimator_

# Predictions on the design set and on the internal test set
y2_pred_tr, y2_pred_tt = (
    final_fitted_knn2.predict(inputs) for inputs in (X_tr, X_tt)
)

# Mean Absolute Error on each of the two sets
train_MAE_target2 = mean_absolute_error(y2_tr, y2_pred_tr)
test_MAE_target2 = mean_absolute_error(y2_tt, y2_pred_tt)

print(f"Train MAE target 2: {train_MAE_target2}")
print(f"Test MAE target 2: {test_MAE_target2}")


## Assessing the goodness of the models (knn_1 and knn2) found by calculating the Mean Euclidean Error (MEE)

In [None]:
# MEE of the two single-target models taken together. The per-target vectors
# are stacked into two-column arrays so that the shared mee_f helper can be
# reused instead of repeating its formula here.
combined_knn_TRAIN_MEE = mee_f(
    np.column_stack((y1_tr, y2_tr)),
    np.column_stack((y1_pred_tr, y2_pred_tr)),
)
combined_knn_TEST_MEE = mee_f(
    np.column_stack((y1_tt, y2_tt)),
    np.column_stack((y1_pred_tt, y2_pred_tt)),
)

print(f"Train MEE (Model 1 and 2 combined): {combined_knn_TRAIN_MEE}")
print(f"Test MEE (Model 1 and 2 combined): {combined_knn_TEST_MEE}")

##  Predicting target 1 and 2 of blind test using respectively "final_fitted_knn1" and "final_fitted_knn2" 

In [None]:
# Blind-test predictions of the two single-target models:
# target 1 from final_fitted_knn1, target 2 from final_fitted_knn2
y1_blind_knn1, y2_blind_knn2 = (
    model.predict(blind_ts) for model in (final_fitted_knn1, final_fitted_knn2)
)

## Saving the predictions over the blind test for each model in a csv file

In [None]:
# One column per prediction vector (double KNN first, then the two
# single-target KNNs), one row per blind-test sample
df_blind_predictions_knn = pd.DataFrame(
    {
        "y1_blind_double_knn": y1_blind_double_knn,
        "y2_blind_double_knn": y2_blind_double_knn,
        "y1_blind_knn1": y1_blind_knn1,
        "y2_blind_knn2": y2_blind_knn2,
    }
)
df_blind_predictions_knn.to_csv("blind_predictions_knn.csv")