# Import libraries and training set

In [None]:
#!pip install tensorflow

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import math
from tqdm.notebook import tqdm

from sklearn.model_selection import train_test_split, KFold
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer

from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.multioutput import MultiOutputRegressor

from tensorflow import keras
from keras import Sequential, layers
from keras.layers import Dense 
from keras.optimizers import SGD, RMSprop
from keras import backend as K
from keras.wrappers.scikit_learn import KerasRegressor

In [None]:
# Read the training file, skipping its first 7 lines. The first column is used as the
# index while parsing and is then dropped in favour of a fresh 0-based index.
df = (
    pd.read_csv('ML-CUP22-TR.csv', header=None, skiprows=7, index_col=0)
    .reset_index(drop=True)
)
df.head()

In [None]:
# Read the blind test file the same way as the training file: skip the first 7 lines,
# parse the first column as the index and then replace it with a fresh 0-based index.
df_blind = (
    pd.read_csv('ML-CUP22-TS.csv', header=None, skiprows=7, index_col=0)
    .reset_index(drop=True)
)
df_blind.head()

In [None]:
# Dimensions of the training frame as (rows, columns).
df.shape

In [None]:
# Per-column summary of the training frame (dtypes and non-null counts).
df.info()

In [None]:
# Data type of each column of the training frame.
df.dtypes

In [None]:
# Input features: the first nine columns of the training frame as a NumPy array.
X = df.iloc[:, :9].to_numpy()
X

In [None]:
# (n_samples, n_features) of the input matrix.
X.shape

In [None]:
# Regression targets: the two columns that follow the nine input features.
y = df.iloc[:, 9:11].to_numpy()
y


# Partitioning

In [None]:
# Hold out 20% of the samples as an internal test set; the fixed random_state keeps
# the shuffled split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)

In [None]:
# Shapes of the four arrays produced by the split.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

# Support Functions

## MEE

In [None]:
def mean_euclidean_error(y_true, y_pred):
    """Mean Euclidean Error (MEE) expressed with Keras backend operations.

    The Euclidean norm of ``y_pred - y_true`` is taken along the last axis and
    the resulting values are averaged.
    """
    squared_diff = K.square(y_pred - y_true)
    distances = K.sqrt(K.sum(squared_diff, axis=-1))
    return K.mean(distances)

In [None]:
def mean_euclidean_error_skit_friendly(T, O):
    """Mean Euclidean Error (MEE) computed with NumPy, usable with ``make_scorer``.

    Parameters
    ----------
    T : array-like, shape (n_samples,) or (n_samples, n_outputs)
        Target values.
    O : array-like, same number of samples as ``T``
        Predicted values.

    Returns
    -------
    float
        Average over the samples of the Euclidean norm of ``T[i] - O[i]``.
        For one-dimensional inputs this is the mean absolute error.
        Returns ``0.0`` when there are no samples.

    Raises
    ------
    ValueError
        If ``T`` and ``O`` do not contain the same number of samples.
    """
    targets = np.asarray(T, dtype=float)
    outputs = np.asarray(O, dtype=float)

    n_samples = targets.shape[0]
    if outputs.shape[0] != n_samples:
        # A plain zip() would silently drop the extra samples and skew the mean.
        raise ValueError(
            "T and O must have the same number of samples, got %d and %d"
            % (n_samples, outputs.shape[0])
        )
    if n_samples == 0:
        return 0.0

    # Flatten every sample to a vector so 1-D and multi-output targets share one code path.
    diff = targets.reshape(n_samples, -1) - outputs.reshape(n_samples, -1)
    return np.linalg.norm(diff, axis=1).mean()

## SGD

In [None]:
def create_bestSGDmodel(num_neurons=[50,50,100], l_rates=0.0001, decays=0.0001, nesterov=True, momentum=0.5):
    """Build and compile a fully connected regression network trained with SGD.

    Parameters
    ----------
    num_neurons : sequence of int
        Width of each hidden layer, in order; one ReLU ``Dense`` layer is
        created per entry. The default list is only read, never modified.
    l_rates : float
        Learning rate passed to ``SGD``.
    decays : float
        Value passed as ``decay`` to ``SGD``.
    nesterov : bool
        Whether ``SGD`` uses Nesterov momentum.
    momentum : float
        Momentum passed to ``SGD``.

    Returns
    -------
    A compiled ``Sequential`` model with 9 inputs and 2 linear outputs, using
    mean squared error as loss and ``mean_euclidean_error`` as metric.
    """
    model = Sequential()
    model.add(Dense(num_neurons[0], input_shape=(9,), activation='relu'))
    # Remaining hidden layers take their width from the *following* entries, so
    # every element of num_neurons (including the last one) is actually used.
    for units in num_neurons[1:]:
        model.add(Dense(units, activation='relu'))
    model.add(Dense(2, activation='linear'))

    model.compile(
        optimizer=SGD(learning_rate=l_rates, momentum=momentum, decay=decays, nesterov=nesterov),
        loss=['mean_squared_error'],
        metrics=[mean_euclidean_error],
    )
    return model

## RMSprop

In [None]:
def create_bestRMSprop_model(num_neurons=[20,50,50], l_rates=0.0001, decays=0.0, momentum=0.2):
    """Build and compile a fully connected regression network trained with RMSprop.

    Parameters
    ----------
    num_neurons : sequence of int
        Width of each hidden layer, in order; one ReLU ``Dense`` layer is
        created per entry. The default list is only read, never modified.
    l_rates : float
        Learning rate passed to ``RMSprop``.
    decays : float
        Value passed as ``decay`` to ``RMSprop``.
    momentum : float
        Momentum passed to ``RMSprop``.

    Returns
    -------
    A compiled ``Sequential`` model with 9 inputs and 2 linear outputs, using
    mean squared error as loss and ``mean_euclidean_error`` as metric.
    """
    model = Sequential()
    model.add(Dense(num_neurons[0], input_shape=(9,), activation='relu'))
    # Remaining hidden layers take their width from the *following* entries, so
    # every element of num_neurons (including the last one) is actually used.
    for units in num_neurons[1:]:
        model.add(Dense(units, activation='relu'))
    model.add(Dense(2, activation='linear'))

    model.compile(
        optimizer=RMSprop(learning_rate=l_rates, momentum=momentum, decay=decays),
        loss=['mean_squared_error'],
        metrics=[mean_euclidean_error],
    )
    return model

## Cross-validation

In [None]:
def cross_val(model, model_name, folds=10, X=X, y=y):
    """Cross-validate ``model`` with the MEE scorer and print mean and std of the scores.

    Parameters
    ----------
    model : estimator
        Estimator handed to ``cross_val_score``.
    model_name : str
        Label printed in front of the results.
    folds : int
        Number of folds of the shuffled ``KFold`` (seeded with 42).
    X, y : array-like
        Data to cross-validate on. The defaults are the module-level ``X`` and
        ``y`` as they were when this function was defined.

    Notes
    -----
    The scorer is built with ``greater_is_better=False``, so the printed mean
    is the negated MEE.
    """
    cv = KFold(n_splits=folds, shuffle=True, random_state=42)
    mee_scorer = make_scorer(mean_euclidean_error_skit_friendly, greater_is_better=False)
    # Pass the splitter itself: passing the integer `folds` would make scikit-learn
    # build its own unshuffled KFold and ignore the seeded one above.
    scores = cross_val_score(model, X, y, cv=cv, scoring=mee_scorer)
    print('scores', model_name)
    print('mean: %.3f' % scores.mean())
    print('std: %.3f' % scores.std())
    print()


# Evaluating MLP

## SGD

### Computing training time

In [None]:
%%time
model = KerasRegressor(build_fn=create_bestSGDmodel, epochs=1000, batch_size=10, verbose=0)
model.fit(X_train, y_train)                    

In [None]:
%%time
#model = KerasRegressor(build_fn=create_bestSGDmodel, epochs=1000, batch_size=10, verbose=0)                    
scores = cross_val_score(model, X, y, cv=10, scoring=make_scorer(mean_euclidean_error_skit_friendly, greater_is_better=False))
scores.mean(), scores.std()                    


## RMSprop

### Computing training time

In [None]:
%%time
model = KerasRegressor(build_fn=create_bestRMSprop_model, epochs=1000, batch_size=10, verbose=0)
model.fit(X_train, y_train) 

In [None]:
%%time
#model = KerasRegressor(build_fn=create_bestRMSprop_model, epochs=1000, batch_size=10, verbose=0)
scores = cross_val_score(model, X, y, cv=10, scoring=make_scorer(mean_euclidean_error_skit_friendly, greater_is_better=False))
scores.mean(), scores.std()                    


# Evaluating Decision Tree

In [None]:
#dt1 = DecisionTreeRegressor(ccp_alpha=0.0, criterion='friedman_mse', max_depth=6)
# random_state is fixed (same seed as the data split) so that repeated runs of the
# notebook grow the same tree and report the same scores.
dt2 = DecisionTreeRegressor(ccp_alpha=0.0, criterion='squared_error', max_depth=5, random_state=42)

## Computing training time

In [None]:
%%time
# Fit the decision tree on the training split; %%time reports how long the cell takes.
dt2.fit(X_train, y_train)

In [None]:
%%time
# Cross-validate the decision tree with cross_val's defaults (10 folds, full X and y).
#cross_val(dt1, 'DT1')
cross_val(dt2,  'DT2')

# Evaluating Random Forest

In [None]:
#rf1 = RandomForestRegressor(n_estimators=100, max_features='log2', max_depth=10, min_samples_split=6, bootstrap=True)
# random_state is fixed (same seed as the data split) so that the bootstrap samples and
# feature sub-sampling, and therefore the reported scores, are reproducible.
rf2 = RandomForestRegressor(n_estimators=100, max_features='log2', max_depth=10, min_samples_split=6, bootstrap=True, random_state=42)

## Computing training time

In [None]:
rf2.fit(X_train, y_train)

In [None]:
%time
#cross_val(rf1, 'RF1')
cross_val(rf2,  'RF2')

# Evaluating KNN

In [None]:
#knn1 = KNeighborsRegressor(n_jobs=23, algorithm='brute', metric='euclidean', weights='distance')
# Brute-force neighbour search with Euclidean distance and distance-weighted predictions.
# NOTE(review): n_jobs=24 presumably matches the core count of the original machine — confirm.
knn2 = KNeighborsRegressor(n_jobs=24, algorithm='brute', metric='euclidean', weights='distance')


## Computing training time

In [None]:
knn2.fit(X_train, y_train)

In [None]:
%time
#cross_val(knn1, 'KNN1')
cross_val(knn2,  'KNN2')

# Evaluating SVR

In [None]:
# Two single-output RBF SVRs that differ only in gamma; further down svr1 is
# cross-validated on the first target column and svr2 on the second.
svr1 = SVR(kernel='rbf', C=1, epsilon=0.1, gamma=0.125)
svr2 = SVR(kernel='rbf', C=1, epsilon=0.1, gamma=0.25)

In [None]:
# Split the two-column target matrix into one independent 1-D array per output.
y1 = y[:, 0].copy()
y2 = y[:, 1].copy()



In [None]:
%time
cross_val(svr1, 'SVR1', y=y1)
cross_val(svr2, 'SVR2', y=y2)

In [None]:
# Wrap a single-output RBF SVR in MultiOutputRegressor so it can be fitted on the two-column target.
svr_reg = MultiOutputRegressor(SVR(kernel='rbf', C=0.1, gamma=0.125, epsilon=0.1))


## Computing training time

In [None]:
%%time
# Fit the multi-output SVR on the training split; %%time reports how long the cell takes.
svr_reg.fit(X_train, y_train)

In [None]:
%time
cross_val(svr_reg, 'SVR ensamble')


# Best model predictions

In [None]:
# Selected model. random_state is fixed (same seed as the data split) so that the
# fitted forest, its scores and the exported predictions are reproducible.
rf = RandomForestRegressor(n_estimators=100, max_features='log2', max_depth=10, min_samples_split=6, bootstrap=True, random_state=42)

In [None]:
# 5-fold shuffled cross-validation of the selected model (`rf`, defined in this section)
# restricted to the training split. Scores are negated MEE (greater_is_better=False).
cv = KFold(n_splits=5, shuffle=True, random_state=42)

scores = cross_val_score(rf, X_train, y_train, cv=cv, scoring=make_scorer(mean_euclidean_error_skit_friendly, greater_is_better=False))
scores.mean(), scores.std()

In [None]:
# Same cross-validation of the selected model (`rf`), this time on the full data set.
scores = cross_val_score(rf, X, y, cv=cv, scoring=make_scorer(mean_euclidean_error_skit_friendly, greater_is_better=False))
scores.mean(), scores.std()

In [None]:
# Fit the selected forest on the training split and predict that same split,
# so the training error can be computed.
rf.fit(X_train, y_train)
y_pred_train = rf.predict(X_train)

In [None]:
# Training-set MEE; arguments follow the (targets, outputs) order of the function signature.
mean_euclidean_error_skit_friendly(y_train, y_pred_train)

# Plotting results

In [None]:
# Fit the selected forest and predict the internal test split with that same estimator
# (previously the predictions came from `rf2`, a different fitted object).
rf.fit(X_train, y_train)
y_pred = pd.DataFrame(rf.predict(X_test), columns=['x', 'y'])

y_testdf = pd.DataFrame(y_test, columns=['x', 'y'])

In [None]:
# True vs. predicted targets on the internal test split.
# `palette` is omitted: without a `hue` variable seaborn ignores it (and warns in recent versions).
fig, ax = plt.subplots()
sns.scatterplot(x=y_testdf['x'], y=y_testdf['y'], label="True", marker="X", ax=ax)
sns.scatterplot(x=y_pred['x'], y=y_pred['y'], label="Predicted", ax=ax)

ax.set_xlabel('y1', fontsize=14)
ax.set_ylabel('y2', fontsize=14)
ax.legend(fontsize=14)  # a single legend call; the earlier default-sized one was immediately replaced
fig.savefig("RFpredictions.pdf", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Predict the blind test set with the selected, already fitted forest (`rf`) and plot the outputs.
y_blind_pred = rf.predict(df_blind.values)
y_test_blind_df = pd.DataFrame(y_blind_pred, columns=['x', 'y'])
sns.scatterplot(x=y_test_blind_df['x'], y=y_test_blind_df['y'])
plt.xlabel('y1', fontsize=14)
plt.ylabel('y2', fontsize=14)
plt.savefig("RF_blind_predictions.pdf", dpi=300, bbox_inches="tight")
plt.show()

In [None]:
# Number the blind predictions from 1 to n.
y_test_blind_df.index = list(range(1, len(y_test_blind_df) + 1))

In [None]:
# Export the blind predictions as CSV without a header row; `index` is left at its
# default, so the DataFrame index is written as the first column.
y_test_blind_df.to_csv('Hoddmìmir_ML-CUP22_TS.csv', header=False)
