# Neural Network

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.model_selection import GridSearchCV,KFold,train_test_split
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasRegressor
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
%matplotlib inline 

### Read data

In [None]:
# Load the cleaned dataset from a path relative to the working directory.
df = pd.read_csv(filepath_or_buffer='../../../../datasets/parte1/dataset_cleaned.csv')

In [None]:
# Display the dataset dimensions as (rows, columns).
df.shape

### X and y arrays

In [None]:
# Features are every column except the target; the target 'MaxTemp' is kept as a
# one-column DataFrame (not a Series) so it can be scaled like the features.
X = df.drop(columns=['MaxTemp'])
y = df.loc[:, ['MaxTemp']]

#### Train Test Split

Now let's split the data into a training set and a testing set. We will train our model on the training set and then use the test set to evaluate the model.

In [None]:
# Fit the scalers on the training rows only, so the minima/maxima of the held-out
# rows do not leak into preprocessing. train_test_split chooses rows purely from
# the number of samples, test_size and random_state, so reusing the values of the
# split in the next cell (test_size=0.3, random_state=2023) selects exactly the
# rows that end up in X_train / y_train. Keep these two calls in sync.
train_rows = train_test_split(np.arange(len(X)), test_size=0.3, random_state=2023)[0]
scaler_X = MinMaxScaler(feature_range=(0, 1)).fit(X.iloc[train_rows])
scaler_y = MinMaxScaler(feature_range=(0, 1)).fit(y.iloc[train_rows])
# Transform every row with the train-fitted scalers; held-out values may therefore
# fall slightly outside [0, 1], which is expected.
X = pd.DataFrame(scaler_X.transform(X[X.columns]), columns=X.columns)
y = pd.DataFrame(scaler_y.transform(y[y.columns]), columns=y.columns)

In [None]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=2023,
)

In [None]:
def build_model(activation = 'relu', learning_rate = 0.001, input_dim = 22):
    """Build and compile a small fully connected network for single-target regression.

    Parameters
    ----------
    activation : str
        Activation function used by the two hidden layers.
    learning_rate : float
        Learning rate handed to the Adam optimizer.
    input_dim : int
        Number of input features. Defaults to 22, the value that used to be
        hard-coded, so existing calls behave as before.

    Returns
    -------
    A compiled ``Sequential`` model (16 -> 8 -> 1 units) trained on MAE loss,
    with MAE and MSE tracked as metrics.

    Notes
    -----
    The optimizer is always Adam and the model is returned already compiled.
    NOTE(review): when this function is used through a scikeras wrapper, confirm
    that a wrapper-level ``optimizer`` setting is not silently ignored.
    """
    model = Sequential()
    # An explicit Input layer replaces the layer-level `input_dim` argument.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(Dense(16, activation=activation))
    model.add(Dense(8, activation=activation))
    # Linear output head: reusing the hidden activation here (ReLU by default)
    # would forbid negative predictions and can leave the single output unit
    # stuck at 0 with no gradient.
    model.add(Dense(1))

    # Compile the model
    model.compile(
        loss='mae',
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=['mae', 'mse'],
    )
    return model
    

In [None]:
# Build one network with the default hyperparameters and print its layer summary.
model = build_model()
model.summary()

#### Training 

Using GridSearchCV to find the best hyperparameters

In [None]:
# Candidate optimizers for the grid search, keyed by the parameter name to vary.
# NOTE(review): build_model compiles its network with a fixed Adam optimizer, so
# confirm that these wrapper-level `optimizer` values actually change the model
# being trained.
optimizer = ['SGD', 'RMSprop', 'Adagrad']
param_grid = {'optimizer': optimizer}


In [None]:
# 5-fold cross-validation with shuffling; the fixed seed makes the folds reproducible.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=2023,
)

In [None]:
# Wrap build_model in a scikit-learn compatible regressor so it can be used with
# GridSearchCV. This rebinds `model`, which previously held the bare Keras network.
model = KerasRegressor(
    model=build_model,
    epochs=80,
    batch_size=32,
    validation_split=0.3,
)

In [None]:
# Exhaustive search over param_grid using the K-fold splitter defined above.
# Candidates are ranked by negative MAE (higher is better), the best one is refit
# on the full training data, and all available cores are used.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    cv=kf,
    refit=True,
    n_jobs=-1,
    verbose=2,
)

#### Fit the Model

In [None]:
# Run the cross-validated search on the training split only.
grid_search.fit(X_train,y_train)

In [None]:
# Report the best mean cross-validated score (negative MAE, per the `scoring`
# setting of the search) together with the parameter combination that achieved it.
print("Best: %f using %s" % (grid_search.best_score_,grid_search.best_params_))

Inspect the best parameters

In [None]:
# Keep a handle on the estimator that the search refit with the best parameters.
best_mlp_model = grid_search.best_estimator_
print(best_mlp_model)

#### Fit Model

In [None]:
# Train the selected estimator on the training split for 80 epochs while tracking
# its loss on the test split.
# NOTE(review): the held-out test set doubles as validation data here; this is fine
# for plotting curves, but it should not drive any further model selection.
best_mlp_model.fit(
    X_train,
    y_train,
    epochs=80,
    validation_data=(X_test, y_test),
    verbose=2,
)

#### Learning Curves

In [None]:
# Training vs. validation loss per epoch, read from the fit history of the estimator.
for history_key in ('loss', 'val_loss'):
    plt.plot(best_mlp_model.history_[history_key])
plt.title('Model Performance')
plt.xlabel('Epochs')
plt.ylabel('Loss values')
plt.legend(['train', 'val'], loc='upper left')
plt.show()

In [None]:
# Plot every series recorded in the fit history (one line per recorded key) on a single figure.
pd.DataFrame(best_mlp_model.history_).plot(figsize = (8,5))
plt.show()

#### Predictions 

In [None]:
# Predict the (still scaled) target for the test features.
predictions = best_mlp_model.predict(X_test)

In [None]:
# Map the scaled predictions back to the original target units.
# NOTE(review): inverse_transform needs a 2-D array; confirm predict() returns shape (n, 1).
predictions_unscaled = scaler_y.inverse_transform(predictions)

In [None]:
# Preview the first ten unscaled predictions.
predictions_unscaled[:10]

In [None]:
# Map the scaled test targets back to the original units so they are comparable with the predictions.
y_test_unscaled = scaler_y.inverse_transform(y_test)

In [None]:
# Preview the first ten true target values in original units.
y_test_unscaled[:10]

#### Regression Report 

In [None]:
# Regression metrics on the test split, computed in the original target units.
# The MSE is computed once and reused for the RMSE.
mse = metrics.mean_squared_error(y_test_unscaled, predictions_unscaled)
print('R2:', metrics.r2_score(y_test_unscaled, predictions_unscaled))
print('MAE:', metrics.mean_absolute_error(y_test_unscaled, predictions_unscaled))
print('MSE:', mse)
print('RMSE:', np.sqrt(mse))

In [None]:
# Quick scatter of true values (x-axis) against predictions (y-axis).
plt.scatter(y_test_unscaled, predictions_unscaled)

In [None]:
# Distribution of the residuals (true minus predicted) with a KDE overlay.
sns.histplot((y_test_unscaled-predictions_unscaled), bins=50, kde=True)

In [None]:
# Labeled scatter of true values (x-axis) against predictions (y-axis).
plt.scatter(y_test_unscaled, predictions_unscaled)
plt.xlabel('Valores Reais')
plt.ylabel('Previsões')
# The title names the model evaluated in this notebook (a neural network); the
# previous title was left over from a multiple-linear-regression notebook.
plt.title('Rede Neuronal')
plt.show()