In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.metrics import classification_report, ConfusionMatrixDisplay, accuracy_score


#### Read data

In [None]:
# Load the prepared training set. na_filter=False switches off pandas'
# NA-marker detection, so every cell is kept exactly as written in the CSV
# (presumably so that a literal 'None' class label is not read as NaN — confirm).
df = pd.read_csv(
    '../../../datasets/parte2/treino/dataset_prepared.csv',
    na_filter=False,
)

In [None]:
# Integer-encode the target column in place; lb_make keeps the fitted
# label <-> code mapping so predictions can be decoded later.
target_col = 'Injeção na rede (kWh)'
lb_make = LabelEncoder()
df[target_col] = lb_make.fit_transform(df[target_col])

# y is kept as a single-column DataFrame; X is every remaining column.
y = df[[target_col]]
X = df.drop(columns=target_col)

### Normalization

In [None]:
# Fit one min-max scaler per frame, mapping each column onto [0, 1].
scaler_X = MinMaxScaler(feature_range=(0, 1))
scaler_X.fit(X)
scaler_y = MinMaxScaler(feature_range=(0, 1))
scaler_y.fit(y)

# Replace X by its scaled version, keeping the original column names.
scaled_features = scaler_X.transform(X)
X = pd.DataFrame(scaled_features, columns=X.columns)

# One-hot encode the integer class codes into 5 indicator columns.
y = tf.keras.utils.to_categorical(y, 5)

In [None]:
# Preview the first rows (head must be called; the bare attribute only
# displays the bound method's repr).
df.head()

#### Train Test Split

Now let's split the data into a training set and a testing set. We will train our model on the training set and then use the test set to evaluate the model.

In [None]:
# Hold out 30% of the rows for evaluation; the split is stratified on y and
# uses a fixed seed so it is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=2023,
)

In [None]:
def build_model(activation='relu', learning_rate=0.005, dropout_rate=0.1, compile_kwargs=None):
    """Build and compile the feed-forward classifier (64 -> 32 -> 5 units).

    The input width is read from the notebook-level feature frame ``X``.

    Args:
        activation: Activation function of both hidden Dense layers.
        learning_rate: Learning rate of the fallback Adam optimizer. It is
            only used when no optimizer arrives through ``compile_kwargs``.
        dropout_rate: Dropout fraction applied after each hidden layer.
        compile_kwargs: Optional dict of ``Model.compile`` arguments. scikeras
            supplies it to model functions that declare this parameter, and
            its ``'optimizer'`` entry reflects the wrapper's ``optimizer``
            setting. Honouring it is what lets a grid search over
            ``optimizer`` change the compiled model; scikeras does not
            recompile a model that is returned already compiled.

    Returns:
        A compiled ``Sequential`` model with a 5-unit softmax output, trained
        with categorical cross-entropy and reporting accuracy.
    """
    model = Sequential()
    model.add(Dense(64, input_dim=X.shape[1], activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(32, activation=activation))
    model.add(Dropout(dropout_rate))
    model.add(Dense(5, activation='softmax'))

    # Prefer the optimizer chosen on the wrapper (e.g. by GridSearchCV);
    # otherwise fall back to Adam with the requested learning rate.
    optimizer = None
    if compile_kwargs is not None:
        optimizer = compile_kwargs.get('optimizer')
    if optimizer is None:
        optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

    # Compile the model
    model.compile(
        loss='categorical_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )

    return model

In [None]:
# Build one model with the default hyper-parameters and print its layer summary.
model = build_model()
model.summary()

In [None]:
# Search space for GridSearchCV: candidate values for the estimator's
# `optimizer` parameter.
param_grid = dict(optimizer=['SGD', 'RMSprop', 'Adagrad'])

In [None]:
# Wrap the model factory in a scikit-learn compatible estimator so that it can
# be handed to GridSearchCV.
model = KerasClassifier(
    model=build_model,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# 15-fold cross-validated search over param_grid, scored by accuracy; the best
# configuration is refitted at the end (refit=True) and all cores are used.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=15,
    refit=True,
    n_jobs=-1,
    verbose=2,
)

In [None]:
# Tune on the training partition only, so that X_test / y_test stay unseen
# and the evaluation cells below measure generalisation rather than recall.
grid_search.fit(X_train, y_train)

In [None]:
# Estimator that GridSearchCV refitted with the best-scoring parameters.
best_mlp_model = grid_search.best_estimator_

In [None]:
# Retrain the selected model on the training partition only. The held-out
# partition is passed as validation data purely to record val_loss per epoch;
# training on the full X, y would put those validation rows in the training
# data and make the curves and test metrics meaningless.
best_mlp_model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_test, y_test),
    verbose=1,
)

#### Learning Curves

In [None]:
# Learning curves: loss per epoch on the training data and on the validation data.
history = best_mlp_model.history_

fig, ax = plt.subplots()
ax.plot(history['loss'], label='train')
ax.plot(history['val_loss'], label='val')
ax.set_title('Model Performance')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss values')
ax.legend(loc='upper left')
plt.show()

In [None]:
# Class predictions of the selected model for the held-out partition.
predictions = best_mlp_model.predict(X_test)

In [None]:
# Fraction of held-out rows whose predicted class matches the true class.
accuracy_score(y_true=y_test, y_pred=predictions)

In [None]:
# Per-class precision, recall and F1 on the held-out partition.
report_text = classification_report(y_true=y_test, y_pred=predictions)
print(report_text)

Get the predictions using the trained model

In [None]:
df_test = pd.read_csv('../../../datasets/parte2/test_prepared.csv')

# Give the network the same inputs it was trained on: the training column
# order, scaled with the scaler fitted on the training features.
# NOTE(review): assumes test_prepared.csv holds the same unscaled feature
# columns as the training file — confirm.
X_submission = pd.DataFrame(
    scaler_X.transform(df_test[X.columns]),
    columns=X.columns,
)

# Predict with the fitted best estimator. `model` is only the unfitted
# template handed to GridSearchCV (which clones it), so it cannot predict.
predictions_test = best_mlp_model.predict(X_submission)

In [None]:
# The classifier was trained on one-hot targets, so predict() can return one
# indicator row per sample; collapse that to a single class code per row
# (a DataFrame column cannot be built from a 2-D array).
if getattr(predictions_test, 'ndim', 1) > 1:
    class_codes = predictions_test.argmax(axis=1)
else:
    class_codes = predictions_test

# Decode with the encoder fitted on the training target instead of assuming a
# code order: LabelEncoder numbers classes in sorted order, which for string
# labels is alphabetical and not the None < Low < ... order of replace_map.
decoded_labels = lb_make.inverse_transform(class_codes)

df_predictions = pd.DataFrame({
    'RowId': range(1, len(decoded_labels) + 1),
    'Result': decoded_labels,
})

# Only has an effect when the training target was stored as numeric codes
# (the decoded values are then those codes); string labels pass through.
replace_map = { 0:'None', 1:'Low', 2:'Medium', 3:'High', 4:'Very High'}
df_predictions['Result'] = df_predictions['Result'].replace(replace_map)

df_predictions.to_csv('../../../datasets/parte2/kaggle.csv', index=False)