# Max Voting

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
from xgboost import XGBClassifier
import tensorflow as tf 
from tensorflow import keras 
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from scikeras.wrappers import KerasClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import VotingClassifier

#### Read data

In [None]:
# Load the prepared training dataset. na_filter=False switches off pandas'
# missing-value detection, so every cell is read exactly as written in the CSV.
df = pd.read_csv(
    '../../../datasets/parte2/treino/dataset_prepared.csv',
    na_filter=False,
)

In [None]:
# Ordinal encoding of the target: textual level -> integer class id.
replace_map = {
    'None': 0,
    'Low': 1,
    'Medium': 2,
    'High': 3,
    'Very High': 4,
}

# Substitute each label with its code, then cast the column to int.
df['injection'] = (
    df['injection']
    .replace(replace_map)
    .astype(int)
)

#### X and y arrays

In [None]:
# Features: every column except the target.
X = df.drop(columns=['injection'])
# Target, kept as a one-column DataFrame (double brackets) rather than a Series.
y = df[['injection']]

#### Train Test Split

Now let's split the data into a training set and a testing set. We will train our model on the training set and then use the test set to evaluate the model.

In [None]:
# Hold out 25% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both partitions; the seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=2023,
    stratify=y,
)

## XGBoost

#### Training 

The hyperparameters below are fixed; presumably they were selected beforehand with GridSearchCV, since no search is run in this notebook.

In [None]:
# Gradient-boosted tree classifier with fixed hyperparameters and a fixed seed.
xgb_model = XGBClassifier(
    n_estimators=200,
    max_depth=4,
    learning_rate=0.1,
    random_state=2023,
)

In [None]:
# Train XGBoost on the training partition (features are still unscaled here).
xgb_model.fit(X=X_train, y=y_train)

## Neural Networks

### Normalization

In [None]:
# Fit the scaler on the training features ONLY. Fitting on the full X (as was
# done before) lets the test rows influence the min/max used for scaling,
# which leaks test-set information into training.
scaler_X = MinMaxScaler(feature_range=(0, 1)).fit(X_train)

# Apply the same train-derived scaling to both partitions. The result is
# rebuilt as a DataFrame so the column names are kept; note that the row index
# is reset to 0..n-1. Test values may fall slightly outside [0, 1] when they
# exceed the range seen in training - that is expected.
X_train = pd.DataFrame(scaler_X.transform(X_train), columns=X_train.columns)
X_test = pd.DataFrame(scaler_X.transform(X_test), columns=X_test.columns)

#### One-hot encode the target

The data was already split above. The network ends in a 5-unit softmax layer trained with categorical cross-entropy, so the integer class labels of the training and test sets are converted to one-hot vectors here.

In [None]:
# Number of target classes (codes 0-4 produced by replace_map).
num_classes = 5

# Convert the integer labels of both partitions to one-hot rows of width
# num_classes, as required by the categorical cross-entropy loss.
y_train, y_test = (
    tf.keras.utils.to_categorical(labels, num_classes)
    for labels in (y_train, y_test)
)

In [None]:
def build_model(activation='relu', learning_rate=0.005, dropout_rate=0.1):
    """Build and compile the feed-forward network used as the ANN classifier.

    Architecture: Dense(64) -> Dropout -> Dense(32) -> Dropout -> Dense(5,
    softmax). The input width is taken from the global ``X_train``.

    Args:
        activation: Activation function for the two hidden layers.
        learning_rate: Step size for the Adam optimizer. Previously this
            argument was accepted but ignored (the optimizer was the string
            ``'adam'``, i.e. Adam's default rate); it is now actually applied.
        dropout_rate: Fraction of units dropped after each hidden layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    ann_model = Sequential()
    ann_model.add(Dense(64, input_dim=X_train.shape[1], activation=activation))
    ann_model.add(Dropout(dropout_rate))  # dropout after the first dense layer
    ann_model.add(Dense(32, activation=activation))
    ann_model.add(Dropout(dropout_rate))  # dropout after the second dense layer
    ann_model.add(Dense(5, activation='softmax'))  # output: one unit per class

    # Compile with an explicit Adam instance so that learning_rate takes effect.
    ann_model.compile(
        loss='categorical_crossentropy',
        optimizer=keras.optimizers.Adam(learning_rate=learning_rate),
        metrics=['accuracy'],
    )

    return ann_model

In [None]:
# Instantiate the network once with its default hyperparameters, purely to
# print the layer/parameter overview.
ann_model = build_model()
ann_model.summary()

In [None]:
# Wrap the Keras builder in a scikit-learn compatible estimator so it can be
# used inside sklearn ensembles. This rebinds ann_model from the raw Keras
# model to the wrapper.
ann_model = KerasClassifier(
    build_fn=build_model,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# Train the wrapped network on the scaled features and one-hot targets.
# NOTE(review): the test partition is passed as validation data here, so the
# per-epoch validation metrics are computed on the same rows later used for
# the final evaluation.
ann_model.fit(
    X_train,
    y_train,
    epochs=100,
    validation_data=(X_test, y_test),
)

## Max Voting

In [None]:
# Hard (majority) voting over the two classifiers; the XGBoost vote counts
# twice as much as the neural network's.
hvt_model = VotingClassifier(
    estimators=[
        ("ann", ann_model),
        ("xgb", xgb_model),
    ],
    voting='hard',
    weights=[1, 2],
)

In [None]:
# y_train was one-hot encoded for the neural network, but VotingClassifier.fit
# only supports 1-D class labels and raises NotImplementedError on a
# multi-column target. Recover the integer class ids with argmax.
#
# The ANN member is compiled with categorical cross-entropy inside
# build_model, so the SciKeras wrapper is told which loss is in use; it then
# one-hot encodes the integer labels internally before training.
# NOTE(review): relies on SciKeras' loss-driven target encoding - confirm
# against the installed scikeras version.
hvt_model.set_params(ann__loss="categorical_crossentropy")
hvt_model.fit(X_train, y_train.argmax(axis=1))

In [None]:
# y_test is one-hot at this point while the ensemble predicts 1-D class ids;
# comparing the two directly makes the accuracy computation fail on mixed
# target types. Convert the one-hot rows back to class ids first.
hvt_score = hvt_model.score(X_test, y_test.argmax(axis=1))
print("Accuracy: %.2f%%" % (hvt_score*100))

In [None]:
# Majority-vote class predictions for the held-out test features.
hvt_predictions = hvt_model.predict(X=X_test)

In [None]:
# Per-class precision/recall/F1. y_test is one-hot here, whereas the
# predictions are 1-D class ids, so the true labels are converted back to
# class ids to give both arguments the same format.
print(classification_report(y_test.argmax(axis=1), hvt_predictions))

In [None]:
# VotingClassifier.predict returns a NumPy array, which has no .to_csv method.
# Wrap the predictions in a DataFrame (column named after the target) before
# writing them to disk.
pd.DataFrame({'injection': hvt_predictions}).to_csv(
    '../../../datasets/parte2/teste/ensemble_results.csv',
    index=False,
)

In [None]:
#for model, label in zip([ann_model, xgb_model, hvt_model], ['ann', 'xgb', 'ensemble']):
#    hvt_score = cross_val_score(model, X_test, y_test, scoring='accuracy', cv=10)
#    print("Accuracy: %0.2f (+/- %0.2f) [%s]")