# Max Voting

In [37]:
import matplotlib.pyplot as plt
import pandas as pd
from xgboost import XGBClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import VotingClassifier,RandomForestClassifier

#### Read data

In [38]:
df = pd.read_csv('../../../datasets/parte2/train_prepared.csv', na_filter=False)

In [39]:
replace_map = {'None':0, 'Low':1, 'Medium':2, 'High':3, 'Very High':4}

df['Injeção na rede (kWh)'] = df['Injeção na rede (kWh)'].replace(replace_map).astype(int)

#### X and y arrays

In [40]:
X = df.drop(['Injeção na rede (kWh)'], axis=1)
y = df[['Injeção na rede (kWh)']]

#### Train Test Split

Now let's split the data into a training set and a testing set. We will train out model on the training set and then use the test set to evaluate the model.

In [41]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=2023, stratify=y)

## XGBoost

#### Training 

Using GridSearchCV to find the best hyperparameters

In [42]:
xgb_model = XGBClassifier(random_state=2023, learning_rate=0.1, max_depth=4, n_estimators=200)

In [43]:
xgb_model.fit(X_train, y_train)

## Random Forest

In [44]:
rf_model = RandomForestClassifier(random_state=2023)

## Max Voting

In [45]:
hvt_model = VotingClassifier(estimators=[("rf", rf_model), ("xgb", xgb_model)], voting='hard', weights=[1,2])

In [46]:
hvt_model.fit(X_train, y_train)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, dtype=self.classes_.dtype, warn=True)


In [47]:
hvt_score = hvt_model.score(X_test, y_test)
print("Accuracy: %.2f%%" % (hvt_score*100))

Accuracy: 88.60%


In [48]:
hvt_predictions = hvt_model.predict(X_test)

In [49]:
print(classification_report(y_test, hvt_predictions))

              precision    recall  f1-score   support

           0       0.97      0.98      0.97      1944
           1       0.43      0.28      0.34       108
           2       0.64      0.67      0.65       274
           3       0.72      0.73      0.72       276
           4       0.79      0.82      0.81       152

    accuracy                           0.89      2754
   macro avg       0.71      0.69      0.70      2754
weighted avg       0.88      0.89      0.88      2754

