In [None]:
"""
O código usa o método ensemble para combinar as previsões de vários modelos de classificação.
Os modelos individuais são um Decision Tree, um Random Forest, um Gradient Boosting, um Naive Bayes e um CatBoost.
Os dados são carregados de um arquivo CSV e codificados usando one-hot encoding.
Os dados são então divididos em conjuntos de treinamento e teste.
Os modelos individuais são treinados nos dados de treinamento e suas previsões são feitas nos dados de teste.
As previsões dos modelos individuais são combinadas para produzir uma única previsão.
A precisão do modelo ensemble nos dados de teste é calculada.
"""

import pandas as pd
from scipy.stats import mode
from sklearn.model_selection import train_test_split, RandomizedSearchCV, cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import MultinomialNB
from catboost import CatBoostClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from imblearn.over_sampling import SMOTE
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import StratifiedKFold
from joblib import dump, load
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the dataset from the CSV file.
# NOTE(review): this is an absolute, machine-specific path; consider making it configurable.
data = pd.read_csv(
    r"C:\Users\crist\Desktop\dados.csv",
    sep=','
)

# One-hot encode all categorical columns in a single pass. The resulting
# columns and their order match encoding them one at a time in this order.
categorical_columns = ["j_posicao", "j_rota", "j_jardas", "j_catch", "j_area_recepcao"]
data = pd.get_dummies(data, columns=categorical_columns)

# Separate the features (X) from the target (y)
X = data.drop(columns=["target"])
y = data["target"]

# Hold out the test set BEFORE resampling or scaling. Fitting SMOTE or the
# scaler on the full dataset lets information from the test rows leak into
# the training data (synthetic neighbours, min/max statistics) and inflates
# every reported score. `stratify=y` keeps the class proportions in both parts.
X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

# Balance the classes with SMOTE, using the training portion only.
# NOTE(review): SMOTE's default k_neighbors=5 needs at least 6 training rows
# per class; confirm the rarest class still satisfies this after the split.
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_raw, y_train_raw)

# Min-max scaling: learn the ranges on the training data and reuse them for
# the test data. `clip=True` keeps test values inside [0, 1], so the
# MultinomialNB model never receives negative feature values.
scaler = MinMaxScaler(clip=True)
X_resampled_normalized = scaler.fit_transform(X_resampled)

# `X_resampled_normalized` / `y_resampled` now hold training rows only, so
# cells that fit or cross-validate on them never see the held-out test set.
X_train, y_train = X_resampled_normalized, y_resampled
X_test = scaler.transform(X_test_raw)

In [None]:
# Create the individual (untuned) base models
decision_tree_model = DecisionTreeClassifier(random_state=42)
random_forest_model = RandomForestClassifier(random_state=42)
gradient_boosting_model = GradientBoostingClassifier(random_state=42)
naive_bayes_model = MultinomialNB()
catboost_model = CatBoostClassifier(random_state=42, verbose=0)

# Hyperparameter candidates for the randomized search.
#
# `max_features='auto'` was replaced by `None` (use all features): for
# classifiers 'auto' was only an alias of 'sqrt', and scikit-learn >= 1.3
# rejects it, which makes every candidate that samples it fail.
param_dist_decision_tree = {
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10, 20, 50],
    'criterion': ['gini', 'entropy'],
    'max_features': [None, 'sqrt', 'log2']
}

param_dist_random_forest = {
    'n_estimators': [50, 100, 150, 200, 250],
    'max_depth': [None, 5, 10, 15, 20],
    'min_samples_split': [2, 5, 10, 20, 50],
    'criterion': ['gini', 'entropy'],
    'max_features': [None, 'sqrt', 'log2'],
    'bootstrap': [True, False]
}

param_dist_gradient_boosting = {
    'n_estimators': [50, 100, 150, 200, 250],
    'learning_rate': [0.01, 0.1, 0.5, 1.0],
    'max_depth': [3, 5, 7, 9],
    'subsample': [0.8, 0.9, 1.0],
    'min_samples_split': [2, 5, 10, 20]
}

# The two-element priors ([0.2, 0.8], [0.5, 0.5]) were dropped: MultinomialNB
# requires one prior per class, and the confusion-matrix cell labels three
# classes (N, P, PR), so those candidates could only produce failed fits.
# `fit_prior` already switches between learned and uniform priors.
# NOTE(review): assumes the target has three classes - confirm against the data.
param_dist_naive_bayes = {
    'class_prior': [None],
    'fit_prior': [True, False],
    'alpha': [0.1, 0.5, 1.0]
}

param_dist_catboost = {
    'iterations': [100, 200, 300],
    'depth': [4, 6, 8, 10],
    'learning_rate': [0.01, 0.1, 0.2],
    'l2_leaf_reg': [1, 3, 5]
}

# Settings for the inner cross-validation used by every search
n_folds = 15
cv_strategy = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)


def build_random_search(estimator, param_distributions):
    """Return an unfitted RandomizedSearchCV that uses the shared search settings.

    Args:
        estimator: the base classifier to tune.
        param_distributions: dict mapping parameter names to candidate values.

    Returns:
        A RandomizedSearchCV with 25 iterations, the shared stratified CV
        strategy, accuracy scoring and a fixed random state.
    """
    return RandomizedSearchCV(
        estimator,
        param_distributions,
        n_iter=25,
        cv=cv_strategy,
        scoring='accuracy',
        random_state=42,
    )


# One search object per model
random_search_decision_tree = build_random_search(decision_tree_model, param_dist_decision_tree)
random_search_random_forest = build_random_search(random_forest_model, param_dist_random_forest)
random_search_gradient_boosting = build_random_search(gradient_boosting_model, param_dist_gradient_boosting)
random_search_naive_bayes = build_random_search(naive_bayes_model, param_dist_naive_bayes)
random_search_catboost = build_random_search(catboost_model, param_dist_catboost)

# Run the searches on the TRAINING split only. The best estimator is refit on
# whatever data is passed to `fit`; fitting on the full resampled dataset
# would train the final models on the very rows later used as the test set.
for search in (
    random_search_decision_tree,
    random_search_random_forest,
    random_search_gradient_boosting,
    random_search_naive_bayes,
    random_search_catboost,
):
    search.fit(X_train, y_train)

# Best model of each search (already refit on the training split)
best_decision_tree_model = random_search_decision_tree.best_estimator_
best_random_forest_model = random_search_random_forest.best_estimator_
best_gradient_boosting_model = random_search_gradient_boosting.best_estimator_
best_naive_bayes_model = random_search_naive_bayes.best_estimator_
best_catboost_model = random_search_catboost.best_estimator_

In [None]:
# Predict the test set with every tuned model
y_pred_decision_tree = best_decision_tree_model.predict(X_test)
y_pred_random_forest = best_random_forest_model.predict(X_test)
y_pred_gradient_boosting = best_gradient_boosting_model.predict(X_test)
y_pred_naive_bayes = best_naive_bayes_model.predict(X_test)
# squeeze() flattens the CatBoost output so it can be indexed like the others
y_pred_catboost = best_catboost_model.predict(X_test).squeeze()

# Majority vote per test sample. When several labels share the top count,
# the label that appears first in the model order below wins.
ensemble_predictions = [
    Counter(votes).most_common(1)[0][0]
    for votes in zip(
        y_pred_decision_tree,
        y_pred_random_forest,
        y_pred_gradient_boosting,
        y_pred_naive_bayes,
        y_pred_catboost,
    )
]

In [None]:
# MODEL RESULTS

# Accuracy of the ensemble on the test data
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
print("Acurácia do ensemble nos dados de teste:", ensemble_accuracy)

# Per-class precision / recall / F1 of the ensemble on the test data (plain text)
ensemble_classification_report = classification_report(y_test, ensemble_predictions)
print(ensemble_classification_report)

# Confusion matrix of the ensemble on the test data (rows = true labels, columns = predictions)
conf_matrix = confusion_matrix(y_test, ensemble_predictions)
print("Matriz de Confusão:")
print(conf_matrix)

# Cross-validate every TUNED model. The Naive Bayes score previously used the
# untuned `naive_bayes_model`, so it was not comparable with the other four.
cv_scores_decision_tree = cross_val_score(best_decision_tree_model, X_resampled_normalized, y_resampled, cv=15, scoring='accuracy')
cv_scores_random_forest = cross_val_score(best_random_forest_model, X_resampled_normalized, y_resampled, cv=15, scoring='accuracy')
cv_scores_gradient_boosting = cross_val_score(best_gradient_boosting_model, X_resampled_normalized, y_resampled, cv=15, scoring='accuracy')
cv_scores_naive_bayes = cross_val_score(best_naive_bayes_model, X_resampled_normalized, y_resampled, cv=15, scoring='accuracy')
cv_scores_catboost = cross_val_score(best_catboost_model, X_resampled_normalized, y_resampled, cv=15, scoring='accuracy')

print("Acurácia média com validação cruzada - Decision Tree:", cv_scores_decision_tree.mean())
print("Acurácia média com validação cruzada - Random Forest:", cv_scores_random_forest.mean())
print("Acurácia média com validação cruzada - Gradient Boosting:", cv_scores_gradient_boosting.mean())
print("Acurácia média com validação cruzada - Naive Bayes:", cv_scores_naive_bayes.mean())
print("Acurácia média com validação cruzada - CatBoost:", cv_scores_catboost.mean())

In [None]:
# Confusion matrix heatmap

# Tick labels and title.
# NOTE(review): assumes the classes sort as N, P, PR, which is the row/column
# order confusion_matrix uses by default - confirm against the target values.
labels = ["N", "P", "PR"]
labels_pred = ["Real N   ", "Real P   ", "Real PR"]  # row labels (true classes)
title = "Matriz de Confusão"

# Draw the heatmap on an explicit Axes so later calls act on the same plot
fig, ax = plt.subplots(figsize=(5, 2))
sns.heatmap(
    conf_matrix,
    annot=True,
    fmt="d",  # integer counts; the default '.2g' shows 100+ as 1e+02
    cmap="Greys",
    vmin=0,
    vmax=40,
    cbar=True,
    xticklabels=labels,
    yticklabels=labels_pred,
    linewidths=0.5,
    linecolor='black',
    ax=ax
)

# Close the grid with thicker bottom and right border lines
ax.axhline(y=len(labels_pred), color='black', linewidth=1.5)
ax.axvline(x=len(labels), color='black', linewidth=1.5)

# Move the x tick labels to the top of the plot
ax.xaxis.tick_top()

# Add the title
ax.set_title(title, fontsize=16, color='black')

# Show the figure
plt.show()

In [None]:
# Show the classification report computed earlier as this cell's output
ensemble_classification_report

In [None]:
import matplotlib.pyplot as plt

# classification_report returns plain text by default, and indexing that string
# with 'precision' raises TypeError. Request the dict form here instead and
# read the support-weighted averages across classes.
report_dict = classification_report(y_test, ensemble_predictions, output_dict=True)
averaged_scores = report_dict["weighted avg"]

# Metric labels
metrics_labels = ["Acurácia", "Precisão", "Recall", "F1-Score"]

# Values matching the labels above (all in the range 0..1)
metrics_values = [ensemble_accuracy,
                  averaged_scores['precision'],
                  averaged_scores['recall'],
                  averaged_scores['f1-score']]

# Create the figure and axes
fig, ax = plt.subplots(figsize=(5, 2))

# Bar positions
positions = range(len(metrics_labels))

# Horizontal bar chart
ax.barh(positions, metrics_values, color=['gray', 'lightblue', 'lightgreen', 'lightcoral'])

# Label the bars on the y axis
ax.set_yticks(positions)
ax.set_yticklabels(metrics_labels, fontsize=12, color='black')

# Leave room to the right of a full-length bar so the value labels stay inside the axes
ax.set_xlim(0, 1.15)

# Write each value next to its bar. The axes background is black, so the
# text must be light to be readable.
for i, v in enumerate(metrics_values):
    ax.text(v + 0.02, i, f"{v:.2f}", va='center', fontsize=12, color='white')

# Add the title
ax.set_title("Métricas de Avaliação", fontsize=16, color='black')

# Black plot background
ax.set_facecolor('black')

# Show the figure
plt.show()


In [None]:
# ENSEMBLE TESTS
#
# Evaluate majority-vote ensembles built from subsets of the five models.
# Each test ID lists the voting models, one digit per model:
#   1 = Decision Tree, 2 = Random Forest, 3 = Gradient Boosting,
#   4 = Naive Bayes,   5 = CatBoost
predictions_by_model = {
    "1": y_pred_decision_tree,
    "2": y_pred_random_forest,
    "3": y_pred_gradient_boosting,
    "4": y_pred_naive_bayes,
    "5": y_pred_catboost,
}

# Tests covered by this block, in execution order
test_ids = [
    "1", "2", "3", "4", "5",
    "12", "13", "14", "15", "23", "24", "25", "34", "35", "45",
    "123", "124", "125", "134", "135", "145", "234", "235", "245", "345",
    "1234", "1235", "1245",
]


def majority_vote(prediction_arrays):
    """Combine per-model predictions into one label per sample by majority vote.

    Args:
        prediction_arrays: sequence of equally long prediction sequences,
            one per voting model, in voting order.

    Returns:
        A list with the most frequent label for each sample. When several
        labels share the top count, the label of the earliest model in
        `prediction_arrays` wins (so a two-model ensemble always follows its
        first model when the two disagree).
    """
    return [Counter(votes).most_common(1)[0][0] for votes in zip(*prediction_arrays)]


for test_id in test_ids:
    # Apply the ensemble: majority vote among the models named by the test ID
    ensemble_predictions = majority_vote([predictions_by_model[digit] for digit in test_id])

    # Accuracy of this ensemble on the test data
    ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
    print(f"Acurácia do ensemble nos dados de teste {test_id}:", ensemble_accuracy)

    # Other evaluation metrics of this ensemble on the test data
    print(classification_report(y_test, ensemble_predictions))

# TESTE 1345

# Aplicar o modelo de ensemble: voto majoritário
# ensemble_predictions = mode([
#     y_pred_decision_tree,
#     y_pred_gradient_boosting,
#     y_pred_naive_bayes,
#     y_pred_catboost
# ], axis=0).mode[0]
ensemble_predictions = []
for i in range(len(y_pred_decision_tree)):
    predictions = [y_pred_decision_tree[i], y_pred_gradient_boosting[i], y_pred_naive_bayes[i], y_pred_catboost[i]]
    class_counts = Counter(predictions)
    consensus_class = class_counts.most_common(1)[0][0]
    ensemble_predictions.append(consensus_class)

# Calcular a acurácia do modelo de ensemble nos dados de teste
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
print("Acurácia do ensemble nos dados de teste 1345:", ensemble_accuracy)

# Outras métricas de avaliação do modelo de ensemble nos dados de teste
print(classification_report(y_test, ensemble_predictions))

# TESTE 2345

# Aplicar o modelo de ensemble: voto majoritário
# ensemble_predictions = mode([
#     y_pred_random_forest,
#     y_pred_gradient_boosting,
#     y_pred_naive_bayes,
#     y_pred_catboost
# ], axis=0).mode[0]
ensemble_predictions = []
for i in range(len(y_pred_decision_tree)):
    predictions = [y_pred_random_forest[i], y_pred_gradient_boosting[i], y_pred_naive_bayes[i], y_pred_catboost[i]]
    class_counts = Counter(predictions)
    consensus_class = class_counts.most_common(1)[0][0]
    ensemble_predictions.append(consensus_class)

# Calcular a acurácia do modelo de ensemble nos dados de teste
ensemble_accuracy = accuracy_score(y_test, ensemble_predictions)
print("Acurácia do ensemble nos dados de teste 2345:", ensemble_accuracy)

# Outras métricas de avaliação do modelo de ensemble nos dados de teste
print(classification_report(y_test, ensemble_predictions))

In [None]:
# REAL DATA

# Read the real-world samples from the CSV file
data_real = pd.read_csv(fr"C:\Users\crist\Desktop\dados_77.csv", sep=',')

# One-hot encode the categorical columns, one column per call so the
# resulting column order is the same as encoding them one after another
for categorical_column in ("j_posicao", "j_rota", "j_jardas", "j_catch", "j_area_recepcao"):
    data_real = pd.get_dummies(data_real, columns=[categorical_column])

# Split into target (r_y) and features (r_X: every column except "target")
r_y = data_real["target"]
r_X = data_real.drop(columns=["target"])

# Min-max normalization of the features
# NOTE(review): a fresh scaler is fitted on the real data here instead of
# reusing one fitted on the training data; confirm this is intended.
scaler = MinMaxScaler()
r_X_normalized = scaler.fit_transform(r_X)

# Fit the Naive Bayes model on the training split
naive_bayes_model.fit(X_train, y_train)

# Predict the real data with each individual model
r_y_pred_decision_tree = best_decision_tree_model.predict(r_X_normalized)
r_y_pred_random_forest = best_random_forest_model.predict(r_X_normalized)
r_y_pred_gradient_boosting = best_gradient_boosting_model.predict(r_X_normalized)
r_y_pred_naive_bayes = naive_bayes_model.predict(r_X_normalized)
# NOTE(review): the CatBoost predictions are computed but do not take part
# in the vote below; confirm whether that is deliberate.
r_y_pred_catboost = best_catboost_model.predict(r_X_normalized).squeeze()

# Apply the ensemble: majority vote of decision tree, random forest,
# gradient boosting and naive Bayes. On a tie, Counter.most_common keeps the
# label that was seen first, i.e. in the voter order listed here.
r_ensemble_predictions = [
    Counter(sample_votes).most_common(1)[0][0]
    for sample_votes in zip(
        r_y_pred_decision_tree,
        r_y_pred_random_forest,
        r_y_pred_gradient_boosting,
        r_y_pred_naive_bayes,
    )
]

print(r_ensemble_predictions)