In [1]:
import os
import numpy as np
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import joblib

In [2]:
# Load the test and training images.
def _load_images(path, n_images):
    """Read a raw uint8 file and return it as (n_images, 32, 32, 3) scaled to [0, 1].

    The file is read as a flat byte buffer; the reshape raises if its size
    does not match n_images * 32 * 32 * 3.
    """
    pixels = np.fromfile(path, dtype=np.uint8)
    return pixels.reshape((n_images, 32, 32, 3)) / 255


test_x = _load_images("test.txt", 20000)
train_x = _load_images("train.txt", 100000)

In [3]:
# Binary labels: the first half of each split is class 0 and the second half
# is class 1.
# NOTE(review): assumes the image files store all class-0 images before the
# class-1 images -- confirm against how test.txt / train.txt were written.
test_y = np.repeat([0.0, 1.0], 10000)
train_y = np.repeat([0.0, 1.0], 50000)

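O BaggingClassifier espera receber uma matriz de características X no formato (n_samples, n_features).
No conjunto de dados CIFAKE usado aqui, as imagens estão no formato (n_samples, height, width, channels); por isso, cada imagem 32x32x3 é achatada em um vetor de 32 * 32 * 3 = 3072 características antes do treinamento.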

In [4]:
# Flatten every 32x32x3 image into one row so the data is 2-D
# (n_samples, n_features), as required by BaggingClassifier.
n_features = 32 * 32 * 3
train_x_reshaped = np.reshape(train_x, (100000, n_features))
test_x_reshaped = np.reshape(test_x, (20000, n_features))

In [5]:
# Sanity check: display the flattened training matrix shape.
np.shape(train_x_reshaped)

(100000, 3072)

In [6]:
# Build the ensemble: a bagging classifier over 10 decision trees, with a
# fixed random_state on the ensemble.
base_model = DecisionTreeClassifier()
bagging_model = BaggingClassifier(
    base_model,
    n_estimators=10,
    random_state=42,
)


In [7]:
# Train the model, or reuse a previously trained one cached on disk.
if os.path.exists('bagging_files/bagging_model.joblib'):
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load cache files that were produced by this notebook.
    bagging_model = joblib.load('bagging_files/bagging_model.joblib')
else:
    # Create the cache directory *before* the expensive fit, so a missing or
    # unwritable directory fails immediately instead of after training.
    os.makedirs('bagging_files', exist_ok=True)
    bagging_model.fit(train_x_reshaped, train_y)
    # Persist the trained model so later runs can skip training.
    joblib.dump(bagging_model, 'bagging_files/bagging_model.joblib')

In [8]:
# Predict on the test set, or reuse predictions cached on disk.
# Both branches bind `y_pred`, the name the accuracy cell reads; previously
# the cached branch only bound `bagging_y_pred`, so a re-run with the cache
# present failed with NameError on a fresh kernel.
# NOTE: the cache is not invalidated when the model is retrained; delete
# bagging_files/y_pred.npy after retraining to avoid stale predictions.
if os.path.exists('bagging_files/y_pred.npy'):
    y_pred = np.load('bagging_files/y_pred.npy')
else:
    y_pred = bagging_model.predict(test_x_reshaped)
    # Make sure the cache directory exists before saving the predictions.
    os.makedirs('bagging_files', exist_ok=True)
    np.save('bagging_files/y_pred.npy', y_pred)

# Keep the former cached-branch name available as an alias.
bagging_y_pred = y_pred

In [9]:
# Compare the test-set predictions with the true labels to get the accuracy.
accuracy = accuracy_score(y_true=test_y, y_pred=y_pred)

# Report the accuracy.
print("Accuracy:", accuracy)

Accuracy: 0.78175
