# Boosting com o CoutoBoost

In [1]:
import pandas as pd

In [2]:
# Column labels are supplied explicitly: 0..8 for the nine feature columns,
# followed by "Result" for the label column.
column_names = list(range(9)) + ["Result"]
df_original = pd.read_csv("tic-tac-toe.data", names=column_names)
df_original.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,Result
0,x,x,x,x,o,o,x,o,o,positive
1,x,x,x,x,o,o,o,x,o,positive
2,x,x,x,x,o,o,o,o,x,positive
3,x,x,x,x,o,o,o,b,b,positive
4,x,x,x,x,o,o,b,o,b,positive


In [3]:
# Dataset size: rows x feature columns (the label column is not counted).
n_rows, n_cols = df_original.shape
print("Exemplos x características: ", (n_rows, n_cols - 1))

# Class balance: share of rows labelled "positive" and its complement.
is_positive = df_original["Result"] == "positive"
positive_ratio = int(is_positive.sum()) / n_rows
negative_ratio = 1 - positive_ratio
print(f"Taxa de positivos: {positive_ratio:.2f}")
print(f"Taxa de negativos: {negative_ratio:.2f}")

Exemplos x características:  (958, 9)
Taxa de positivos: 0.65
Taxa de negativos: 0.35


## Preparação dos dados

O algoritmo `CoutoBoostClassifier` implementado é uma versão simplificada do AdaBoost que espera receber entradas com características binárias e rótulos de saída $y \in \{-1, +1\}$.



In [4]:
from sklearn.model_selection import train_test_split


# 1. Move the label column to the front so it ends up at index 0 of the array
#    built in steps 5-6. `.copy()` makes `df` an independent frame, so the
#    assignment in step 2 does not trigger SettingWithCopyWarning and cannot
#    leak back into `df_original`.
cols = df_original.columns.tolist()
cols = cols[-1:] + cols[:-1]
df = df_original[cols].copy()


# 2. Numeric label: "positive" -> +1, "negative" -> -1.
df["Result"] = df["Result"].map({
    "positive":  1,
    "negative": -1
})

# 3. One-hot encode every feature column into binary indicator columns.
#    dtype=int forces 0/1 integers: recent pandas versions default to bool
#    dummies, which combined with the integer label would make `to_numpy()`
#    return an object-dtype array instead of a numeric one.
df = pd.get_dummies(df, columns=[0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int)

# 4. Shuffle the rows. The seed is fixed so that a fresh
#    "Restart & Run All" reproduces exactly the same train/test split.
df = df.sample(frac=1, random_state=0)

# 5. DataFrame >> ndarray
# 6. Split into y (first column, the label) and X (remaining columns).
dados = df.to_numpy()
y = dados[:, 0]
X = dados[:, 1:]

# 7. Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0, test_size=0.2)



Similar aos algoritmos do pacote `sklearn`, o `CoutoBoostClassifier` tem em sua interface pública os métodos `fit(X, y)` e `predict(X)`.

In [5]:
from couto_boost import CoutoBoostClassifier
from sklearn.metrics import accuracy_score

# Train the project's CoutoBoostClassifier on the training split, with
# max_estimators set to 2000.
# NOTE(review): the recorded output of this cell is a very long debug log
# (one "X: (766, 27)" line per evaluation) — consider silencing it, e.g. with
# the %%capture cell magic, before sharing the notebook.
# NOTE(review): the output ends with what look like runtime warnings pointing
# at `np.log((1-error)/error)` and at the weight normalisation inside
# couto_boost — presumably the weighted error reached 0; confirm in couto_boost.
clf = CoutoBoostClassifier(max_estimators=2000)
clf.fit(X_train, y_train)



all_stumps:  108
stumps totais:  108
samples =  766
Peso inicial =  0.0013054830287206266
Iteração t= 0
iteration_stumps:  []
iteration_errors:  []
iteration_alphas:  []
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27

X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
best_stump_index:  105
best_stump:  [26  0  1]
error:  4.0381032066355774e-06
mistakes:  354
Errou 354 exemplos
Iteração t= 6
iteration_stumps:  [53, 53, 105, 105, 105,

X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)
X: (766, 27)

  alpha = np.log((1-error)/error) / 2
  self.input_weights = np.divide(self.input_weights, weight_sum)


<couto_boost.CoutoBoostClassifier at 0x1e3417991f0>

In [6]:
# Predict labels for the held-out rows with the fitted CoutoBoost model.
y_hat = clf.predict(X_test)

# Fraction of test labels predicted correctly.
score = accuracy_score(y_true=y_test, y_pred=y_hat)
print(score)

0.5208333333333334


In [7]:
from sklearn.ensemble import AdaBoostClassifier

# Reference model: scikit-learn's AdaBoost with the same number of boosting
# rounds as the CoutoBoost run. random_state is pinned so that the baseline
# score is reproducible across kernel restarts.
clf_baseline = AdaBoostClassifier(n_estimators=2000, random_state=0)
clf_baseline.fit(X_train, y_train)


AdaBoostClassifier(n_estimators=2000)

In [8]:
# Predict labels for the held-out rows with the fitted AdaBoost baseline.
y_hat_baseline = clf_baseline.predict(X_test)

# Fraction of test labels the baseline predicted correctly.
score_baseline = accuracy_score(y_true=y_test, y_pred=y_hat_baseline)
print(score_baseline)

0.9791666666666666
