# Tic tac toe

> Gradient Tree Boosting implementation test

Imports

In [62]:
import pandas as pd
import numpy as np

# from sklearn import preprocessing, model_selection, naive_bayes, tree, svm, neighbors, ensemble
from sklearn import tree

Parameters

In [173]:
path_file_ds = './file/ds_tic-tac-toe.data'

# Data parsing parameters
class_positive_str, class_negative_str = 'positive', 'negative'
class_positive, class_negative = 1, 0

play_x_str, play_o_str =  'x', 'o'
play_x, play_o = 1, -1

# Hyper parameters
max_depth = 4
max_trees = 10
random_state = 10
max_features = None
learning_rate = 0.4


## Import data

In [174]:
features = [f'x{i + 1}' for i in range(9)]
col_y = 'y'

df = pd.read_csv(path_file_ds, header=None)
df.columns = features + [col_y]

def play_to_number(x: str):
    return play_x if x == play_x_str else play_o

def class_to_number(y: str):
    return class_positive if y == class_positive_str else class_negative

y = ds[col_y].apply(np.vectorize(class_to_number))
X = ds[features].apply(np.vectorize(play_to_number))

n = len(y)
n_positive = np.sum(y == class_positive)
n_negative = np.sum(y == class_negative)

print(f"{n} samples: {n_positive} positive cases; {n_negative} negative cases")


958 samples: 626 positive cases; 332 negative cases


## Gradient Tree Boosting

In [188]:
class GradientTreeBoosting():
    
    def __init__(
        self, 
        max_depth: int = 4,
        max_trees: int = 10,
        max_features: int = None,
        learning_rate: float = 0.4,
        random_state: int = 0
    ):

        self.max_depth = max_depth
        self.max_trees = max_trees
        self.max_features = max_features
        self.learning_rate = learning_rate
        
        self._weak_learners = []
        self._predictions = np.array([])
        self._random_state = random_state
        

    def fit(self, y: np.array, X: np.array):

        n = len(y)
        predictions = np.array([y.mean()] * n)

        for _ in range(self.max_trees):

            # Creating a weak learner 
            weak_learner = tree.DecisionTreeRegressor(
                max_depth=self.max_depth,
                max_features=self.max_features,
                random_state=self._random_state,
            )

            # Growing the tree on the residuals
            residuals = y - predictions
            weak_learner.fit(X, residuals)
            self._weak_learners.append(weak_learner)

            # Updating predictions
            predictions_wl = weak_learner.predict(X)
            predictions += self.learning_rate * predictions_wl

    def predict(self, X: np.array) -> np.array:
        n = len(X)
        yHat = np.zeros( (n,) )
        for weak_learner in self._weak_learners:
            yHat += self.learning_rate * weak_learner.predict(X)
        return yHat

## Train

In [190]:
classifier = GradientTreeBoosting(
    max_depth=max_depth,
    max_trees=max_trees,
    max_features=max_features,
    learning_rate=learning_rate,
    random_state=random_state,
)

classifier.fit(X=X.values, y=y.values)
yHat = classifier.predict(X=X.values)

n_pos = np.sum(yHat > 0)
n_neg = n - n_pos
print(f"n_pos: {n_pos}, n_neg: {n_neg}")

n_pos: 620, n_neg: 338


## Test

In [192]:
yHat = classifier.predict(X=X.values)