# Experimentos


### Imports:

In [1]:
from typing import List, Any, Tuple
from classifier.my_adaboost import MyAdaboostClassifier
from sklearn.ensemble import AdaBoostClassifier
import csv
import numpy as np

### Functions:

In [2]:
def get_tic_tac_toe_data() -> Tuple[list, list]:
    """
        Funcao para ler o arquivo de entrada e retornar os dados em forma
        de listas numericas
    """
    
    def category_convertor(item):
        if item == 'x' or item == 'positive':
            return 1
        if item == 'o' or item == 'negative':
            return -1
        if item == 'b':
            return 0
        raise ValueError

    feature_values, label_values = list(), list()
    with open('tic-tac-toe.data', newline='') as csv_file:
        reader = csv.reader(csv_file)
        for row in reader:
            feature_values.append(np.array([category_convertor(item) for item in row[:-1]]))
            label_values.append(category_convertor(row[-1]))
            
    return feature_values, label_values


def create_k_partitions(features: list, labels: list, k=5, random_seed=42) -> Tuple[List[list], List[list]]:
    """
        Funcao para criar k particoes dos dados de entrada para realizar
        a validacao cruzada
    """
    
    np.random.seed(random_seed)
    partition_size = len(features) // k
    
    features_partitions = list()
    labels_partitions = list()
    for _ in range(k):
        f_partition = list()
        l_partition = list()
        while len(f_partition) < partition_size:
            item_idx = np.random.randint(len(features))
            f_partition.append(features.pop(item_idx))
            l_partition.append(labels.pop(item_idx))
        features_partitions.append(f_partition)
        labels_partitions.append(l_partition)

    return features_partitions, labels_partitions


def cross_validation_score(model: Any, features_partitions: List[list], labels_partitions: List[list]) -> Tuple[float, float]:
    """
        Funcao para calcular a taxa de acertos do classificador com seu 
        respectivo desvio padrao por meio da validacao cruzada
    """
    
    partitions_size = len(features_partitions)
    partitions_accuracy_score = []
    for i in range(partitions_size):
        features_to_test = features_partitions[i]
        labels_to_test = labels_partitions[i]
        features_to_train = list()
        labels_to_train = list()
        for j in range(partitions_size):
            if i != j:
                labels_to_train.extend(labels_partitions[j])
                features_to_train.extend(features_partitions[j])
        model.fit(features_to_test, np.array(labels_to_test))
        partitions_accuracy_score.append(model.score(features_to_train, np.array(labels_to_train)))
    partitions_accuracy_score = np.array(partitions_accuracy_score)
    
    return partitions_accuracy_score.mean(), np.sqrt(partitions_accuracy_score.var())


In [3]:
features, labels = get_tic_tac_toe_data()
features_partitions, labels_partitions = create_k_partitions(features, labels, k=5)

# model_x = MyAdaboostClassifier(n_estimators=100, classifier_model='weak-logistic-regression')
model_x = MyAdaboostClassifier(n_estimators=100, classifier_model='stump')
model_y = AdaBoostClassifier(n_estimators=100)

dd = cross_validation_score(model_x, features_partitions, labels_partitions)
print(dd)
dd = cross_validation_score(model_y, features_partitions, labels_partitions)
print(dd)


(0.8628272251308902, 0.022631491881205244)
(0.8900523560209426, 0.0200565249312858)
