# Classificação de partidas de xadrez

Serão criados modelos de classificação para prever o vencedor de partidas de xadrez, com base nos dados pré-processados no projeto anterior.

Será utilizado o MLflow para rastreamento de experimentos e comparação de modelos.

# Configuração do MLflow

In [None]:
# Install the mlflow package (imported by the cells below) via the %pip notebook magic.
%pip install mlflow

In [None]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split

import warnings
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
from urllib.parse import urlparse
import logging

# Silence every Python warning raised from here on.
warnings.filterwarnings("ignore")

# Configure root logging at WARNING level and create a logger named after this module.
logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

# Select the MLflow experiment under which the runs below are recorded.
mlflow.set_experiment(experiment_name='Chess games classification')

Definição das métricas de avaliação dos modelos:

In [None]:
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    mean_absolute_error,
    mean_squared_error,
    r2_score,
)


def eval_metrics(actual, pred):
    """Score predictions against the true target values.

    The error-based scores (RMSE, MAE, R2) are always returned, so existing
    callers that read those keys keep working. Because the models trained in
    this notebook are classifiers, accuracy and macro-averaged F1 are added
    whenever scikit-learn accepts the inputs as classification targets.

    Args:
        actual: Ground-truth target values.
        pred: Model predictions aligned with ``actual``.

    Returns:
        dict mapping metric name to value. Always contains ``rmse``, ``mae``
        and ``r2``; also contains ``accuracy`` and ``f1_macro`` when the
        classification metrics could be computed.
    """
    scores = {
        "rmse": np.sqrt(mean_squared_error(actual, pred)),
        "mae": mean_absolute_error(actual, pred),
        "r2": r2_score(actual, pred),
    }
    try:
        # Both values are computed before the dict is touched, so either both
        # classification scores are added or neither is.
        scores.update(
            accuracy=accuracy_score(actual, pred),
            f1_macro=f1_score(actual, pred, average="macro"),
        )
    except ValueError:
        # The classification metrics raise ValueError for inputs they do not
        # treat as class labels (e.g. continuous values); in that case only
        # the error-based scores above are reported.
        pass
    return scores

Definição da função de rastreamento do MLflow:

In [None]:
def mlflow_track(model, model_name: str, params: dict, model_input, model_output, metrics: dict):
    """Record one trained model in MLflow.

    Logs the parameters, the metrics and the scikit-learn model itself,
    registering the model under ``model_name``.

    Args:
        model: Estimator handed to ``mlflow.sklearn.log_model``.
        model_name: Name passed as ``registered_model_name``.
        params: Parameters logged with ``mlflow.log_params``.
        model_input: Example input used to infer the model signature.
        model_output: Example output used to infer the model signature.
        metrics: Metrics logged with ``mlflow.log_metrics``.
    """
    # The signature is inferred before anything is logged, so a failure here
    # happens before any parameter or metric has been written.
    io_signature = infer_signature(model_input, model_output)

    mlflow.log_params(params)
    mlflow.log_metrics(metrics)

    model_logging_options = {
        "sk_model": model,
        "artifact_path": "sklearn-model",
        "signature": io_signature,
        "registered_model_name": model_name,
    }
    mlflow.sklearn.log_model(**model_logging_options)

Definição da função de treinamento e rastreamento do MLflow:

In [None]:
def mlflow_train_and_track(model_class, model_name, data: pd.DataFrame, target: str, **params):
    """Train a model on a train/validation split and track the run in MLflow.

    The data is split (0.75, 0.25), the model is fitted on the training part,
    scored on the validation part with ``eval_metrics``, and the parameters,
    metrics and model are logged through ``mlflow_track`` inside a run named
    ``model_name``.

    Args:
        model_class: Estimator class; instantiated as ``model_class(**params)``.
        model_name: MLflow run name, also forwarded to ``mlflow_track``.
        data: Frame holding the feature columns and the ``target`` column.
        target: Name of the column to predict.
        **params: Keyword arguments forwarded to ``model_class``. When an
            integer ``random_state`` is among them it also seeds the
            train/validation split, so the logged parameters are enough to
            reproduce the run.
    """
    # Reuse integer seeds only: sharing a RandomState *instance* with the
    # split would advance the generator the model is about to consume.
    seed = params.get("random_state")
    split_seed = seed if isinstance(seed, (int, np.integer)) else None

    # Split the data into training and validation sets. (0.75, 0.25) split.
    train, val = train_test_split(data, random_state=split_seed)
    train_x = train.drop(columns=[target])
    val_x = val.drop(columns=[target])
    # Select the target as a 1-D Series rather than a one-column DataFrame:
    # scikit-learn expects y of shape (n_samples,) and warns on a column vector.
    train_y = train[target]
    val_y = val[target]

    with mlflow.start_run(run_name=model_name):
        # Create model, train it, and create predictions
        model = model_class(**params)
        model.fit(train_x, train_y)

        pred = model.predict(val_x)

        # Evaluate the model
        metrics = eval_metrics(val_y, pred)

        shown_params = str(params).strip("{}")
        print(f"{model_name}({shown_params}):")
        print(f"  RMSE: {metrics['rmse']}")
        print(f"  MAE: {metrics['mae']}")
        print(f"  R2: {metrics['r2']}")

        # Log parameter, metrics, and model to MLflow
        mlflow_track(model, model_name, params, val_x, pred, metrics)

        

# Treinando os modelos

## Importando o dataset

In [None]:
csv_url = "https://raw.githubusercontent.com/Vinicius-resende-cin/intro-dados/master/data/chess_games_cleaned.csv"
try:
    data = pd.read_csv(csv_url, encoding="ISO-8859-1")
except Exception as e:
    # Log the failure for context, then re-raise: the following cells all use
    # `data`, so continuing would only fail later with an unrelated NameError.
    logger.exception("Unable to download training & test CSV, check your internet connection. Error: %s", e)
    raise

Convertendo tipos para execução dos modelos

In [None]:
# Columns that are cast to pandas categoricals and then replaced by their
# integer category codes.
categorical_columns = (
    'victory_status',
    'winner',
    'increment_code',
    'white_id',
    'black_id',
    'moves',
    'opening_eco',
    'opening_name',
)

for column in categorical_columns:
    data[column] = data[column].astype('category').cat.codes

## Separando dados de treinamento e de teste

In [None]:
# Split the data into training and test sets. (0.8, 0.2) split.
# The seed is fixed so the held-out test rows are the same on every execution;
# with an unseeded split, re-running the notebook would move rows between the
# training and test sets.
train_data, test_data = train_test_split(data, test_size=0.2, random_state=0)

## Executando o treinamento

Para visualizar o rastreamento numa interface, execute o comando abaixo no diretório deste notebook (`/notebooks`):

```bash
mlflow ui --port 5000
```

A interface será acessível no endereço `http://localhost:5000` em um navegador.

### Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Train a random forest on the training data to predict the `winner` column
# and track the run in MLflow.
mlflow_train_and_track(
    RandomForestClassifier,
    'RandomForestClassifier',
    train_data,
    'winner',
    n_estimators=100,
    max_depth=10,
    random_state=0,
)