# Modelo supervisado para predecir `error_label`
Este notebook entrena un modelo de clasificación para predecir el tipo de error cometido en una jugada de ajedrez a partir de las características (features) contenidas en `training_dataset.csv`.

In [None]:
import pandas as pd

# Read the training data from the CSV file in the working directory.
df = pd.read_csv(filepath_or_buffer="training_dataset.csv")

# Quick look at what was loaded: dimensions, column names, and how many rows
# carry each error_label value.
print(df.shape)
print(df.columns)
label_counts = df['error_label'].value_counts()
print(label_counts)

# Kept as the final expression so the notebook renders the first five rows.
df.head(n=5)


In [None]:
from sklearn.model_selection import train_test_split

# Columns of df used as model inputs; each name must exist in the dataset.
features = [
    'score_diff', 'material_total', 'material_balance', 'num_pieces',
    'branching_factor', 'self_mobility', 'opponent_mobility',
    'phase', 'has_castling_rights', 'is_low_mobility',
    'is_center_controlled', 'is_pawn_endgame',
]

# X holds the input columns, y the target label to predict.
X = df[features]
y = df['error_label']

# Hold out 20% of the rows for evaluation, with a fixed seed so the split is
# reproducible. stratify=y keeps the error_label class proportions the same in
# both partitions, so rare labels are not under-represented in the test set.
# NOTE: stratification requires at least two rows per class; scikit-learn
# raises ValueError otherwise.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, confusion_matrix

# Train a depth-limited decision tree on the training split. fit() returns the
# estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier(max_depth=5, random_state=42).fit(X_train, y_train)

# Evaluate on the held-out split: print the confusion matrix first, then the
# per-class classification report.
y_pred = clf.predict(X_test)
for build_report in (confusion_matrix, classification_report):
    print(build_report(y_test, y_pred))


In [None]:
import joblib

# Serialize the trained classifier to a pickle file in the working directory.
joblib.dump(value=clf, filename='trained_error_label_model.pkl')


## Comparación con otros modelos (opcional)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# Imported here as well so this cell does not depend on an earlier cell
# having been executed first.
from sklearn.tree import DecisionTreeClassifier

# Candidate classifiers, keyed by the display name used in the printed summary.
# The tree-based models are seeded so repeated runs give the same scores.
# Logistic regression is wrapped in a pipeline that standardizes the features;
# the scaler is re-fitted on the training portion of every CV fold, so no
# statistics leak from the validation fold.
models = {
    "Decision Tree": DecisionTreeClassifier(max_depth=5, random_state=42),
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Logistic Regression": make_pipeline(
        StandardScaler(), LogisticRegression(max_iter=1000)
    ),
}

# 5-fold cross-validation per model, reporting the mean of the fold scores
# (the estimators' default score, i.e. accuracy for classifiers).
# NOTE(review): CV runs on the full X, y, which includes the rows held out as
# the test split earlier in the notebook; confirm this is intended.
for name, model in models.items():
    scores = cross_val_score(model, X, y, cv=5)
    print(f"{name} accuracy: {scores.mean():.4f}")
