# Entrenamiento del modelo

In [1]:
import numpy as np
import pandas as pd
import os
import json
import sys
sys.path.append('..')  # Añade el directorio padre (logistic_regression) al path
from src.ft_functions import *

In [2]:
class LogisticRegressionTrainer:
    """One-vs-all logistic regression trained by batch gradient descent.

    One weight vector is fitted per house and kept in ``self.weights``,
    keyed by house name. The numerical helpers (``sigmoid``,
    ``binary_cross_entropy``, ``binary_gradient``, ``prepare_one_vs_all``)
    come from ``src.ft_functions``.
    """

    def __init__(self, learning_rate=0.01, max_iterations=1000, tolerance=1e-4):
        """Store the hyper-parameters and start with no trained weights.

        Args:
            learning_rate: Step size applied to the gradient on every update.
            max_iterations: Upper bound on gradient-descent iterations.
            tolerance: Training stops once the absolute change in loss
                between two consecutive iterations drops below this value.
        """
        self.weights = {}
        self.learning_rate = learning_rate
        self.max_iterations = max_iterations
        self.tolerance = tolerance

    def train_one_vs_all(self, X, y, house):
        """Fit the binary classifier separating ``house`` from the rest.

        Args:
            X: 2-D feature matrix; one weight is learned per column. No
                intercept column is added here.
            y: Label vector passed to ``prepare_one_vs_all`` together with
                ``house`` (presumably yielding 0/1 targets -- confirm in
                ``src.ft_functions``).
            house: Name of the class treated as the positive one.

        Returns:
            The learned weight vector, of length ``X.shape[1]``.
        """
        theta = np.zeros(X.shape[1])
        targets = prepare_one_vs_all(y, house)

        # Starting from +inf guarantees the first convergence test fails.
        previous_loss = float('inf')

        for _ in range(self.max_iterations):
            probabilities = sigmoid(np.dot(X, theta))
            loss = binary_cross_entropy(targets, probabilities)

            # Stop as soon as the loss has stopped moving.
            converged = abs(previous_loss - loss) < self.tolerance
            if converged:
                break

            # Gradient-descent step (in place on the weight vector).
            theta -= self.learning_rate * binary_gradient(X, targets, probabilities)
            previous_loss = loss

        return theta

    def train(self, X, houses):
        """Fit one classifier per house and return all the weights.

        Args:
            X: Feature matrix shared by every binary classifier.
            houses: Per-sample house labels.

        Returns:
            ``self.weights``: a dict mapping each house name to its weight
            vector.
        """
        for label in ('Gryffindor', 'Hufflepuff', 'Ravenclaw', 'Slytherin'):
            print(f"Training classifier for {label}...")
            self.weights[label] = self.train_one_vs_all(X, houses, label)

        return self.weights

### Leemos el DataSet de entrenamiento normalizado

In [3]:
# Load the normalized training set; the path is relative to the notebook's
# working directory.
data = pd.read_csv('../datasets/normal_train.csv')

In [4]:
# Split features from labels: only the columns listed below are used as
# model inputs, so the house columns are left out of X.
feature_columns = [
    'Herbology',
    'Defense Against the Dark Arts',
    'Potions',
    'Charms',
    'Flying',
    'Best Hand',
    'Age',
]

# Plain NumPy matrix with one column per selected feature.
X = data.loc[:, feature_columns].values

In [5]:


# Build the per-sample house label vector from the one-hot house columns.
# Rows flagged as none of the first three houses fall through to 'Slytherin'.
houses = np.where(data['House_Gryffindor'] == 1, 'Gryffindor',
         np.where(data['House_Hufflepuff'] == 1, 'Hufflepuff',
         np.where(data['House_Ravenclaw'] == 1, 'Ravenclaw', 'Slytherin')))

# Train the model: one binary classifier per house.
print("Training model...")
trainer = LogisticRegressionTrainer(
    learning_rate=0.01,
    max_iterations=1000,
    tolerance=1e-4
)

weights = trainer.train(X, houses)

# NumPy arrays are not JSON-serializable, so convert each weight vector
# to a plain list first.
weights_dict = {
    house: weights[house].tolist() for house in weights
}

# Save the weights as JSON.
weights_path = '../output/model_weights.json'
print("Saving weights...")
# Create the output directory if it is missing; otherwise open() would raise
# FileNotFoundError after all the training work has already been done.
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
with open(weights_path, 'w', encoding='utf-8') as f:
    json.dump(weights_dict, f, indent=4)
print(f"Weights saved to {weights_path}")

Training model...
Training classifier for Gryffindor...
Training classifier for Hufflepuff...
Training classifier for Ravenclaw...
Training classifier for Slytherin...
Saving weights...
Weights saved to ../output/model_weights.json
