# Entrenamiento de un modelo para clasificación de mascotas

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import json
import joblib

**Dataset:** [URL](https://raw.githubusercontent.com/eliasrrobles/pets-streamlit-classifier/refs/heads/main/data/pets_v2.csv)

In [None]:
# Raw CSV of the pets dataset; read straight from GitHub into a DataFrame.
URL = "https://raw.githubusercontent.com/eliasrrobles/pets-streamlit-classifier/refs/heads/main/data/pets_v2.csv"
pets = pd.read_csv(filepath_or_buffer=URL)

In [None]:
# Preview the first five rows of the dataset.
pets.head(n=5)

In [None]:
# Distinct labels in the target column.
pd.unique(pets["pet_type"])

In [None]:
# Distinct values of the eye_color feature.
pd.unique(pets["eye_color"])

In [None]:
# Distinct values of the fur_length feature.
pd.unique(pets["fur_length"])

In [None]:
# Separate the target column from the feature columns.
y = pets["pet_type"]
X = pets.drop(columns="pet_type")

# One-hot encode the categorical feature columns.
X_encoded = pd.get_dummies(data=X, columns=["eye_color", "fur_length"])

In [None]:
# Display the one-hot encoded feature table produced by pd.get_dummies.
X_encoded

In [None]:
# Hold out 20% of the rows for evaluation. A fixed random_state makes the
# shuffle reproducible, so re-running the notebook yields the same split
# (and therefore comparable accuracy numbers).
X_train, X_test, y_train, y_test = train_test_split(
    X_encoded, y, test_size=0.2, random_state=42
)

In [None]:
# Save the category mappings for Streamlit: for each categorical column,
# the list of category names that one-hot encoding produces.

category_mapping = {
    column: pd.get_dummies(pets[column]).columns.tolist()
    for column in ("eye_color", "fur_length")
}

with open("category_mapping.json", "w") as f:
    f.write(json.dumps(category_mapping))

In [None]:
# Fit a random forest with default hyperparameters. The fixed random_state
# seeds the forest's internal randomness so that training on the same data
# produces the same model on every run.
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [None]:
# Predict labels for the held-out rows and print the accuracy score.
y_pred = model.predict(X=X_test)

print("Precisión", accuracy_score(y_true=y_test, y_pred=y_pred))

In [None]:
# Serialize the trained model to disk.
joblib.dump(value=model, filename="pets_model.joblib")