In [57]:
import pandas as pd 
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.datasets import load_iris

# 1) Load the Iris dataset and split it into features (X) and target (y)
iris = load_iris()

# Build a DataFrame whose columns carry the dataset's own feature names
df_iris = pd.DataFrame(iris.data, columns=iris.feature_names)
df_iris['species'] = iris.target
# Readable class name per row, for display only (vectorised lookup of each class code)
df_iris['species_name'] = iris.target_names[iris.target]

# Captions are HTML: every heading tag must be closed with </h3>
display(df_iris.head().style.set_caption("<h3>Primeras filas del dataset Iris</h3>"))

target = "species"
# Drop the numeric target and its readable duplicate so neither ends up among the features
X = df_iris.drop(columns=[target, 'species_name'])
y = df_iris[target]  # class codes; the code -> name mapping is displayed below

# X.head() is already a DataFrame, so it can be styled directly
display(X.head().style.set_caption("<h3>Características (X)</h3>"))

# Mapping between each class name and its integer code
display(pd.DataFrame({
    target: iris.target_names,
    'Código': range(len(iris.target_names))
}).style.set_caption("<h3>Target (y) </h3>"))

# Names of the feature columns kept in X
display(pd.DataFrame({
    'Variables': X.columns.tolist()
}).style.set_caption("<h3>Nombre de las características </h3>"))

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),species,species_name
0,5.1,3.5,1.4,0.2,0,setosa
1,4.9,3.0,1.4,0.2,0,setosa
2,4.7,3.2,1.3,0.2,0,setosa
3,4.6,3.1,1.5,0.2,0,setosa
4,5.0,3.6,1.4,0.2,0,setosa


Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


Unnamed: 0,species,Código
0,setosa,0
1,versicolor,1
2,virginica,2


Unnamed: 0,Variables
0,sepal length (cm)
1,sepal width (cm)
2,petal length (cm)
3,petal width (cm)


In [66]:
# 2) Encoding / scaling
# Nothing to do here: the Iris features are already numeric and are all
# expressed in centimetres, so no encoding or normalisation step is applied.

# 3) Stratified hold-out split: 70% train / 30% test, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# 4) Model selection: a random forest with a fixed seed
bosque = RandomForestClassifier(random_state=42)

# 5) Train on the training split
bosque.fit(X_train, y_train)

# 6) Predict on the held-out test split and evaluate
y_pred = bosque.predict(X_test)

# Confusion matrix, labelled with the class names on both axes
matriz_confusion = pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    index=iris.target_names,
    columns=iris.target_names,
)
display(matriz_confusion.style.set_caption("<h4>Matriz de confusión</h4>"))

# Classification report, transposed so each class / aggregate is a row
reporte = classification_report(
    y_test, y_pred, target_names=iris.target_names, output_dict=True
)
display(pd.DataFrame(reporte).T.style.set_caption("<h4> Reporte de clasificación </h4>"))

Unnamed: 0,setosa,versicolor,virginica
setosa,15,0,0
versicolor,0,14,1
virginica,0,4,11


Unnamed: 0,precision,recall,f1-score,support
setosa,1.0,1.0,1.0,15.0
versicolor,0.777778,0.933333,0.848485,15.0
virginica,0.916667,0.733333,0.814815,15.0
accuracy,0.888889,0.888889,0.888889,0.888889
macro avg,0.898148,0.888889,0.887767,45.0
weighted avg,0.898148,0.888889,0.887767,45.0


In [67]:
# 7) Cross-validation, run on the training split only
# Five stratified, shuffled folds with a fixed seed.
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Result key -> scikit-learn scorer name. Each key is read back from `scores`
# as "test_<key>" (and "train_<key>", since train scores are requested).
scoring = dict(
    accuracy="accuracy",
    f1_macro="f1_macro",
    roc_auc="roc_auc_ovr",
    average_precision="average_precision",
)

scores = cross_validate(
    bosque,
    X_train,
    y_train,
    scoring=scoring,
    cv=cv,
    return_train_score=True,
    n_jobs=-1,
)

In [68]:
# All cross-validation metrics in a single table.
# Each entry: (row label, key in `scores`, whether to append "± std").
filas_cv = [
    ('Exactitud Validación', 'test_accuracy', True),
    ('Exactitud Entrenamiento', 'train_accuracy', True),
    ('F1-Score Macro', 'test_f1_macro', True),
    ('ROC AUC Medio', 'test_roc_auc', False),
    ('Average Precision Medio', 'test_average_precision', False),
]

etiquetas_cv = []
valores_cv = []
for etiqueta, clave, con_std in filas_cv:
    # Mean over the folds, optionally followed by the fold-to-fold spread
    texto = f"{scores[clave].mean():.3f}"
    if con_std:
        texto += f" ± {scores[clave].std():.3f}"
    etiquetas_cv.append(etiqueta)
    valores_cv.append(texto)

display(pd.DataFrame({
    'Métrica': etiquetas_cv,
    'Valor': valores_cv,
}).style.set_caption("<h3>VALIDACIÓN CRUZADA (5-FOLD)</h3>"))

Unnamed: 0,Métrica,Valor
0,Exactitud Validación,0.952 ± 0.043
1,Exactitud Entrenamiento,1.000 ± 0.000
2,F1-Score Macro,0.952 ± 0.043
3,ROC AUC Medio,0.998
4,Average Precision Medio,0.995


In [70]:
# Cross-validation summary table with an extra headline row that combines
# validation accuracy and macro-F1.

# Format each value once so the headline row and the detail rows stay in sync
acc_validacion = f"{scores['test_accuracy'].mean():.3f} ± {scores['test_accuracy'].std():.3f}"
acc_entrenamiento = f"{scores['train_accuracy'].mean():.3f} ± {scores['train_accuracy'].std():.3f}"
f1_macro_validacion = f"{scores['test_f1_macro'].mean():.3f} ± {scores['test_f1_macro'].std():.3f}"
roc_auc_medio = f"{scores['test_roc_auc'].mean():.3f}"
ap_medio = f"{scores['test_average_precision'].mean():.3f}"

resumen_cv = pd.DataFrame(
    [
        ('Métricas Principales', f"ACC {acc_validacion} | F1 {f1_macro_validacion}"),
        ('Exactitud Validación', acc_validacion),
        ('Exactitud Entrenamiento', acc_entrenamiento),
        ('F1-Score Macro', f1_macro_validacion),
        ('ROC AUC Medio', roc_auc_medio),
        ('Average Precision Medio', ap_medio),
    ],
    columns=['Métrica', 'Valor'],
)
display(resumen_cv.style.set_caption("<h3>VALIDACIÓN CRUZADA (5-FOLD)</h3>"))

Unnamed: 0,Métrica,Valor
0,Métricas Principales,ACC 0.952 ± 0.043 | F1 0.952 ± 0.043
1,Exactitud Validación,0.952 ± 0.043
2,Exactitud Entrenamiento,1.000 ± 0.000
3,F1-Score Macro,0.952 ± 0.043
4,ROC AUC Medio,0.998
5,Average Precision Medio,0.995


In [71]:
# 6) Predict on the held-out test set
y_pred = bosque.predict(X_test)

# 7) Evaluate on the test set (everything below uses X_test / y_test):
#    confusion matrix, classification report and the model's own score
nombres_clases = iris.target_names

matriz_test = pd.DataFrame(
    confusion_matrix(y_test, y_pred),
    index=nombres_clases,
    columns=nombres_clases,
)
display(matriz_test.style.set_caption("<h4>Matriz de Confusión</h4>"))

# Transposed so that each class / aggregate becomes a row
reporte_test = pd.DataFrame(
    classification_report(y_test, y_pred, target_names=nombres_clases, output_dict=True)
).T
display(reporte_test.style.set_caption("<h4>Reporte de Clasificación</h4>"))

# Single-row table with the estimator's score on the test set
score_test = bosque.score(X_test, y_test)
display(pd.DataFrame({
    'Métrica': ['Score del Modelo'],
    'Valor': [f"{score_test:.3f}"]
}).style.set_caption("<h4>Evaluación Final</h4>"))

Unnamed: 0,setosa,versicolor,virginica
setosa,15,0,0
versicolor,0,14,1
virginica,0,4,11


Unnamed: 0,precision,recall,f1-score,support
setosa,1.0,1.0,1.0,15.0
versicolor,0.777778,0.933333,0.848485,15.0
virginica,0.916667,0.733333,0.814815,15.0
accuracy,0.888889,0.888889,0.888889,0.888889
macro avg,0.898148,0.888889,0.887767,45.0
weighted avg,0.898148,0.888889,0.887767,45.0


Unnamed: 0,Métrica,Valor
0,Score del Modelo,0.889
