Modelo

In [1]:
#!pip install wandb

In [6]:
!pip install category_encoders

Collecting category_encoders
  Downloading category_encoders-2.8.1-py3-none-any.whl.metadata (7.9 kB)
Downloading category_encoders-2.8.1-py3-none-any.whl (85 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 85.7/85.7 kB 4.6 MB/s eta 0:00:00
Installing collected packages: category_encoders
Successfully installed category_encoders-2.8.1


In [3]:
# Authenticate this session with Weights & Biases before any run is created.
import wandb
wandb.login()  # Paste your API key from https://wandb.ai/authorize

True

In [None]:
#!pip install category_encoders wandb scikit-learn pandas matplotlib



In [4]:
# Colab-only step: prompts for a file upload into the runtime's working directory.
# The recorded output shows inmuebles_clean.csv being saved here; that is the file
# the modelling cell reads afterwards.
from google.colab import files
uploaded = files.upload()

Saving inmuebles_clean.csv to inmuebles_clean.csv


In [7]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    classification_report,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score
)
from category_encoders import TargetEncoder
import wandb
import wandb.sklearn

# 1. Load the dataset from the CSV in the working directory
df = pd.read_csv("inmuebles_clean.csv")

# 2. The label is the 'franja_precio' column; every other column is a feature
y = df["franja_precio"]
X = df.drop(columns=["franja_precio"])

# 3. Hold out 20% as the test set, stratified on the label, with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# 4. W&B configuration.
# The run is opened as a context manager so it is always finished, even if a
# step below raises, instead of being left open in the kernel.
with wandb.init(project="modelo_inmuebles", name="RF_con_kfold", reinit=True):

    # 5. Stratified K-fold cross-validation, run on the training split only.
    skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
    reports = []  # one classification_report dict per fold, in fold order

    for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train), start=1):
        print(f"\n📂 Fold {fold}")

        X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
        y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

        # 6. Target-encode 'barrio', fitting on this fold's training part only
        #    so the validation rows never contribute to the encoding.
        # NOTE(review): the reports show a 3-class string label (alto/bajo/medio);
        # confirm TargetEncoder's handling of a multi-class target is what is
        # intended here.
        encoder = TargetEncoder(cols=["barrio"])
        encoder.fit(X_tr, y_tr)
        X_tr_enc = encoder.transform(X_tr)
        X_val_enc = encoder.transform(X_val)
        # The held-out test set is deliberately NOT transformed inside the loop:
        # the result was never used and it is encoded once in step 9.

        # 7. Model
        clf = RandomForestClassifier(n_estimators=100, random_state=42)
        clf.fit(X_tr_enc, y_tr)
        preds = clf.predict(X_val_enc)

        # 8. Metrics: dict form for logging, text form for the notebook output
        report = classification_report(y_val, preds, output_dict=True)
        print(classification_report(y_val, preds))
        reports.append(report)

        # Per-fold W&B log (metric names carry the fold number)
        wandb.log({
            f"fold_{fold}_accuracy": report["accuracy"],
            f"fold_{fold}_precision": report["weighted avg"]["precision"],
            f"fold_{fold}_recall": report["weighted avg"]["recall"],
            f"fold_{fold}_f1": report["weighted avg"]["f1-score"]
        })

    # 9. Refit on the whole training split and predict on the held-out test set.
    #    A fresh encoder is created instead of reusing the instance left over
    #    from the last CV fold, so this step does not depend on loop leftovers.
    encoder = TargetEncoder(cols=["barrio"])
    encoder.fit(X_train, y_train)
    X_train_enc = encoder.transform(X_train)
    X_test_enc = encoder.transform(X_test)

    final_model = RandomForestClassifier(n_estimators=100, random_state=42)
    final_model.fit(X_train_enc, y_train)
    final_preds = final_model.predict(X_test_enc)
    final_probas = final_model.predict_proba(X_test_enc)
    labels = final_model.classes_

    print("\n🔍 Evaluación Final en Test:")
    print(classification_report(y_test, final_preds))

    # 10. Log the final test metrics to W&B (weighted averages across classes)
    wandb.log({
        "final_accuracy": accuracy_score(y_test, final_preds),
        "final_precision": precision_score(y_test, final_preds, average='weighted'),
        "final_recall": recall_score(y_test, final_preds, average='weighted'),
        "final_f1": f1_score(y_test, final_preds, average='weighted')
    })

    # 11. Confusion matrix on the test set
    wandb.sklearn.plot_confusion_matrix(y_test, final_preds, labels=labels)

# 12. Leaving the `with` block finishes the W&B run; no explicit
#     wandb.finish() call is needed.





📂 Fold 1
              precision    recall  f1-score   support

        alto       0.98      0.99      0.99       106
        bajo       0.99      0.99      0.99       107
       medio       0.98      0.97      0.98       107

    accuracy                           0.98       320
   macro avg       0.98      0.98      0.98       320
weighted avg       0.98      0.98      0.98       320


📂 Fold 2
              precision    recall  f1-score   support

        alto       0.99      0.95      0.97       106
        bajo       0.96      1.00      0.98       107
       medio       0.98      0.97      0.98       107

    accuracy                           0.97       320
   macro avg       0.98      0.97      0.97       320
weighted avg       0.98      0.97      0.97       320


📂 Fold 3
              precision    recall  f1-score   support

        alto       0.95      0.98      0.96       106
        bajo       0.99      0.96      0.98       107
       medio       0.98      0.97      0.98  

0,1
final_accuracy,▁
final_f1,▁
final_precision,▁
final_recall,▁
fold_1_accuracy,▁
fold_1_f1,▁
fold_1_precision,▁
fold_1_recall,▁
fold_2_accuracy,▁
fold_2_f1,▁

0,1
final_accuracy,0.97
final_f1,0.96999
final_precision,0.97
final_recall,0.97
fold_1_accuracy,0.98438
fold_1_f1,0.98436
fold_1_precision,0.98437
fold_1_recall,0.98438
fold_2_accuracy,0.975
fold_2_f1,0.97496
