# ML Model for Madrid

This notebook builds a RandomForest model on the Madrid dataset. It removes non-target PRICE columns and uses Weights & Biases (wandb) for experiment tracking.

In [27]:
import sys
import os
import pandas as pd
from dotenv import load_dotenv

load_dotenv() # Load the environment variables from .env file


import importlib
# Añadir la ruta al directorio src para importar módulos personalizados
sys.path.append("../src")
# Unload the module if already loaded
if "idealista18.loader" in sys.modules:
    importlib.reload(sys.modules["idealista18.loader"])
else:
    import idealista18.loader as loader


from idealista18 import loader

In [28]:
# Load and clean data for Madrid
df = loader.load_data("Madrid")
df = loader.remove_geo_outliers(df, zscore=5)
print("Data loaded:")
print(df.head())

Data loaded:
                 ASSETID  PERIOD   PRICE    UNITPRICE ADTYPOLOGYID  \
0  A15019136831406238029  201803  126000  2680.851064         HOME   
1   A6677225905472065344  201803  235000  4351.851852         HOME   
2  A13341979748618524775  201803  373000  4973.333333         HOME   
3   A4775182175615276542  201803  284000  5916.666667         HOME   
4   A2492087730711701973  201803  228000  4560.000000         HOME   

  ADOPERATIONID  CONSTRUCTEDAREA  ROOMNUMBER  BATHNUMBER  HASTERRACE  ...  \
0          SALE               47           1           1           0  ...   
1          SALE               54           1           1           0  ...   
2          SALE               75           2           1           0  ...   
3          SALE               48           1           1           0  ...   
4          SALE               50           0           1           0  ...   

   BUILTTYPEID_2  BUILTTYPEID_3  DISTANCE_TO_CITY_CENTER  DISTANCE_TO_METRO  \
0              1        

In [29]:
df.columns

Index(['ASSETID', 'PERIOD', 'PRICE', 'UNITPRICE', 'ADTYPOLOGYID',
       'ADOPERATIONID', 'CONSTRUCTEDAREA', 'ROOMNUMBER', 'BATHNUMBER',
       'HASTERRACE', 'HASLIFT', 'HASAIRCONDITIONING', 'AMENITYID',
       'HASPARKINGSPACE', 'ISPARKINGSPACEINCLUDEDINPRICE', 'PARKINGSPACEPRICE',
       'HASNORTHORIENTATION', 'HASSOUTHORIENTATION', 'HASEASTORIENTATION',
       'HASWESTORIENTATION', 'HASBOXROOM', 'HASWARDROBE', 'HASSWIMMINGPOOL',
       'HASDOORMAN', 'HASGARDEN', 'ISDUPLEX', 'ISSTUDIO', 'ISINTOPFLOOR',
       'CONSTRUCTIONYEAR', 'FLOORCLEAN', 'FLATLOCATIONID',
       'CADCONSTRUCTIONYEAR', 'CADMAXBUILDINGFLOOR', 'CADDWELLINGCOUNT',
       'CADASTRALQUALITYID', 'BUILTTYPEID_1', 'BUILTTYPEID_2', 'BUILTTYPEID_3',
       'DISTANCE_TO_CITY_CENTER', 'DISTANCE_TO_METRO',
       'DISTANCE_TO_CASTELLANA', 'LONGITUDE', 'LATITUDE', 'CITYNAME',
       'ADTYPOLOGY', 'ADOPERATION'],
      dtype='object')

In [30]:
TARGET_COLUMN = "UNITPRICE"
#
# 1) Remove all PRICE-related columns except 'UNITPRICE'
# 2) Remove unnecessary columns:  Typology, Operation, AssetID, Period
#
if not df.empty:
    price_cols = [col for col in df.columns if col in ['ADTYPOLOGY', 'ADOPERATION', 
                                                       'ASSETID', 'PERIOD', 'ADTYPOLOGYID', 'ADOPERATIONID', 'CITYNAME']
                                                       or (col.endswith('PRICE') and col != TARGET_COLUMN)]

    print("Dropping columns:", price_cols)
    df = df.drop(columns=price_cols)
else:
    print("Dataframe is empty")

Dropping columns: ['ASSETID', 'PERIOD', 'PRICE', 'ADTYPOLOGYID', 'ADOPERATIONID', 'ISPARKINGSPACEINCLUDEDINPRICE', 'PARKINGSPACEPRICE', 'CITYNAME', 'ADTYPOLOGY', 'ADOPERATION']


In [31]:
# Prepare features and target variable

if TARGET_COLUMN in df.columns:
    X = df.drop(columns=[TARGET_COLUMN])
    y = df[TARGET_COLUMN]
    print("Features and target prepared.")
else:
    X, y = df, None
    print("UNITPRICE column not found in dataframe.")

Features and target prepared.


In [32]:
import wandb
import random

# 1) Grab the W&B token

wandb_token = os.getenv("WANDB_API_KEY")
if wandb_token is None:
    raise RuntimeError("WANDB_API_KEY not found in environment. Did you create a .env with that variable?")
# 2) Login with the token
wandb.login(key=wandb_token)


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/davidreyblanco/.netrc


True

In [33]:
# Train/test split and model training
import unidecode as unidecode
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error, median_absolute_error

TEAM_ENTITY = "mds9"  # Replace with your team entity
ALGORITHM = "Random Forest"
CITY = "Madrid"
DATASET = "idealista18"
algorithm_slug = unidecode.unidecode(ALGORITHM).lower().replace(" ", "")
PROJECT = f"{DATASET}_{CITY.lower()}_{algorithm_slug}_{TARGET_COLUMN}_{TEAM_ENTITY.lower()}"


# 1) Grab the W&B token
wandb_token = os.getenv("WANDB_API_KEY")
if wandb_token is None:
    raise RuntimeError("WANDB_API_KEY not found in environment. Did you create a .env with that variable?")

# 2) Login with the token
wandb.login(key=wandb_token)

for n_estimators in [10, 20, 50, 100]:
    for max_depth in [5, 10]:

        # 3) Train some models
        with wandb.init(project=PROJECT, config={"algorithm": ALGORITHM,
                                                "target": TARGET_COLUMN,
                                                "city": CITY,
                                                "dataset": DATASET }) as run:

            #
            # Create a grid search for hyperparameters
            #
                random_state=42
                print(f"{n_estimators} estimators, max depth {max_depth}")
                X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=random_state)
                model = RandomForestRegressor(n_estimators=n_estimators, max_depth = 10, random_state=random_state)
                model.fit(X_train, y_train)
                preds = model.predict(X_test)
                mse = mean_squared_error(y_test, preds)
                r2 = r2_score(y_test, preds)
                mae = mean_absolute_error(y_test, preds)
                mape = mean_absolute_percentage_error(y_test, preds)
                medape = median_absolute_error(y_test, preds) / y_test.median()

                print(f"MAPE: {mape:.2f} MedAPE: {medape:.4f} MSE:{mse}")

                run.log({
                    "mse": mse,
                    "r2": r2,
                    "mape": mape,
                    "medape": medape,
                    "mae": mae,
                    "random_state": random_state,
                    'n_train': len(X_train),
                    'n_test': len(X_test),
                    "n_estimators": n_estimators,
                    "max_depth": max_depth, 
                })

                # Create a model filename fingerprint
                
                model_fp = f"estimators-{n_estimators}_depth-{max_depth}"
                model_artifact_name = f"{DATASET}_{CITY.lower()}_{algorithm_slug}_{TARGET_COLUMN}_{model_fp}"

                model_filename = f"../models/{model_artifact_name}.pickle"
                joblib.dump(model, model_filename, compress=('gzip', 3))
                model_size = os.path.getsize(model_filename)
                print(f"Model saved to {model_filename} ({model_size / 1024:.2f} KB)")

                # Guardamos el modelo en el repositorio de modelos de W&B
                model_artifact_name = f'idealista18_madrid_{ALGORITHM.lower().replace(" ", "_")}'

                artifact = wandb.Artifact(name=model_artifact_name, 
                                        type="model", 
                                        description=f"{ALGORITHM} model for Madrid housing prices")
                artifact.metadata["tags"] = [model_fp, CITY, DATASET]
                artifact.metadata["aliases"] = [model_fp]
                artifact.add_file(local_path=model_filename)
                artifact.save()
                run.log_artifact(artifact, aliases=["latest", "production", model_fp])

                df = pd.DataFrame({
                    "actual_price": y_test,
                    "predicted_price": preds,
                    "error": abs(y_test - preds),            # e.g. point‐size or color
                })
                df = wandb.Table(dataframe=df)

                # 1) Simple scatter with axis labels and a color dimension (error)
                run.log({
                    "price_scatter": wandb.plot.scatter(
                        df,
                        x="actual_price",
                        y="predicted_price",
                        title="Predicted vs Actual Price",
                        #xname="Actual (EUR)",
                        #yname="Predicted (EUR)"
                    )
                })

                # 2) If you want to color‐code each point by “error”:
                run.log({
                    "price_scatter_colored": wandb.plot.scatter(
                        df,
                        x="actual_price",
                        y="predicted_price",
                        title="Predicted vs Actual Price (colored by error)",
                        #xname="Actual (EUR)",
                        #yname="Predicted (EUR)",
                        #color="error",    # column in df
                        #colorscale="Viridis"  # any Plotly colorscale name
                    )
                })



[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/davidreyblanco/.netrc


10 estimators, max depth 5
MAPE: 0.16 MedAPE: 0.1069 MSE:603391.8059098464
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-10_depth-5.pickle (435.43 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,537.20319
mape,0.16343
max_depth,5.0
medape,0.1069
mse,603391.80591
n_estimators,10.0
n_test,18963.0
n_train,75851.0
r2,0.78965
random_state,42.0


10 estimators, max depth 10
MAPE: 0.16 MedAPE: 0.1069 MSE:603391.8059098464
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-10_depth-10.pickle (435.43 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,537.20319
mape,0.16343
max_depth,10.0
medape,0.1069
mse,603391.80591
n_estimators,10.0
n_test,18963.0
n_train,75851.0
r2,0.78965
random_state,42.0


20 estimators, max depth 5
MAPE: 0.16 MedAPE: 0.1063 MSE:596265.3969084613
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-20_depth-5.pickle (873.17 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,533.36249
mape,0.16222
max_depth,5.0
medape,0.10629
mse,596265.39691
n_estimators,20.0
n_test,18963.0
n_train,75851.0
r2,0.79213
random_state,42.0


20 estimators, max depth 10
MAPE: 0.16 MedAPE: 0.1063 MSE:596265.3969084613
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-20_depth-10.pickle (873.17 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,533.36249
mape,0.16222
max_depth,10.0
medape,0.10629
mse,596265.39691
n_estimators,20.0
n_test,18963.0
n_train,75851.0
r2,0.79213
random_state,42.0


50 estimators, max depth 5
MAPE: 0.16 MedAPE: 0.1057 MSE:591739.8206843077
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-50_depth-5.pickle (2174.25 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,531.24001
mape,0.16164
max_depth,5.0
medape,0.10572
mse,591739.82068
n_estimators,50.0
n_test,18963.0
n_train,75851.0
r2,0.79371
random_state,42.0


50 estimators, max depth 10
MAPE: 0.16 MedAPE: 0.1057 MSE:591739.8206843077
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-50_depth-10.pickle (2174.25 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,531.24001
mape,0.16164
max_depth,10.0
medape,0.10572
mse,591739.82068
n_estimators,50.0
n_test,18963.0
n_train,75851.0
r2,0.79371
random_state,42.0


100 estimators, max depth 5
MAPE: 0.16 MedAPE: 0.1060 MSE:590570.5414241096
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-100_depth-5.pickle (4336.62 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,530.93472
mape,0.16151
max_depth,5.0
medape,0.10601
mse,590570.54142
n_estimators,100.0
n_test,18963.0
n_train,75851.0
r2,0.79412
random_state,42.0


100 estimators, max depth 10
MAPE: 0.16 MedAPE: 0.1060 MSE:590570.5414241096
Model saved to ../models/idealista18_madrid_randomforest_UNITPRICE_estimators-100_depth-10.pickle (4336.62 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,530.93472
mape,0.16151
max_depth,10.0
medape,0.10601
mse,590570.54142
n_estimators,100.0
n_test,18963.0
n_train,75851.0
r2,0.79412
random_state,42.0


Ahora descargamos el artefacto desde el proyecto, tomamos la última versión del artefacto

In [34]:
import joblib

tag = "latest"  # or any specific tag you want to use

model_artifact_name = 'idealista18_madrid_random_forest'

with wandb.init(project=PROJECT) as run:
    # Load the artifact from wandb and run inference on X_test
    artifact_path = f"{model_artifact_name}:{tag}"  # Use the artifact name and tag
    artifact = run.use_artifact(artifact_path, type="model")
    artifact_dir = artifact.download()
    
    files = os.listdir(artifact_dir)
    model_filename = files[0]  
    print(f"Model file found: {model_filename}")

    loaded_model = joblib.load(os.path.join(artifact_dir, model_filename))

    # Run inference
    loaded_preds = loaded_model.predict(X_test)
    print("Inference complete. Example predictions:", loaded_preds[:5])


[34m[1mwandb[0m:   1 of 1 files downloaded.  


Model file found: idealista18_madrid_randomforest_UNITPRICE_estimators-100_depth-10.pickle
Inference complete. Example predictions: [1764.98531117 4718.75693724 5796.55918938 1744.09998795 4626.98169403]


# Simple regression Tree

In [37]:
import unidecode as unidecode
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error, median_absolute_error
from sklearn.tree import DecisionTreeRegressor

TEAM_ENTITY = "mds9"  # Replace with your team entity
ALGORITHM = "Decision Tree"
CITY = "Madrid"
DATASET = "idealista18"
algorithm_slug = unidecode.unidecode(ALGORITHM).lower().replace(" ", "")
PROJECT = f"{DATASET}_{CITY.lower()}_{algorithm_slug}_{TARGET_COLUMN}_{TEAM_ENTITY.lower()}"


# 1) Grab the W&B token
wandb_token = os.getenv("WANDB_API_KEY")
if wandb_token is None:
    raise RuntimeError("WANDB_API_KEY not found in environment. Did you create a .env with that variable?")

# 2) Login with the token
wandb.login(key=wandb_token)

for max_depth in [5, 10, 30]:

        # 3) Train some models
        with wandb.init(project=PROJECT, config={"algorithm": ALGORITHM,
                                                "target": TARGET_COLUMN,
                                                "city": CITY,
                                                "dataset": DATASET }) as run:

            #
            # Create a grid search for hyperparameters
            #
                random_state=42
                print(f"{n_estimators} estimators, max depth {max_depth}")
                model = DecisionTreeRegressor(max_depth=max_depth, random_state=random_state)
                model.fit(X_train, y_train)
                preds = model.predict(X_test)
                mse = mean_squared_error(y_test, preds)
                r2 = r2_score(y_test, preds)
                mae = mean_absolute_error(y_test, preds)
                mape = mean_absolute_percentage_error(y_test, preds)
                medape = median_absolute_error(y_test, preds) / y_test.median()

                print(f"MAPE: {mape:.2f} MedAPE: {medape:.4f} MSE:{mse}")

                run.log({
                    "mse": mse,
                    "r2": r2,
                    "mape": mape,
                    "medape": medape,
                    "mae": mae,
                    "random_state": random_state,
                    'n_train': len(X_train),
                    'n_test': len(X_test),
                    "n_estimators": n_estimators,
                    "max_depth": max_depth, 
                })

                # Create a model filename fingerprint
                
                model_fp = f"depth-{max_depth}"
                model_artifact_name = f"{DATASET}_{CITY.lower()}_{algorithm_slug}_{TARGET_COLUMN}_{model_fp}"

                model_filename = f"../models/{model_artifact_name}.pickle"
                joblib.dump(model, model_filename, compress=('gzip', 3))
                model_size = os.path.getsize(model_filename)
                print(f"Model saved to {model_filename} ({model_size / 1024:.2f} KB)")

                # Guardamos el modelo en el repositorio de modelos de W&B = idealista18_madrid_random_forest
                model_artifact_name = f'idealista18_madrid_{ALGORITHM.lower().replace(" ", "_")}'

                artifact = wandb.Artifact(name=model_artifact_name, 
                                        type="model", 
                                        description=f"{ALGORITHM} model for Madrid housing prices")

                artifact.metadata["tags"] = [model_fp, CITY, DATASET]
                artifact.metadata["aliases"] = [model_fp]
                artifact.add_file(local_path=model_filename)
                artifact.save()
                run.log_artifact(artifact, aliases=["latest", "production", model_fp])

                df = pd.DataFrame({
                    "actual_price": y_test,
                    "predicted_price": preds,
                    "error": abs(y_test - preds),            # e.g. point‐size or color
                })
                df = wandb.Table(dataframe=df)

                # 1) Simple scatter with axis labels and a color dimension (error)
                run.log({
                    "price_scatter": wandb.plot.scatter(
                        df,
                        x="actual_price",
                        y="predicted_price",
                        title="Predicted vs Actual Price",
                        #xname="Actual (EUR)",
                        #yname="Predicted (EUR)"
                    )
                })

                # 2) If you want to color‐code each point by “error”:
                run.log({
                    "price_scatter_colored": wandb.plot.scatter(
                        df,
                        x="actual_price",
                        y="predicted_price",
                        title="Predicted vs Actual Price (colored by error)",
                        #xname="Actual (EUR)",
                        #yname="Predicted (EUR)",
                        #color="error",    # column in df
                        #colorscale="Viridis"  # any Plotly colorscale name
                    )
                })


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/davidreyblanco/.netrc


100 estimators, max depth 5
MAPE: 0.22 MedAPE: 0.1496 MSE:974779.535222676
Model saved to ../models/idealista18_madrid_decisiontree_UNITPRICE_depth-5.pickle (3.06 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,714.26429
mape,0.21785
max_depth,5.0
medape,0.14956
mse,974779.53522
n_estimators,100.0
n_test,18963.0
n_train,75851.0
r2,0.66018
random_state,42.0


100 estimators, max depth 10
MAPE: 0.17 MedAPE: 0.1143 MSE:680536.3614929119
Model saved to ../models/idealista18_madrid_decisiontree_UNITPRICE_depth-10.pickle (45.71 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,570.86571
mape,0.17215
max_depth,10.0
medape,0.1143
mse,680536.36149
n_estimators,100.0
n_test,18963.0
n_train,75851.0
r2,0.76275
random_state,42.0


100 estimators, max depth 30
MAPE: 0.15 MedAPE: 0.0766 MSE:699292.9996179165
Model saved to ../models/idealista18_madrid_decisiontree_UNITPRICE_depth-30.pickle (2348.33 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,504.72104
mape,0.1512
max_depth,30.0
medape,0.07664
mse,699292.99962
n_estimators,100.0
n_test,18963.0
n_train,75851.0
r2,0.75622
random_state,42.0


# Simple regression

In [38]:
import unidecode as unidecode
import joblib
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error, median_absolute_error
from sklearn.linear_model import LinearRegression

TEAM_ENTITY = "mds9"  # Replace with your team entity
ALGORITHM = "Multiple Regression"
CITY = "Madrid"
DATASET = "idealista18"
algorithm_slug = unidecode.unidecode(ALGORITHM).lower().replace(" ", "")
PROJECT = f"{DATASET}_{CITY.lower()}_{algorithm_slug}_{TARGET_COLUMN}_{TEAM_ENTITY.lower()}"


# 1) Grab the W&B token
wandb_token = os.getenv("WANDB_API_KEY")
if wandb_token is None:
    raise RuntimeError("WANDB_API_KEY not found in environment. Did you create a .env with that variable?")

# 2) Login with the token
wandb.login(key=wandb_token)


    # 3) Train some models
with wandb.init(project=PROJECT, config={"algorithm": ALGORITHM,
                                                "target": TARGET_COLUMN,
                                                "city": CITY,
                                                "dataset": DATASET }) as run:

            #
            # Create a grid search for hyperparameters
            #
                random_state=42
                print(f"{n_estimators} estimators, max depth {max_depth}")
                model = LinearRegression()
                # Fill NA values in X_train and X_test with column means
                X_train_lr = X_train.fillna(X_train.mean())
                X_test_lr = X_test.fillna(X_train.mean())
                model.fit(X_train_lr, y_train)
                preds = model.predict(X_test_lr)
                mse = mean_squared_error(y_test, preds)
                r2 = r2_score(y_test, preds)
                mae = mean_absolute_error(y_test, preds)
                mape = mean_absolute_percentage_error(y_test, preds)
                medape = median_absolute_error(y_test, preds) / y_test.median()

                print(f"MAPE: {mape:.2f} MedAPE: {medape:.4f} MSE:{mse}")

                run.log({
                    "mse": mse,
                    "r2": r2,
                    "mape": mape,
                    "medape": medape,
                    "mae": mae,
                    "random_state": random_state,
                    'n_train': len(X_train),
                    'n_test': len(X_test),
                    "n_estimators": n_estimators,
                    "max_depth": max_depth, 
                })

                # Create a model filename fingerprint
                
                model_fp = f"plain"
                model_artifact_name = f"{DATASET}_{CITY.lower()}_{algorithm_slug}_{TARGET_COLUMN}_{model_fp}"

                model_filename = f"../models/{model_artifact_name}.pickle"
                joblib.dump(model, model_filename, compress=('gzip', 3))
                model_size = os.path.getsize(model_filename)
                print(f"Model saved to {model_filename} ({model_size / 1024:.2f} KB)")

                # Guardamos el modelo en el repositorio de modelos de W&B
                model_artifact_name = f'idealista18_madrid_{ALGORITHM.lower().replace(" ", "_")}'

                artifact = wandb.Artifact(name=model_artifact_name, 
                                        type="model", 
                                        description=f"{ALGORITHM} model for Madrid housing prices")
                artifact.metadata["tags"] = [model_fp, CITY, DATASET]
                artifact.metadata["aliases"] = [model_fp]
                artifact.add_file(local_path=model_filename)
                artifact.save()
                run.log_artifact(artifact, aliases=["latest", "production", model_fp])

                df = pd.DataFrame({
                    "actual_price": y_test,
                    "predicted_price": preds,
                    "error": abs(y_test - preds),            # e.g. point‐size or color
                })
                df = wandb.Table(dataframe=df)

                # 1) Simple scatter with axis labels and a color dimension (error)
                run.log({
                    "price_scatter": wandb.plot.scatter(
                        df,
                        x="actual_price",
                        y="predicted_price",
                        title="Predicted vs Actual Price",
                        #xname="Actual (EUR)",
                        #yname="Predicted (EUR)"
                    )
                })

                # 2) If you want to color‐code each point by “error”:
                run.log({
                    "price_scatter_colored": wandb.plot.scatter(
                        df,
                        x="actual_price",
                        y="predicted_price",
                        title="Predicted vs Actual Price (colored by error)",
                        #xname="Actual (EUR)",
                        #yname="Predicted (EUR)",
                        #color="error",    # column in df
                        #colorscale="Viridis"  # any Plotly colorscale name
                    )
                })

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /Users/davidreyblanco/.netrc


100 estimators, max depth 30
MAPE: 0.26 MedAPE: 0.1764 MSE:1081204.6200973417
Model saved to ../models/idealista18_madrid_multipleregression_UNITPRICE_plain.pickle (1.44 KB)


0,1
mae,▁
mape,▁
max_depth,▁
medape,▁
mse,▁
n_estimators,▁
n_test,▁
n_train,▁
r2,▁
random_state,▁

0,1
mae,780.07291
mape,0.25674
max_depth,30.0
medape,0.17636
mse,1081204.6201
n_estimators,100.0
n_test,18963.0
n_train,75851.0
r2,0.62307
random_state,42.0
