### 1. Set up for all Runs
#### • Importing libraries
#### • Load dataset
#### • Preview dataset
#### • Set experiment 


In [1]:
import os, pandas as pd, numpy as np, mlflow, mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from mlflow.models.signature import infer_signature

mlruns_path = "/Users/zahra/Desktop/ChronusMLOps/mlruns"
os.makedirs(mlruns_path, exist_ok=True)
mlflow.set_tracking_uri(f"file://{mlruns_path}")
mlflow.set_experiment("Baghdad-Baseline-Models")

CLEAN_PATH = "../data/interim/rent_v3.csv"
df = pd.read_csv(CLEAN_PATH)
print("✅ rent_v3 dataset loaded")
display(df.head())

parent = mlflow.start_run(run_name="Baghdad-Experimental-Models")
PARENT_ID = parent.info.run_id          
mlflow.set_tag("clean_version", "v3")   
print("Parent run:", PARENT_ID)


✅ rent_v3 dataset loaded


Unnamed: 0,ID,listing_date,type,category,Bedrooms,bathrooms,sqmt_living,floors,condition,furnished,year_built,area_type,floor_apartment,county,view,final_rent_price_usd
0,1,2024-07-27,adu,rent,3.0,1.0,120.0,1.0,old,no,,unknown,,mansour,,3000.0
1,2,2024-07-27,house,rent,3.0,1.0,100.0,,good,,,unknown,,mansour,corner,2000.0
2,3,2024-07-27,house,rent,5.0,3.0,600.0,2.0,good,no,,unknown,,mansour,highway,3000.0
3,4,2024-07-26,apartment,rent,1.0,1.0,90.0,1.0,good,fully,,unknown,,mansour,,1200.0
4,5,2024-07-28,house,rent,3.0,3.0,100.0,2.0,new,half,2023.0,unknown,,mansour,neighbourhood,4000.0


Parent run: 8136214461fe4623bbdb5d3c5f10701f


### Run #1 – Linear Regression 
**Dataset**  
`rent_v3.csv` – Baghdad Mansour rentals (clean version 3)

**Pre-processing**  
- Dropped columns: `ID`, `listing_date`  
- Numeric NaNs → column mean  
- Categorical NaNs → mode  
- One-hot encoding (`drop_first=True`)

**Train / Test**  
- 80 % / 20 % split (`random_state=42`)

**Model**  
- `LinearRegression()` (scikit-learn defaults)

**Logged to MLflow**  
- Params: model name, `train_size`, feature count, test_size  
- Metrics: RMSE, R²  
- Artifact: fitted model with signature  

> *Run name in MLflow:* **baseline_lr_v3**  
> *Experiment:* **Baghdad-Baseline-Models**


In [3]:

X = df.drop(columns=["ID", "listing_date", "final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"].fillna(df["final_rent_price_usd"].mean())

numeric_cols = X.select_dtypes(include="number").columns
X[numeric_cols] = X[numeric_cols].fillna(X[numeric_cols].mean())

categorical_cols = X.select_dtypes(include="object").columns
for col in categorical_cols:
    X[col] = X[col].fillna(X[col].mode()[0])

X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

with mlflow.start_run(run_name="baseline_lr_v3", nested=True) as run:
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)
    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, model.predict(X_train))
    mlflow.sklearn.log_model(model, "model", signature=signature, input_example=X_train.head(1))

    mlflow.log_param("model", "LinearRegression"),
    mlflow.log_param("train_size", len(X_train)),
    mlflow.log_param("features", len(X.columns)),
    mlflow.log_param("num_impute", "mean"),
    mlflow.log_param("cat_impute", "mode"),
    mlflow.set_tag("clean_version", "v3"),

    print("\n Run logged to MLflow:", run.info.run_id)
    print("Metrics:\n  RMSE:", rmse, "\n  R2:", r2)



 Run logged to MLflow: f128035a71ca43f98cebca6562be171f
Metrics:
  RMSE: 1368.4303794291538 
  R2: 0.4726745535235133


## Run #2 – Random Forest Regressor (clean v3)

**Dataset**: `rent_v3.csv`  
**Changes vs. Run #1**:  
- Model: `RandomForestRegressor`  
- Parameters: `n_estimators=200`, `max_depth=10`  

> *Logged as a nested MLflow run under:* `baseline_models_v3_parent`


In [4]:
from sklearn.ensemble import RandomForestRegressor

X = df.drop(columns=["ID", "listing_date", "final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"].fillna(df["final_rent_price_usd"].mean())

num_cols = X.select_dtypes(include="number").columns
X[num_cols] = X[num_cols].fillna(X[num_cols].mean())

cat_cols = X.select_dtypes(include="object").columns
for col in cat_cols:
    X[col] = X[col].fillna(X[col].mode()[0])

X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

n_estimators = 200
max_depth = 10

with mlflow.start_run(run_name="rf_baseline_v3", nested=True) as run:
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42,
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, model.predict(X_train))
    mlflow.sklearn.log_model(model, "model", signature=signature, input_example=X_train.head(1))

    mlflow.log_param("model", "RandomForestRegressor")
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.log_param("num_impute", "mean")
    mlflow.log_param("cat_impute", "mode")

    mlflow.set_tag("clean_version", "v3")

    print(f" Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


🔹 Child run logged (c870c911775448f1a99e3c9210b65871) — RMSE=1628.02, R²=0.30


## Run #3 – Random Forest Regressor (parameter tuning, clean v3)

**Dataset**: `rent_v3.csv`  
**Changes vs. Run #2**:  
- Increased `n_estimators` to 500  
- Removed `max_depth` restriction  
- Set `min_samples_leaf` to 5  

Goal: Allow trees to grow deeper and stabilize splits.


In [5]:
from sklearn.ensemble import RandomForestRegressor

X = df.drop(columns=["ID", "listing_date", "final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"].fillna(df["final_rent_price_usd"].mean())

num_cols = X.select_dtypes(include="number").columns
X[num_cols] = X[num_cols].fillna(X[num_cols].mean())

cat_cols = X.select_dtypes(include="object").columns
for col in cat_cols:
    X[col] = X[col].fillna(X[col].mode()[0])

X = pd.get_dummies(X, drop_first=True)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

n_estimators = 500
max_depth = None
min_samples_leaf = 5

with mlflow.start_run(run_name="rf_tuned_v3", nested=True) as run:
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        random_state=42,
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, model.predict(X_train))
    mlflow.sklearn.log_model(model, "model", signature=signature, input_example=X_train.head(1))

    mlflow.log_param("model", "RandomForestRegressor")
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("min_samples_leaf", min_samples_leaf)
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.log_param("num_impute", "mean")
    mlflow.log_param("cat_impute", "mode")

    mlflow.set_tag("clean_version", "v3")

    print(f"Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


🔹 Child run logged (635c29316d7449d481af0147100d5199) — RMSE=1670.93, R²=0.27


## Run #4 – Linear Regression (clean v4)

**Dataset**: `rent_v4.csv`  
**Changes vs. previous runs**:  
- Used cleaned version v4 (simplified cleaning)  
- Linear Regression model (default settings)  
- Train/Test split: 80/20

Goal: Check if better cleaning improves baseline performance.


In [7]:
df = pd.read_csv("../data/interim/rent_v4.csv")

X = df.drop(columns=["final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

with mlflow.start_run(run_name="lr_baseline_v4", nested=True) as run:
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, model.predict(X_train))
    mlflow.sklearn.log_model(
        model, 
        "model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "LinearRegression")
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v4")

    print(f"Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


Child run logged (1ac03ed5a5d745599169a9f29588c851) — RMSE=1615.05, R²=0.31


## Run #5 – Random Forest Regressor after Outlier Removal (clean v5)

**Dataset**: `rent_v5.csv`  
**Changes vs. previous runs**:  
- Outliers removed (top 1% and bottom 1% prices, top 1% sqmt_living)
- Random Forest Regressor model
- Parameters tuned:
  - `n_estimators=500`
  - `max_depth=None`
  - `min_samples_leaf=5`

Goal: Train a more stable Random Forest model after cleaning extreme entries.


In [8]:
from sklearn.ensemble import RandomForestRegressor

df = pd.read_csv("../data/interim/rent_v5.csv")

X = df.drop(columns=["final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

n_estimators = 500
max_depth = None
min_samples_leaf = 5

with mlflow.start_run(run_name="rf_outlier_removed_v5", nested=True) as run:
    model = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        min_samples_leaf=min_samples_leaf,
        random_state=42,
    )

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, model.predict(X_train))
    mlflow.sklearn.log_model(
        model,
        "model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "RandomForestRegressor")
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)
    mlflow.log_param("min_samples_leaf", min_samples_leaf)
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v5")

    print(f"🔹 Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


🔹 Child run logged (611c0777cada4dc1884d280fd11f9dc0) — RMSE=1007.66, R²=0.23


## Run #6 – Linear Regression (after Outlier Removal, clean v5)

**Dataset**: `rent_v5.csv`  
**Changes vs. Run #4**:  
- Using cleaned dataset with outliers removed (v5)
- Simpler Linear Regression model
- Train/Test split: 80/20

Goal: Test if simple models work better after cleaning compared to complex ones.


In [9]:
df = pd.read_csv("../data/interim/rent_v5.csv")

X = df.drop(columns=["final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

with mlflow.start_run(run_name="lr_outlier_removed_v5", nested=True) as run:
    model = LinearRegression()
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, model.predict(X_train))
    mlflow.sklearn.log_model(
        model,
        "model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "LinearRegression")
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v5")

    print(f"🔹 Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


🔹 Child run logged (a06e42baa69b40308994fab35694d6b3) — RMSE=942.83, R²=0.33


## Run #7 – Linear Regression with StandardScaler (clean v5)

**Dataset**: `rent_v5.csv`  
**Changes vs. Run #6**:  
- Standardized all features using `StandardScaler`
- Re-trained Linear Regression on scaled features

Goal: Improve performance by making feature scales uniform.


In [11]:
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline

df = pd.read_csv("../data/interim/rent_v5.csv")

X = df.drop(columns=["final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", LinearRegression())
])

with mlflow.start_run(run_name="lr_scaled_v5", nested=True) as run:
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, pipeline.predict(X_train))
    mlflow.sklearn.log_model(
        pipeline,
        "model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "LinearRegression + StandardScaler")
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v5")

    print(f"🔹 Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


🔹 Child run logged (a9ad534cab2243678134ad5a37c300a3) — RMSE=937.43, R²=0.33


## Run #8 – Ridge Regression with StandardScaler (clean v5)

**Dataset**: `rent_v5.csv`  
**Changes vs. Run #7**:  
- Replaced Linear Regression with Ridge Regression (`alpha=1.0`)
- StandardScaler still applied
- Train/Test split: 80/20

Goal: See if light regularization improves RMSE/R² after scaling.


In [18]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

df = pd.read_csv("../data/interim/rent_v5.csv")

X = df.drop(columns=["final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

alpha = 1.0    

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", Ridge(alpha=alpha, random_state=42))
])

with mlflow.start_run(run_name="ridge_scaled_v5", nested=True) as run:
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, pipeline.predict(X_train))
    mlflow.sklearn.log_model(
        pipeline,
        "model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "Ridge Regression + StandardScaler")
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v5")

    print(f"🔹 Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


🔹 Child run logged (f00483716ff34d5ea0e1ed2bf41fece2) — RMSE=934.71, R²=0.34


## Run #9 – Ridge Regression with StandardScaler (clean v6)

**Dataset**: `rent_v6.csv`  
**Changes vs. Run #8**:  
- Dropped extra noisy columns: `floor_apartment`, `listing_date`, `condition`, `view`
- Continued to apply StandardScaler
- Ridge Regression (`alpha=1.0`)

Goal: Check if extra data cleaning improves RMSE and R².


In [15]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import pandas as pd

df = pd.read_csv("../data/interim/rent_v6.csv")

X = df.drop(columns=["final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

alpha = 1.0    

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", Ridge(alpha=alpha, random_state=42))
])

with mlflow.start_run(run_name="ridge_scaled_v6", nested=True) as run:
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, pipeline.predict(X_train))
    mlflow.sklearn.log_model(
        pipeline,
        "model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "Ridge Regression + StandardScaler")
    mlflow.log_param("alpha", alpha)
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v6")

    print(f"🔹 Child run logged ({run.info.run_id}) — RMSE={rmse:.2f}, R²={r2:.2f}")


🔹 Child run logged (4b4b6a6128544336bd250e3a5283f8d2) — RMSE=958.86, R²=0.30


## Run #10 – Linear Regression with StandardScaler (clean v6)

**Dataset**: `rent_v6.csv`  
**Changes vs. Run #7**:  
- Used cleaned version v6 (extra columns dropped)
- Applied StandardScaler
- Linear Regression model (no regularization)

Goal: Check if simple scaling and linear model outperform Ridge after heavy cleaning.


In [25]:
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import mlflow
import mlflow.sklearn
from mlflow.models.signature import infer_signature
import pandas as pd

df = pd.read_csv("../data/interim/rent_v6.csv")

X = df.drop(columns=["final_rent_price_usd"], errors="ignore")
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", LinearRegression())
])

with mlflow.start_run(run_name="lr_scaled_v6", nested=True) as run:
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, pipeline.predict(X_train))
    mlflow.sklearn.log_model(
        pipeline,
        "model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "LinearRegression + StandardScaler")
    mlflow.log_param("test_size", 0.2)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v6")

    print(f"🔹 Run complete — RMSE: {rmse:.2f}, R²: {r2:.2f}")
    print(f"🔁 Run ID: {run.info.run_id}")


🔹 Run complete — RMSE: 961.42, R²: 0.30
🔁 Run ID: 4d12bedbe2e34d84b7a52d13e67a1163


## Final Retraining – Ridge Regression on rent_v7.csv

- Dataset: `rent_v7.csv` (listing dates removed)
- Model: Ridge Regression with StandardScaler
- Features: Simplified feature set for stable prediction and easy deployment
- Goal: Create a final, deployable model with minimal input complexity


In [27]:
from sklearn.linear_model import Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from mlflow.models.signature import infer_signature
import mlflow
import mlflow.sklearn
import pandas as pd

df = pd.read_csv("../data/interim/rent_v7.csv")

X = df.drop(columns=["final_rent_price_usd"])
y = df["final_rent_price_usd"]

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

pipeline = Pipeline([
    ("scaler", StandardScaler()),
    ("model", Ridge(alpha=1.0, random_state=42))
])

with mlflow.start_run(run_name="ridge_clean_v7", nested=True) as run:
    pipeline.fit(X_train, y_train)
    y_pred = pipeline.predict(X_test)

    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    mlflow.log_metric("rmse", rmse)
    mlflow.log_metric("r2_score", r2)

    signature = infer_signature(X_train, pipeline.predict(X_train))
    mlflow.sklearn.log_model(
        pipeline,
        artifact_path="model",
        signature=signature,
        input_example=X_train.head(1)
    )

    mlflow.log_param("model", "Ridge")
    mlflow.log_param("alpha", 1.0)
    mlflow.log_param("n_features", X.shape[1])
    mlflow.set_tag("clean_version", "v7 (no dates)")

    print(f"🔁 Run ID: {run.info.run_id}")
    print(f"📊 RMSE = {rmse:.2f}  |  R² = {r2:.2f}")


🔁 Run ID: fd0954aceff7401c8ed535e335b259de
📊 RMSE = 885.62  |  R² = 0.41


In [None]:
mlflow.end_run() 
print("Parent run closed.")