In [18]:
import mlflow
import pandas as pd
import mlflow.sklearn
from urllib.parse import urlparse
from mlflow.models import infer_signature
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split,GridSearchCV


### Description: Applying a Random Forest regressor to the California housing dataset

### Project Structure
- Dataset and hyperparameter grid preparation
- Model and Grid Search
- MLflow setup, then tracking

In [6]:
# Fetch the California housing dataset into a dict-like container.
housing_dataset = fetch_california_housing()
# Display the fields the container exposes (data, target, feature_names, ...).
housing_dataset.keys()

dict_keys(['data', 'target', 'frame', 'target_names', 'feature_names', 'DESCR'])

In [13]:
# Assemble one frame holding the features plus the target under "Price".
data = pd.DataFrame(
    housing_dataset.data,
    columns=housing_dataset.feature_names,
).assign(Price=housing_dataset.target)

# Separate the target vector from the feature matrix.
y = data["Price"]
X = data.drop(columns="Price")

In [25]:
# Fixed seed so the train/test partition is identical on every
# "Restart Kernel -> Run All"; without it the reported MSE is not reproducible.
RANDOM_STATE = 42

# Hold out 20% of the rows for the final evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=RANDOM_STATE
)

# Set the MLflow model input/output signature:
signature = infer_signature(X_train, y_train)

# Hyperparameter grid explored by the grid search (2 * 3 * 2 * 2 = 24 candidates).
param_grid = {
    'n_estimators': [100, 200],
    'max_depth': [5, 10, None],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}

In [26]:
def tuning_model_parameters(X_train, y_train, param_grid, cv=3,
                            scoring="neg_mean_squared_error", random_state=42):
    """Grid-search a RandomForestRegressor over ``param_grid``.

    Parameters
    ----------
    X_train : array-like
        Training features passed to ``GridSearchCV.fit``.
    y_train : array-like
        Training targets passed to ``GridSearchCV.fit``.
    param_grid : dict
        Mapping of hyperparameter name to the list of values to try.
    cv : int, default=3
        Number of cross-validation folds.
    scoring : str, default="neg_mean_squared_error"
        Scoring name used to rank the candidates.
    random_state : int or None, default=42
        Seed for the forest, so repeated searches on the same data pick the
        same winner. Pass ``None` to get an unseeded forest.

    Returns
    -------
    GridSearchCV
        The fitted search object.
    """
    # Seed the estimator: an unseeded forest makes the search non-reproducible.
    rf = RandomForestRegressor(random_state=random_state)

    grid_search = GridSearchCV(
        estimator=rf,
        param_grid=param_grid,
        cv=cv,
        n_jobs=-1,  # use every available core
        verbose=2,
        scoring=scoring,
    )

    grid_search.fit(X_train, y_train)

    return grid_search



In [None]:
# ML experiment setup
#
# Configure the tracking URI *before* the run starts, so the run is created in
# the same store that the later logging calls talk to. Setting it inside an
# already-started run only works if the URI happened to be set earlier in the
# kernel session.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")

with mlflow.start_run():

    # Search > best model > evaluate > logging > model logging/registration

    # Grid search and best model.
    grid_search = tuning_model_parameters(X_train, y_train, param_grid)
    best_model = grid_search.best_estimator_

    # Evaluate the best model on the held-out test set.
    y_pred = best_model.predict(X_test)
    mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

    # Log the winning hyperparameters as params, keeping the "best_<name>" keys.
    mlflow.log_params(
        {f"best_{name}": value for name, value in grid_search.best_params_.items()}
    )

    # The MSE is a result, not a setting, so it is logged as a metric.
    mlflow.log_metric("mse", mse)

    # Only register the model when the tracking URI is not a plain local
    # "file" store; otherwise just log the model artifact.
    tracking_url_type_score = urlparse(mlflow.get_tracking_uri()).scheme

    if tracking_url_type_score != 'file':
        # Pass the signature here too, so the registered model carries its schema.
        mlflow.sklearn.log_model(
            best_model,
            "best_rf_model",
            signature=signature,
            registered_model_name="Best RandomForest Model",
        )
    else:
        mlflow.sklearn.log_model(best_model, "best_rf_model", signature=signature)

    print(f"Best Hyperparameters: {grid_search.best_params_}")
    print(f"Mean Squared Error: {mse}")

    

Successfully registered model 'Best RandomForest Model'.
2025/07/29 09:20:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: Best RandomForest Model, version 1


Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 2, 'min_samples_split': 2, 'n_estimators': 100}
Mean Squared Error: 0.25240934790254815
🏃 View run blushing-boar-205 at: http://127.0.0.1:5000/#/experiments/0/runs/fc926474297c4f7682d023a5a08516d5
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0


Created version '1' of model 'Best RandomForest Model'.


In [34]:
# Show which tracking backend MLflow is currently pointed at.
current_tracking_uri = mlflow.get_tracking_uri()
print("Tracking URI:", current_tracking_uri)

Tracking URI: http://127.0.0.1:5000


In [None]:
# Print details of the most recent run

last_active_run = mlflow.last_active_run()

# last_active_run() yields None when no run is available; guard against it so
# the cell reports that instead of raising AttributeError.
if last_active_run is None:
    print("No MLflow run found for this session.")
else:
    run_info = last_active_run.info
    print("Run ID:", run_info.run_id)
    print("Experiment ID:", run_info.experiment_id)
    print("Run Name:", last_active_run.data.tags.get("mlflow.runName", "No name"))
    print("Status:", run_info.status)
    print("Start Time:", run_info.start_time)
    print("Artifact URI:", run_info.artifact_uri)



Run ID: fc926474297c4f7682d023a5a08516d5
Experiment ID: 0
Run Name: blushing-boar-205
Status: FINISHED
Start Time: 1753770011066
Artifact URI: file:///home/eman/Documents/Projects/mlops-krish/HousePricingMLflow/mlruns/0/fc926474297c4f7682d023a5a08516d5/artifacts
