In [6]:
import mlflow
import mlflow.sklearn
from mlflow.models import infer_signature
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_diabetes
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# Store tracking data in a local ./mlruns directory (relative to the working
# directory of the notebook).
mlflow.set_tracking_uri("file:./mlruns")

# Select the experiment by *name*; runs started below are recorded under it.
mlflow.set_experiment("Diabetes_RF_Test")

# Autologging is left disabled; parameters, metrics and the model are logged
# explicitly below.
# mlflow.autolog()

# One seed shared by the data split and the forest, and logged with the run,
# so that a run can be reproduced from its recorded parameters.
random_state = 42

db = load_diabetes()

X_train, X_test, y_train, y_test = train_test_split(
    db.data, db.target, random_state=random_state
)

n_estimators = 110
max_depth = 3
max_features = 2


def _regression_metrics(y_true, y_pred):
    """Return ``(mse, rmse, mae, r2, mape)`` for one set of predictions.

    MAPE is expressed in percent. It divides by ``y_true``, so it is only
    meaningful when no target value is zero.
    """
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)
    mape = np.mean(np.abs((y_true - y_pred) / y_true)) * 100
    return mse, rmse, mae, r2, mape


# Start a run
with mlflow.start_run():
    mlflow.log_params(
        {
            "n_estimators": n_estimators,
            "max_depth": max_depth,
            "max_features": max_features,
            "random_state": random_state,
        }
    )

    # Create and train model
    rf = RandomForestRegressor(
        n_estimators=n_estimators,
        max_depth=max_depth,
        max_features=max_features,
        random_state=random_state,
    )
    rf.fit(X_train, y_train)

    # Predict on the training set once; the result feeds both the model
    # signature and the training metrics.
    training_predictions = rf.predict(X_train)

    # Log model with signature and input example
    signature = infer_signature(X_train, training_predictions)
    # NOTE(review): the registered name says "neural_network" but the model is
    # a random forest. It is kept as-is because renaming would start a new
    # registry entry and detach it from the existing versions -- confirm
    # whether a rename is wanted.
    mlflow.sklearn.log_model(
        rf,
        name="random_forest_model",
        signature=signature,
        input_example=X_train[:5],  # First 5 rows as example
        registered_model_name="diabetes_neural_network",
    )

    # Training-set regression metrics
    (
        training_mse,
        training_rmse,
        training_mae,
        training_r2,
        training_mape,
    ) = _regression_metrics(y_train, training_predictions)

    # Test-set predictions and regression metrics
    predictions = rf.predict(X_test)
    mse, rmse, mae, r2, mape = _regression_metrics(y_test, predictions)

    # Log metrics to MLflow at full precision; rounding is a display concern
    # and is applied only when printing below.
    mlflow.log_metrics(
        {
            "training_mean_squared_error": training_mse,
            "training_root_mean_squared_error": training_rmse,
            "training_mean_absolute_error": training_mae,
            "training_r2_score": training_r2,
            "training_mape": training_mape,
            "test_mse": mse,
            "test_rmse": rmse,
            "test_mae": mae,
            "test_r2_score": r2,
            "test_mape": mape,
        }
    )

    # Print test-set results
    print(f"Mean Squared Error: {mse:.2f}")
    print(f"Root Mean Squared Error: {rmse:.2f}")
    print(f"Mean Absolute Error: {mae:.2f}")
    print(f"R² Score: {r2:.3f}")
    print(f"Mean Absolute Percentage Error: {mape:.2f}%")


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Registered model 'diabetes_neural_network' already exists. Creating a new version of this model...
Created version '6' of model 'diabetes_neural_network'.


Mean Squared Error: 3836.61
Root Mean Squared Error: 61.94
Mean Absolute Error: 53.45
R² Score: 0.388
Mean Absolute Percentage Error: 53.53%
