# MLflow v2.16 Data Setup for Migration Testing

In [None]:
# Install dependencies

!pip3 install mlflow==2.16.2
!pip3 install sagemaker-mlflow==0.2.0

In [None]:
# import modules

import mlflow
from mlflow.models import infer_signature
from sklearn.datasets import make_regression
from sklearn.ensemble import RandomForestRegressor

import numpy as np
from sklearn.linear_model import LinearRegression
import mlflow
from mlflow import MlflowClient

In [None]:
# Configuration - replace the placeholder values below with your actual values.

# ARN of the tracking server; used directly as the MLflow tracking URI.
tracking_server_arn = "<your-tracking-server-arn>"
mlflow.set_tracking_uri(tracking_server_arn)

# Suffix appended to every sample experiment name created by this notebook.
tracking_server_name = "<your-tracking-server-name>"

# Name handed to sklearn autologging as `registered_model_name`.
registered_model_name = "<your-model-name>"

## MLflow v2.16 Model Training and Logging Functions

In [None]:
@mlflow.trace(name="train_and_log_linear_regression_model")
def _traced_train_and_log_linear_regression_model(experiment_name):
    """Traced body of ``train_and_log_linear_regression_model``.

    Kept separate from the public entry point so that the experiment is
    already selected when the root span of the trace is opened. The span name
    is pinned to the public function name and ``experiment_name`` is kept as a
    parameter so the recorded span name and inputs match the public call.

    Args:
        experiment_name: Name of the experiment selected by the caller; not
            used here other than being captured as the trace input.
    """
    # enable autologging
    mlflow.sklearn.autolog(registered_model_name=registered_model_name)

    with mlflow.start_span(name="data_preparation") as span:
        # prepare training data: y = 1 * x0 + 2 * x1 + 3
        X = np.array([[1, 1], [1, 2], [2, 2], [2, 3]])
        y = np.dot(X, np.array([1, 2])) + 3
        span.set_inputs({"X_shape": X.shape, "y_shape": y.shape})
        span.set_outputs({"data_ready": True})

    with mlflow.start_span(name="model_training") as span:
        model = LinearRegression()
        with mlflow.start_run():
            # Log additional data on top of what autologging records
            mlflow.log_param("data_shape", X.shape)
            mlflow.log_param("model_type", "LinearRegression")
            mlflow.log_metric("training_samples", len(X))

            model.fit(X, y)
            span.set_inputs({"model": "LinearRegression", "training_data": X.shape})
            span.set_outputs({"model_trained": True})

            with mlflow.start_span(name="model_evaluation") as eval_span:
                # Log predictions and score (evaluated on the training data)
                predictions = model.predict(X)
                score = model.score(X, y)
                mlflow.log_metric("r2_score", score)
                mlflow.log_metric("mean_prediction", np.mean(predictions))

                # Log model coefficients
                mlflow.log_param("coefficients", model.coef_.tolist())
                mlflow.log_param("intercept", model.intercept_)

                eval_span.set_inputs({"test_data": X.shape})
                eval_span.set_outputs({"r2_score": score, "predictions": predictions.tolist()})


def train_and_log_linear_regression_model(experiment_name):
    """Train a tiny LinearRegression model and log it to ``experiment_name``.

    Selects the experiment, then runs the traced training routine, which
    enables sklearn autologging (using the module-level
    ``registered_model_name``), fits the model on a fixed four-row dataset
    inside an MLflow run, and logs extra params, metrics and nested spans.

    Args:
        experiment_name: Name of the MLflow experiment to activate.

    Returns:
        None.
    """
    # The experiment must be active *before* the trace's root span starts:
    # a trace is associated with the experiment that is active at that
    # moment, so calling set_experiment inside the traced function would file
    # the trace under whichever experiment was active previously.
    mlflow.set_experiment(experiment_name)
    return _traced_train_and_log_linear_regression_model(experiment_name)

In [None]:
def create_sample_experiments():
    """Train and log the sample model once per migration-test experiment.

    Each experiment name is a fixed prefix joined with the module-level
    ``tracking_server_name`` by a hyphen.
    """
    name_prefixes = (
        "linear-regression-baseline",
        "model-performance-analysis",
        "sklearn-migration-test",
        "data-validation-experiment",
        "regression-model-comparison",
    )
    # Build every name up front, then train against each in order.
    experiment_names = [f"{prefix}-{tracking_server_name}" for prefix in name_prefixes]

    for experiment_name in experiment_names:
        train_and_log_linear_regression_model(experiment_name)

In [None]:
# Create the sample experiments used for migration testing.

create_sample_experiments()