In [1]:
import mlflow
from mlflow.tracking import MlflowClient
# Client for the MLflow tracking server / model registry, bound explicitly to
# the tracking URI that the `mlflow` module is currently configured with.
client = MlflowClient(tracking_uri=mlflow.get_tracking_uri())

import pandas as pd
import seaborn as sns
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from typing import Tuple

### Defining paths and loading data

In [2]:
# Folder containing the CSV splits. The path is relative, so it resolves
# against the kernel's current working directory.
DATA_FOLDER = "../data/processed"
# Locations of the training and test CSV files inside DATA_FOLDER.
train_path = f"{DATA_FOLDER}/train.csv"
test_path = f"{DATA_FOLDER}/test.csv"

In [3]:
def load_data(path: str) -> pd.DataFrame:
    """Read a CSV file into a DataFrame.

    Args:
        path: Location of the CSV file to read.

    Returns:
        The file contents parsed by ``pandas.read_csv`` with default options.
    """
    frame = pd.read_csv(filepath_or_buffer=path)
    return frame

# Read both splits from disk into DataFrames.
train_df = load_data(train_path)
test_df = load_data(test_path)

In [4]:
def extract_x_y(df: pd.DataFrame, target: str = "age") -> Tuple[pd.DataFrame, np.ndarray]:
    """Split a DataFrame into a feature matrix and a target vector.

    Args:
        df: Input data; must contain a column named ``target``. It is not
            modified.
        target: Name of the column to predict. Defaults to ``"age"``.

    Returns:
        A ``(X, y)`` tuple where ``X`` is ``df`` without the target column and
        ``y`` holds the target column's values as a NumPy array.

    Raises:
        KeyError: If ``target`` is not a column of ``df``.
    """
    if target not in df.columns:
        raise KeyError(f"Target column {target!r} not found in DataFrame")
    # `to_numpy()` always yields an ndarray, matching the annotated return type
    # (`.values` may hand back an ExtensionArray for some dtypes).
    y = df[target].to_numpy()
    # Pass a list rather than a set literal: `columns` expects a label or a
    # list-like of labels.
    X = df.drop(columns=[target])
    return X, y

# Separate the features from the "age" target for each split.
X_train, y_train = extract_x_y(train_df)
X_test, y_test = extract_x_y(test_df)

### Training Linear Regression model

In [5]:
def train_model(X_train: pd.DataFrame, y_train: np.ndarray) -> LinearRegression:
    """Fit a linear regression on the given training data.

    Args:
        X_train: Feature matrix used for fitting.
        y_train: Target values aligned with the rows of ``X_train``.

    Returns:
        The fitted ``LinearRegression`` estimator (default hyper-parameters).
    """
    # `fit` returns the estimator itself, so construction and fitting chain.
    return LinearRegression().fit(X_train, y_train)

# Fit the regression on the training split.
model = train_model(X_train, y_train)

In [6]:
def predict_age(input_data: pd.DataFrame, model: LinearRegression):
    """Predict the target for each row of ``input_data``.

    Args:
        input_data: Feature matrix to score.
        model: Fitted estimator whose ``predict`` method is called.

    Returns:
        Whatever ``model.predict(input_data)`` returns.
    """
    predicted_ages = model.predict(input_data)
    return predicted_ages

def evaluate_model(y_true: np.ndarray, y_pred: np.ndarray):
    """Score predictions against ground truth with mean squared error.

    Args:
        y_true: Observed target values.
        y_pred: Predicted target values, aligned with ``y_true``.

    Returns:
        The value computed by ``sklearn.metrics.mean_squared_error``.
    """
    mse = mean_squared_error(y_true=y_true, y_pred=y_pred)
    return mse

# Mean squared error on the same data the model was fitted on.
prediction = predict_age(X_train, model)
train_mse = evaluate_model(y_train, prediction)
train_mse  # bare last expression so the notebook displays the value

4.7839456136066145

In [7]:
# Mean squared error on the test split, which was not used for fitting.
y_pred_test = predict_age(X_test, model)
test_mse = evaluate_model(y_test, y_pred_test)
test_mse  # bare last expression so the notebook displays the value

4.703226771601591

### MLflow

In [8]:
# Select the experiment that the run below is recorded under.
mlflow.set_experiment("abalone-prediction")

# Start a run; everything logged inside the `with` block is attached to it.
with mlflow.start_run() as run:
    run_id = run.info.run_id

    # Load data
    train_df, test_df = load_data(train_path), load_data(test_path)

    # Extract X and y
    X_train, y_train = extract_x_y(train_df)
    X_test, y_test = extract_x_y(test_df)

    # Train model
    model = train_model(X_train, y_train)

    # Evaluate model on the training set (via the notebook's own helper so
    # this cell agrees with the evaluation cells above).
    y_pred_train = predict_age(X_train, model)
    mse_train = evaluate_model(y_train, y_pred_train)

    # Evaluate model on the test set
    y_pred_test = predict_age(X_test, model)
    mse_test = evaluate_model(y_test, y_pred_test)

    # Log the metrics
    mlflow.log_metric("Training MSE", mse_train)
    mlflow.log_metric("Test MSE", mse_test)

    # Log the fitted model as a run artifact under "models". Without this
    # step the URI registered below points at an artifact path that was never
    # written, so the registered version could not be loaded afterwards.
    mlflow.sklearn.log_model(model, "models")

    # Register the logged model in the MLflow model registry
    result = mlflow.register_model(f"runs:/{run_id}/models", "abalone_lr_model")

2023/10/23 11:52:01 INFO mlflow.tracking.fluent: Experiment with name 'abalone-prediction' does not exist. Creating a new experiment.


Successfully registered model 'abalone_lr_model'.
2023/10/23 11:52:01 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: abalone_lr_model, version 1
Created version '1' of model 'abalone_lr_model'.


In [9]:
# Promote the model version that was just registered to "Production".
# The name and version come from the `register_model` result instead of being
# hard-coded: every re-run of the registration cell creates a new version, and
# a fixed `version=1` would keep promoting the stale first version.
client = MlflowClient()
client.transition_model_version_stage(
    name=result.name,
    version=result.version,
    stage="Production",
)

<ModelVersion: aliases=[], creation_timestamp=1698054721664, current_stage='Production', description=None, last_updated_timestamp=1698054721728, name='abalone_lr_model', run_id='883cebfd1cfc4a3c9c38e2e1fee8927f', run_link=None, source='file:///c:/Users/Joseph%20Moussa/Desktop/X/4A/Project/xhec-mlops-JIMAMS/notebooks/mlruns/450678049195762416/883cebfd1cfc4a3c9c38e2e1fee8927f/artifacts/models', status='READY', status_message=None, tags={}, user_id=None, version=1>