In [31]:
from zenml import step, pipeline
from zenml.client import Client
import mlflow
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error,r2_score
from sklearn.datasets import load_diabetes
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd

In [32]:

# Experiment tracker component of the currently active ZenML stack; the
# tracked steps defined below hand it to their @step decorator.
# NOTE(review): presumably None when the active stack has no experiment tracker
# registered -- the run log at the end of this notebook lists only an
# orchestrator and an artifact store for the `default` stack. Confirm.
experiment_tracker = Client().active_stack.experiment_tracker

In [33]:

@step
def ingest_data() -> pd.DataFrame:
    """Load scikit-learn's diabetes dataset as a single DataFrame.

    Returns:
        A frame with one column per dataset feature, followed by a
        ``target`` column holding the regression target.
    """
    bunch = load_diabetes()
    features = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    # assign() appends the new column at the end, like item assignment does.
    return features.assign(target=bunch.target)

In [34]:

@step
def clean_data(data: pd.DataFrame) -> pd.DataFrame:
    """Remove incomplete rows from the dataset.

    Args:
        data: The raw frame to clean.

    Returns:
        A new frame containing only the rows of ``data`` without any null value.
    """
    # Spelled-out defaults: drop a row as soon as any of its cells is null.
    complete_rows = data.dropna(axis=0, how="any")
    return complete_rows

In [35]:

# ZenML expects the *name* of the experiment tracker component here, not the
# component object; fall back to None when the active stack has no tracker.
@step(
    experiment_tracker=experiment_tracker.name if experiment_tracker else None
)
def train_model(data: pd.DataFrame) -> LinearRegression:
    """Trains a linear regression model with MLflow tracking.

    Args:
        data: Feature columns plus a ``target`` column.

    Returns:
        The ``LinearRegression`` model fitted on the 80% training split.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    from contextlib import nullcontext

    mlflow.sklearn.autolog()

    X = data.drop('target', axis=1)
    y = data['target']
    # Fixed seed and test size: evaluate_model rebuilds the complementary
    # test split from the same arguments.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.2, random_state=42)

    # When a ZenML MLflow experiment tracker is attached, a run is already
    # active inside the step and a second non-nested mlflow.start_run() raises.
    # Reuse the active run if there is one; otherwise open (and close) our own.
    if mlflow.active_run() is not None:
        run_context = nullcontext()
    else:
        run_context = mlflow.start_run()

    with run_context:
        model = LinearRegression()
        model.fit(X_train, y_train)

    return model

In [36]:

# ZenML expects the *name* of the experiment tracker component here, not the
# component object; fall back to None when the active stack has no tracker.
@step(
    experiment_tracker=experiment_tracker.name if experiment_tracker else None
)
def evaluate_model(model: LinearRegression, data: pd.DataFrame) -> None:
    """Evaluates the model and logs metrics to MLflow.

    Args:
        model: A fitted regression model.
        data: Feature columns plus a ``target`` column; the held-out 20%
            split of this frame is used for scoring.

    Returns:
        None. The ``mse`` and ``r2`` metrics are logged to the MLflow run.
    """
    # Local import so this cell does not depend on an edit to the import cell.
    from contextlib import nullcontext

    X = data.drop('target', axis=1)
    y = data['target']
    # Same test_size and random_state as train_model, so this is exactly the
    # portion of the data the model was not fitted on.
    _, X_test, _, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # When a ZenML MLflow experiment tracker is attached, a run is already
    # active inside the step and a second non-nested mlflow.start_run() raises.
    # Reuse the active run if there is one; otherwise open (and close) our own.
    if mlflow.active_run() is not None:
        run_context = nullcontext()
    else:
        run_context = mlflow.start_run()

    with run_context:
        y_pred = model.predict(X_test)
        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mlflow.log_metric("mse", mse)
        mlflow.log_metric("r2", r2)

In [37]:

@pipeline
def Mlflow_pipeline():
    """Regression pipeline: ingest -> clean -> train -> evaluate.

    The cleaned frame feeds both the training and the evaluation step.
    """
    raw_frame = ingest_data()
    tidy_frame = clean_data(data=raw_frame)
    fitted_model = train_model(data=tidy_frame)
    evaluate_model(model=fitted_model, data=tidy_frame)

In [38]:

# Entry point: calling the @pipeline-decorated function triggers a pipeline run.
if __name__ == "__main__":
    Mlflow_pipeline()

[1;35mInitiating a new run for the pipeline: [0m[1;36mMlflow_pipeline[1;35m.[0m
[1;35mRegistered new pipeline: [0m[1;36mMlflow_pipeline[1;35m.[0m
[1;35mUsing user: [0m[1;36mdefault[1;35m[0m
[1;35mUsing stack: [0m[1;36mdefault[1;35m[0m
[1;35m  orchestrator: [0m[1;36mdefault[1;35m[0m
[1;35m  artifact_store: [0m[1;36mdefault[1;35m[0m
[1;35mYou can visualize your pipeline runs in the [0m[1;36mZenML Dashboard[1;35m. In order to try it locally, please run [0m[1;36mzenml login --local[1;35m.[0m
[1;35mUsing cached version of step [0m[1;36mingest_data[1;35m.[0m
[1;35mUsing cached version of step [0m[1;36mclean_data[1;35m.[0m
[1;35mUsing cached version of step [0m[1;36mtrain_model[1;35m.[0m
[1;35mUsing cached version of step [0m[1;36mevaluate_model[1;35m.[0m
[1;35mAll steps of the pipeline run were cached.[0m
