In [1]:
import pandas as pd
import mlflow

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [2]:
mlflow.set_tracking_uri("sqlite:///mlflow.db")
mlflow.set_experiment("churn-experiment")

%load_ext lab_black

In [3]:
# Relative location of the preprocessed dataset. The same string is recorded
# as the "data-path" parameter of the MLflow run further down.
path = "../data/preprocessed.parquet"

In [4]:
# Read the parquet file at `path` into a DataFrame. The cells below expect it
# to contain a "Churn" column, which is used as the prediction target.
df = pd.read_parquet(path)

In [5]:
# Separate the target column from the feature columns.
target_column = "Churn"
X = df.drop(columns=target_column)
y = df[target_column]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across kernel restarts.
# NOTE(review): the split is not stratified on `y` -- confirm that is intended
# if the target classes are imbalanced.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [6]:
with mlflow.start_run():
    # One MLflow run: record the inputs, fit the classifier, evaluate it on
    # the hold-out split, and only then log + register the fitted model.

    mlflow.log_param("data-path", path)

    max_iter = 500
    mlflow.log_param("max_iter", max_iter)

    lr = LogisticRegression(max_iter=max_iter)
    lr.fit(X_train, y_train)

    # Accuracy on the training data. Previously computed and then discarded;
    # it is now logged so the train/test gap is visible next to the test
    # accuracy in the tracking UI.
    train_score = lr.score(X_train, y_train)
    mlflow.log_metric("train_accuracy_score", train_score)

    y_pred = lr.predict(X_test)

    mlflow.log_metric("accuracy_score", accuracy_score(y_test, y_pred))
    mlflow.log_metric("precision_score", precision_score(y_test, y_pred))
    mlflow.log_metric("recall_score", recall_score(y_test, y_pred))
    # NOTE(review): F1 is averaged with "weighted" while precision and recall
    # above use scikit-learn's default averaging, so the three numbers are not
    # directly comparable. Left unchanged to keep this metric comparable with
    # earlier runs of the experiment -- confirm which averaging is intended.
    mlflow.log_metric("f1_score", f1_score(y_test, y_pred, average="weighted"))

    # Log and register the model last: if fitting or any metric computation
    # above raises, no new registered model version is created without its
    # metrics attached to the run.
    mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="sklearn-model",
        registered_model_name="sklearn-logreg-model",
    )

Registered model 'sklearn-logreg-model' already exists. Creating a new version of this model...
2023/01/26 23:38:04 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation.                     Model name: sklearn-logreg-model, version 2
Created version '2' of model 'sklearn-logreg-model'.
