In [2]:
import os
import mlflow
import psycopg2 as psycopg
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score, precision_score, recall_score, f1_score

In [4]:

# MLflow configuration: experiment / run / registered-model names and the
# address of the tracking server (also used as the model registry).
EXPERIMENT_NAME = "churn_ivan_panchenko"
RUN_NAME = "model_0_registry"
REGISTRY_MODEL_NAME = "churn_model_ivan_panchenko"
TRACKING_SERVER_HOST = "127.0.0.1"
TRACKING_SERVER_PORT = 5000
TRACKING_SERVER_URI = f"http://{TRACKING_SERVER_HOST}:{TRACKING_SERVER_PORT}"


# Environment setup: endpoint of the S3-compatible storage.
os.environ["MLFLOW_S3_ENDPOINT_URL"] = "https://storage.yandexcloud.net"

# The S3 credentials must already be exported in the environment. Copying
# os.getenv(...) back into os.environ does nothing when the variable is set
# and fails with an opaque TypeError when it is not, so validate explicitly
# and report which variable is missing (names only, never the values).
missing_s3_vars = [
    name
    for name in ("AWS_ACCESS_KEY_ID", "AWS_SECRET_ACCESS_KEY")
    if not os.getenv(name)
]
if missing_s3_vars:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(missing_s3_vars)
    )

mlflow.set_tracking_uri(TRACKING_SERVER_URI)
mlflow.set_registry_uri(TRACKING_SERVER_URI)


In [5]:

# Connect to the database and load the source table into a DataFrame.
connection = {"sslmode": "require", "target_session_attrs": "read-write"}
postgres_credentials = {
    "host": os.getenv('DB_DESTINATION_HOST'),
    "port": os.getenv('DB_DESTINATION_PORT'),
    "dbname": os.getenv('DB_DESTINATION_NAME'),
    "user": os.getenv('DB_DESTINATION_USER'),
    "password": os.getenv('DB_DESTINATION_PASSWORD')
}

# os.getenv returns None (not "") for an unset variable, so a `!= ""` check
# lets missing settings through. Reject both None and empty values, and use a
# real exception instead of `assert` (asserts are stripped under `python -O`).
# Only the setting names are reported so the password never reaches the output.
missing_credentials = [key for key, value in postgres_credentials.items() if not value]
if missing_credentials:
    raise ValueError(
        "Missing database connection settings: "
        + ", ".join(missing_credentials)
        + " (check the DB_DESTINATION_* environment variables)"
    )

connection.update(postgres_credentials)

TABLE_NAME = "users_churn"

# With psycopg2, `with connect(...) as conn` only wraps a transaction and does
# not close the connection, so close it explicitly to avoid leaking it.
conn = psycopg.connect(**connection)
try:
    with conn.cursor() as cur:
        # TABLE_NAME is a constant defined above, not user input.
        cur.execute(f"SELECT * FROM {TABLE_NAME}")
        data = cur.fetchall()
        # The first item of every cursor.description entry is the column name.
        columns = [col[0] for col in cur.description]
finally:
    conn.close()

df = pd.DataFrame(data, columns=columns)


In [6]:

# Data preparation: take `target` as the label, drop the label together with
# customer_id / begin_date / end_date from the features, then one-hot encode
# the remaining columns (first level of each encoded column is dropped).
y = df['target']
X = pd.get_dummies(
    df.drop(columns=['target', 'customer_id', 'begin_date', 'end_date']),
    drop_first=True,
)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Both preprocessing steps are fitted on the training split only and then
# applied unchanged to the test split.
imputer = SimpleImputer(strategy='mean')
scaler = StandardScaler()

X_train_imputed = imputer.fit_transform(X_train)
X_train_scaled = scaler.fit_transform(X_train_imputed)

X_test_imputed = imputer.transform(X_test)
X_test_scaled = scaler.transform(X_test_imputed)

# Model training.
# NOTE(review): `model` is the bare LogisticRegression; `imputer` and `scaler`
# are separate objects, so the estimator expects already imputed and scaled input.
model = LogisticRegression(random_state=42).fit(X_train_scaled, y_train)

# Predictions on the test split: hard labels, and the second column of
# predict_proba as the score.
prediction = model.predict(X_test_scaled)
proba = model.predict_proba(X_test_scaled)[:, 1]


In [7]:
# Metric calculation against `y_test`.
# ROC AUC is computed from the scores in `proba`; precision, recall and F1
# are computed from the hard labels in `prediction`.
roc_auc = roc_auc_score(y_test, proba)
precision, recall, f1 = (
    metric(y_test, prediction)
    for metric in (precision_score, recall_score, f1_score)
)


In [8]:
# Prepare the extra arguments passed to MLflow when the model is logged.

# Pin the installed scikit-learn and pandas versions.
pip_requirements = [
    f"{package}=={module.__version__}"
    for package, module in (("scikit-learn", sklearn), ("pandas", pd))
]

# The first five rows of the scaled test matrix serve as the input example;
# the signature is inferred from the full scaled test matrix and the labels
# predicted for it.
input_example = X_test_scaled[:5]
signature = mlflow.models.infer_signature(X_test_scaled, prediction)

metadata = dict(description="Logistic Regression model for churn prediction")


In [11]:
# Resolve the id of the experiment that the run is logged into.
# get_experiment_by_name returns None when no experiment has this name, so
# create the experiment in that case instead of failing with AttributeError
# on a fresh tracking server.
experiment = mlflow.get_experiment_by_name(EXPERIMENT_NAME)
if experiment is None:
    experiment_id = mlflow.create_experiment(EXPERIMENT_NAME)
else:
    experiment_id = experiment.experiment_id

In [12]:
# Show the resolved experiment id as the cell output.
experiment_id

'11'

In [13]:

with mlflow.start_run(run_name=RUN_NAME, experiment_id=experiment_id) as run:
    run_id = run.info.run_id

    # Log the evaluation metrics.
    mlflow.log_metric("roc_auc", roc_auc)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.log_metric("f1", f1)

    # Log the model; passing registered_model_name also registers it in the
    # model registry under that name.
    mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="model",
        pip_requirements=pip_requirements,
        signature=signature,
        input_example=input_example,
        registered_model_name=REGISTRY_MODEL_NAME,
        metadata=metadata
    )

print(f"Model logged with run_id: {run_id}")

# Find the registry version created by this run.
# Taking the first "latest version in stage None" is fragile: it raises
# IndexError when no version is in that stage and may return a version that
# belongs to a different run. Matching on run_id identifies exactly the
# version registered above.
client = mlflow.tracking.MlflowClient()
run_versions = [
    candidate
    for candidate in client.search_model_versions(f"name='{REGISTRY_MODEL_NAME}'")
    if candidate.run_id == run_id
]
if not run_versions:
    raise RuntimeError(
        f"No version of model '{REGISTRY_MODEL_NAME}' was registered by run {run_id}"
    )
# Versions can come back as strings, so compare them numerically.
model_version = max(run_versions, key=lambda candidate: int(candidate.version)).version

print(f"Model version: {model_version}")

Registered model 'churn_model_ivan_panchenko' already exists. Creating a new version of this model...
2025/08/05 13:04:46 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: churn_model_ivan_panchenko, version 2


Model logged with run_id: f576840ade3245bbba26102b0f2cbf25
Model version: 2


Created version '2' of model 'churn_model_ivan_panchenko'.


In [14]:
# Find the model version registered by the run logged above.
# Looking the version up by run_id (instead of "latest version in stage None")
# promotes exactly the model that was just logged and keeps this cell
# re-runnable: once a version has left stage "None", the stage-based lookup
# returns an empty list and indexing it raises IndexError.
client = mlflow.tracking.MlflowClient()
run_versions = [
    candidate
    for candidate in client.search_model_versions(f"name='{REGISTRY_MODEL_NAME}'")
    if candidate.run_id == run_id
]
if not run_versions:
    raise RuntimeError(
        f"No version of model '{REGISTRY_MODEL_NAME}' was registered by run {run_id}"
    )
# Versions can come back as strings, so compare them numerically.
latest_version = max(run_versions, key=lambda candidate: int(candidate.version)).version

# Move that version to the "Production" stage.
client.transition_model_version_stage(
    name=REGISTRY_MODEL_NAME,
    version=latest_version,
    stage="Production"
)

print(f"Model {REGISTRY_MODEL_NAME} version {latest_version} transitioned to Production stage")

Model churn_model_ivan_panchenko version 2 transitioned to Production stage


Ожидаемый вывод при регистрации новой версии модели должен выглядеть примерно так:

Registered model **'{your_model_name}'** already exists. Creating a new version of this model...
2023/10/19 17:34:03 INFO mlflow.tracking._model_registry.client: Waiting up to 60 seconds for model version to finish creation. Model name: **'{your_model_name}'**, version 2
Created version '2' of model **'{your_model_name}'**.