In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

import mlflow
from mlflow.models import infer_signature

from dotenv import load_dotenv

import yaml
import boto3
import pickle
import warnings
import datetime as dt

# Load variables from a local .env file (if present) and silence library warnings.
load_dotenv()
warnings.filterwarnings('ignore')

# Connection settings for the S3 storage and the MLflow server.
# setdefault() is used instead of plain assignment so that values already present
# in the process environment (including those just loaded from .env) take
# precedence; the literals below are only fallbacks.
# NOTE(review): credentials should not live in the notebook source - move them to
# .env / a secrets store and drop the fallback literals.
os.environ.setdefault('AWS_ACCESS_KEY_ID', "admin")  # S3 login
os.environ.setdefault('AWS_SECRET_ACCESS_KEY', "23wesdxc")  # S3 password
os.environ.setdefault('MLFLOW_S3_ENDPOINT_URL', "http://localhost:9000")  # S3 endpoint
os.environ.setdefault('MLFLOW_TRACKING_USERNAME', "admin")  # MLflow login
os.environ.setdefault('MLFLOW_TRACKING_PASSWORD', "23wesdxc")  # MLflow password
os.environ.setdefault('MLFLOW_TRACKING_URI', "http://localhost:8080")  # MLflow address

# Connect to MLflow
mlflow.set_tracking_uri(uri=os.getenv("MLFLOW_TRACKING_URI"))

# # Connect to S3
# session = boto3.session.Session()
# s3 = session.client(
#     service_name='s3',
#     endpoint_url=os.environ['MLFLOW_S3_ENDPOINT_URL']
# )

# Read the notebook configuration; the context manager closes the file handle.
with open("./config.yaml") as config_file:
    config = yaml.safe_load(config_file)

Data preprocessing

In [2]:
# Number of rows drawn for training and the seed that makes the draw repeatable.
SAMPLE_SIZE = 3000
SEED = 42

# Load the data, drop rows with missing values and encode the target as 0/1
# ('good' -> 1, anything else -> 0) under the column name 'Class'.
# Built as a chain (no inplace mutation) so re-running the cell is idempotent.
dfs = pd.read_csv('data/apple_quality.csv').dropna()
dfs = (
    dfs
    .assign(Quality=(dfs['Quality'] == 'good').astype('int64'))
    .rename(columns={'Quality': 'Class'})
)

# Draw the training subset. random_state keeps the subset (and therefore the
# logged metrics and the registered model) the same between runs; min() avoids
# a ValueError when fewer than SAMPLE_SIZE rows remain after dropna().
df = dfs.sample(n=min(SAMPLE_SIZE, len(dfs)), random_state=SEED)
y = df['Class']
# NOTE(review): every non-target column is used as a feature - confirm the CSV
# has no identifier-like columns that should be excluded.
X = df.drop(columns='Class')

# Standardize the features; X becomes a NumPy array from here on.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Model hyperparameters come from the 'train' section of the config.
params = config['train']

Tracking params

In [3]:
# Experiment this notebook works in
experiment_name = 'exp3'
# Name of the bucket where the artifacts will be stored
bucket_name = 'mlflow-bucket'

# Versioning parameters
registered_model_name = f"{experiment_name}_model"
MODEL_DESCR = 'some desc'
TRAINING_INFO = "Basic LR model for apples data"
USER_NAME = os.environ.get('MLFLOW_TRACKING_USERNAME')
CURRENT_FILENAME = 'tracker.ipynb'

# Tags attached to every run started from this notebook
TAGS = {
    "Training Info": TRAINING_INFO,
    'mlflow.user': USER_NAME,
    'mlflow.source.name': CURRENT_FILENAME,
}

# Select the experiment as the active one
mlflow.set_experiment(experiment_name=experiment_name)

2025/01/23 15:46:10 INFO mlflow.tracking.fluent: Experiment with name 'exp3' does not exist. Creating a new experiment.


In [4]:
# Train the model and track the run in MLflow.
# There is deliberately no try/except around the body: the `with` block ends the
# run on exit and marks it as failed when an exception escapes, so errors are
# both visible in the notebook and reflected in the run status instead of being
# printed and recorded as a successful run.
with mlflow.start_run(
        # experiment_id=1,
        # run_name = "exp_run", # if omitted, a random name is generated
        tags=TAGS,
        description='Some description',
        log_system_metrics=True
    ) as run:
    # Train the model
    log_regr = LogisticRegression(**params)
    log_regr.fit(X, y)
    y_pred_proba = log_regr.predict_proba(X)[:, 1]
    y_pred = log_regr.predict(X)

    # Log hyperparameters
    mlflow.log_params(params)

    # Log model metrics. They are computed on the same data the model was
    # fitted on, i.e. these are training-set metrics.
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)
    # Same value as "accuracy"; kept so the existing metric name does not disappear.
    mlflow.log_metric("accuracy_2", accuracy)
    mlflow.log_metric("logloss", log_loss(y, y_pred_proba))
    mlflow.log_metric("rocauc", roc_auc_score(y, y_pred_proba))

    # Log the model as a run artifact and register it in the model registry
    model_info = mlflow.sklearn.log_model(
        sk_model=log_regr,
        artifact_path="model",  # keep as is so the model lands in the 'model' folder of the bucket
        signature=infer_signature(X, y_pred),  # reuse the predictions computed above
        input_example=X[:5],  # a few rows are enough; avoids uploading the whole training matrix
        registered_model_name=registered_model_name
    )

    # Record the dataset that was used for training
    mlflow.log_input(
        mlflow.data.from_pandas(df, source="where_you_took_the_data"),
        context='training'
    )

    # Fetch the model info from the run's artifact location
    model_info = mlflow.models.get_model_info(mlflow.get_artifact_uri() + "/model")

    print(f"Model has been saved to: '{model_info.model_uri}'")

2025/01/23 15:46:12 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Successfully registered model 'exp3_model'.
2025/01/23 15:46:16 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: exp3_model, version 1
Created version '1' of model 'exp3_model'.


Model has been saved to: 's3://mlflow-bucket/1/cab99e052c9c4f15b4beec5521880641/artifacts/model'
🏃 View run delightful-jay-109 at: http://localhost:8080/#/experiments/1/runs/cab99e052c9c4f15b4beec5521880641
🧪 View experiment at: http://localhost:8080/#/experiments/1


2025/01/23 15:46:17 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/01/23 15:46:17 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


In [5]:
# List the runs recorded for the current experiment
mlflow.search_runs(experiment_names=[experiment_name])

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.logloss,metrics.accuracy_2,metrics.rocauc,metrics.accuracy,params.solver,params.max_iter,tags.Training Info,tags.mlflow.note.content,tags.mlflow.user,tags.mlflow.log-model.history,tags.mlflow.source.name,tags.mlflow.source.type,tags.mlflow.runName
0,cab99e052c9c4f15b4beec5521880641,1,FINISHED,s3://mlflow-bucket/1/cab99e052c9c4f15b4beec552...,2025-01-23 12:46:12.302000+00:00,2025-01-23 12:46:17.013000+00:00,0.506705,0.754333,0.829947,0.754333,lbfgs,1000,Basic LR model for apples data,Some description,admin,"[{""run_id"": ""cab99e052c9c4f15b4beec5521880641""...",tracker.ipynb,LOCAL,delightful-jay-109


In [18]:
# Info for a specific run id
# mlflow.get_run('68e58f39743a4d899ec475617f083dae')