In [1]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, log_loss, roc_auc_score
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

import mlflow
from mlflow.models import infer_signature

from dotenv import load_dotenv

import yaml
import boto3
import pickle
import warnings
import datetime as dt

# Load a local .env file first so its values take part in the lookup below.
load_dotenv()
warnings.filterwarnings('ignore')

# Fallback connection settings for the local development stack.
# `setdefault` only fills in variables that are still missing, so values coming
# from the real environment or from .env are respected instead of being
# silently overwritten by the literals below.
# NOTE(review): the credential fallbacks are still hardcoded in the notebook -
# move them into .env / a secrets manager before sharing or committing it.
_LOCAL_DEFAULTS = {
    'AWS_ACCESS_KEY_ID': "admin",  # S3 login
    'AWS_SECRET_ACCESS_KEY': "23wesdxc",  # S3 password
    'MLFLOW_S3_ENDPOINT_URL': "http://localhost:9000",  # S3 endpoint
    'MLFLOW_TRACKING_USERNAME': "admin",  # MLflow login
    'MLFLOW_TRACKING_PASSWORD': "23wesdxc",  # MLflow password
    'MLFLOW_TRACKING_URI': "http://localhost:8080",  # MLflow server address
}
for _name, _value in _LOCAL_DEFAULTS.items():
    os.environ.setdefault(_name, _value)

# Connect to MLflow
mlflow.set_tracking_uri(uri=os.getenv("MLFLOW_TRACKING_URI"))

# # Connect to S3
# session = boto3.session.Session()
# s3 = session.client(
#     service_name='s3',
#     endpoint_url=os.environ['MLFLOW_S3_ENDPOINT_URL']
# )

# Read the notebook configuration; the context manager closes the file handle
# as soon as parsing is done.
with open("./config.yaml", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

Data preprocessing

In [2]:
# Fixed seed so the sampled rows (and every metric derived from them) are
# identical after Restart Kernel -> Run All.
RANDOM_STATE = 42
SAMPLE_SIZE = 3000

# Load the raw data, drop incomplete rows, encode the target as
# 1 ('good') / 0 (anything else) and rename it to 'Class'.
# Built as a single chain so re-running the cell never mutates a frame in place.
dfs = (
    pd.read_csv('data/apple_quality.csv')
    .dropna()
    .assign(Quality=lambda frame: (frame['Quality'] == 'good').astype('int64'))
    .rename(columns={'Quality': 'Class'})
)

# Work on a reproducible subsample; never request more rows than are available.
df = dfs.sample(n=min(SAMPLE_SIZE, len(dfs)), random_state=RANDOM_STATE)
y = df['Class']
X = df.drop(columns='Class')

# Standardize the features; after this step X is a NumPy array, not a DataFrame.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Hyperparameters for the model come from the 'train' section of the config.
params = config['train']

Tracking params

In [3]:
# Experiment this notebook logs to
experiment_name = 'exp3'
# Name of the bucket where artifacts are stored
bucket_name = 'mlflow-bucket'

# Versioning parameters
registered_model_name = f"{experiment_name}_model"
MODEL_DESCR = 'some desc'
TRAINING_INFO = "Basic LR model for apples data"
USER_NAME = os.getenv('MLFLOW_TRACKING_USERNAME')
CURRENT_FILENAME = 'tracker.ipynb'

# Tags attached to every run started from this notebook
TAGS = dict([
    ("Training Info", TRAINING_INFO),
    ('mlflow.user', USER_NAME),
    ('mlflow.source.name', CURRENT_FILENAME),
])

# Activate the experiment (MLflow creates it when it does not exist yet)
mlflow.set_experiment(experiment_name=experiment_name)

2025/01/23 15:46:10 INFO mlflow.tracking.fluent: Experiment with name 'exp3' does not exist. Creating a new experiment.


In [4]:
# Start the training run.
# The `with` block owns the run lifecycle: MLflow closes the run as FINISHED on
# a clean exit and as FAILED when an exception escapes the block. Errors are
# therefore allowed to propagate instead of being printed and swallowed, which
# previously left failed runs recorded as FINISHED.
with mlflow.start_run(
    # experiment_id=1,
    # run_name="exp_run",  # If omitted, a random name is generated
    tags=TAGS,
    description='Some description',
    log_system_metrics=True,
) as run:
    # Train the model
    log_regr = LogisticRegression(**params)
    log_regr.fit(X, y)
    y_pred_proba = log_regr.predict_proba(X)[:, 1]
    y_pred = log_regr.predict(X)

    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the model metrics.
    # NOTE: predictions are made on the same X the model was fitted on, so
    # these are training-set scores, not an estimate of generalization.
    accuracy = accuracy_score(y, y_pred)
    mlflow.log_metric("accuracy", accuracy)
    # Duplicate of "accuracy"; kept so runs stay comparable with earlier ones.
    mlflow.log_metric("accuracy_2", accuracy)
    mlflow.log_metric("logloss", log_loss(y, y_pred_proba))
    mlflow.log_metric("rocauc", roc_auc_score(y, y_pred_proba))

    # Push the model to the artifact bucket and register a new model version
    mlflow.sklearn.log_model(
        sk_model=log_regr,
        artifact_path="model",  # keep as is so the model lands in the 'model' folder of the bucket
        signature=infer_signature(X, y_pred),
        # A handful of rows is enough to illustrate the input format; passing
        # the full training matrix would upload all of it as the example.
        input_example=X[:5],
        registered_model_name=registered_model_name,
    )

    # Record which dataset the model was trained on
    mlflow.log_input(
        mlflow.data.from_pandas(df, source="where_you_took_the_data"),
        context='training'
    )

    # Fetch the stored model's metadata to report where it was saved
    model_info = mlflow.models.get_model_info(mlflow.get_artifact_uri() + "/model")

    print(f"Model has been saved to: '{model_info.model_uri}'")

2025/01/23 15:46:12 INFO mlflow.system_metrics.system_metrics_monitor: Started monitoring system metrics.
Successfully registered model 'exp3_model'.
2025/01/23 15:46:16 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: exp3_model, version 1
Created version '1' of model 'exp3_model'.


Model has been saved to: 's3://mlflow-bucket/1/cab99e052c9c4f15b4beec5521880641/artifacts/model'
üèÉ View run delightful-jay-109 at: http://localhost:8080/#/experiments/1/runs/cab99e052c9c4f15b4beec5521880641
üß™ View experiment at: http://localhost:8080/#/experiments/1


2025/01/23 15:46:17 INFO mlflow.system_metrics.system_metrics_monitor: Stopping system metrics monitoring...
2025/01/23 15:46:17 INFO mlflow.system_metrics.system_metrics_monitor: Successfully terminated system metrics monitoring!


In [5]:
# List every run recorded for the current experiment
mlflow.search_runs(experiment_names=[experiment_name])

Unnamed: 0,run_id,experiment_id,status,artifact_uri,start_time,end_time,metrics.logloss,metrics.accuracy_2,metrics.rocauc,metrics.accuracy,params.solver,params.max_iter,tags.Training Info,tags.mlflow.note.content,tags.mlflow.user,tags.mlflow.log-model.history,tags.mlflow.source.name,tags.mlflow.source.type,tags.mlflow.runName
0,cab99e052c9c4f15b4beec5521880641,1,FINISHED,s3://mlflow-bucket/1/cab99e052c9c4f15b4beec552...,2025-01-23 12:46:12.302000+00:00,2025-01-23 12:46:17.013000+00:00,0.506705,0.754333,0.829947,0.754333,lbfgs,1000,Basic LR model for apples data,Some description,admin,"[{""run_id"": ""cab99e052c9c4f15b4beec5521880641""...",tracker.ipynb,LOCAL,delightful-jay-109


In [18]:
# # # Info for a specific run id
# mlflow.get_run('68e58f39743a4d899ec475617f083dae')