## 1. ML flow settings

## 2. Model Load

In [1]:
# 라이브러리 import
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import mlflow
import mlflow.sklearn

In [4]:
iris = load_iris() # 꽃 받침과 꽃 잎 시리즈를 가지고 꽃의 종류를 결정

X = iris.data
y = iris.target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 학습 데이터와 테스트 데이터로 분리 => train_test_split()
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=123)

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

model = LogisticRegression(max_iter=0)
model.fit(X_train, y_train) # train=모의고사 # 학습을 시킬 때는 학습 데이터만 제공

y_pred = model.predict(X_test)   # 수능 문제를 제공

accuracy = accuracy_score(y_test, y_pred)

print(f"정확도 : {accuracy * 100}")

정확도 : 96.66666666666667


### 모델 학습과 모델 성능

- 심플하게 모든 것을 ML flow에게 맡긴다. => mlflow.autolog()
- autolog에서 추적하지 못하는 다른 파라미터, 메트릭, 메타데이터 등등의 값을 수동으로 기록

In [12]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")
print("Tracking URI : ", mlflow.get_tracking_uri())

Tracking URI :  http://127.0.0.1:5000


In [13]:
exp = mlflow.set_experiment(experiment_name='iris_classification_experiment')
print(f"Name: {exp.name}")
print(f"ID: {exp.experiment_id}")
print(f"Location: {exp.artifact_location}")
print(f"Tags: {exp.tags}")
print(f"Lifecycle: {exp.lifecycle_stage}")
print(f"Create Timestamp: {exp.creation_time}")

2024/08/14 14:32:15 INFO mlflow.tracking.fluent: Experiment with name 'iris_classification_experiment' does not exist. Creating a new experiment.


Name: iris_classification_experiment
ID: 622491102150390550
Location: mlflow-artifacts:/622491102150390550
Tags: {}
Lifecycle: active
Create Timestamp: 1723613535583


In [15]:
import mlflow.sklearn
mlflow.autolog()

# with, end 구문을 붙이지 않아도 알아서 실험 종료가 됨.
with mlflow.start_run():  # 실험 시작
    model = LogisticRegression(max_iter=200)
    model.fit(X_train, y_train) # train=모의고사 # 학습을 시킬 때는 학습 데이터만 제공

    y_pred = model.predict(X_test)   # 수능 문제를 제공

    accuracy = accuracy_score(y_test, y_pred)

    print(f"정확도 : {accuracy * 100}")

2024/08/14 14:37:22 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/08/14 14:37:23 INFO mlflow.tracking._tracking_service.client: 🏃 View run adaptable-snake-806 at: http://127.0.0.1:5000/#/experiments/622491102150390550/runs/69adb9faa565433d89de33bb9d04e2ab.
2024/08/14 14:37:23 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/622491102150390550.


정확도 : 96.66666666666667


In [19]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

models = {
    "LogisticRegression" : LogisticRegression(
        max_iter=200,   # 최대 반복 횟수
        C=1.0, # 규제 강도(C값이 작을수록 규제가 강해짐)
        solver='lbfgs', # 최적화 알고리즘
        random_state=123
    ),
    "RandomForest" : RandomForestClassifier(
        n_estimators=100,   # 트리의 갯수
        max_depth=None,
        random_state=123
    ),
    "SVC" : SVC(
        kernel='linear',   # linear, sigmoid, poly, rbf
        random_state=123
    )
}

In [27]:
# 위 모델들을 한 번에 불러와서(반복문) => 최고의 모델을 찾아내고, 해당 파라미터를 기록합니다.

mlflow.autolog()

best_accuracy = 0
best_model = None
best_model_name = None

with mlflow.start_run(nested=True):
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model_name = model_name
            best_model = model

        print(f"Model Name: {model_name}, Accuracy: {accuracy}")

        mlflow.log_param('best_model', best_model_name)  # 파라미터 로그
        mlflow.log_metric('best_accuracy', best_accuracy) # 메트릭 로그

    print(f"Best Model Name: {best_model_name}, Best Accuracy: {best_accuracy}")

2024/08/14 15:24:13 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Model Name: LogisticRegression, Accuracy: 0.9666666666666667
Model Name: RandomForest, Accuracy: 0.9333333333333333


2024/08/14 15:25:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run beautiful-mink-571 at: http://127.0.0.1:5000/#/experiments/622491102150390550/runs/761b56e9de0b4f46bc3bbed69a84abc0.
2024/08/14 15:25:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/622491102150390550.


Model Name: SVC, Accuracy: 0.9333333333333333
Best Model Name: LogisticRegression, Best Accuracy: 0.9666666666666667


In [32]:
mlflow.autolog()
# 전체 모델에 대해서 기록을 하고 싶은데?

for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name, nested=True):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # 모델을 mlflow에 저장
        model_path = f"{model_name}_model"
        mlflow.sklearn.log_model(model, model_path)  # 모델을 artifact 디렉토리에 저장

        mlflow.log_param(f'{model_name}_param', model.get_params())  # 파라미터 로그
        mlflow.log_metric(f'{model_name}_accuracy', accuracy) # 메트릭 로그

    print(f"Model Name: {model_name}, Accuracy: {accuracy}")

2024/08/14 15:39:43 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/08/14 15:39:46 INFO mlflow.tracking._tracking_service.client: 🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/622491102150390550/runs/cfbde3b3c10243d481f1eba7bc42a896.
2024/08/14 15:39:46 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/622491102150390550.


Model Name: LogisticRegression, Accuracy: 0.9666666666666667


2024/08/14 15:39:48 INFO mlflow.tracking._tracking_service.client: 🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/622491102150390550/runs/88b24c1652da4f8ca9f3f102e70e4391.
2024/08/14 15:39:48 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/622491102150390550.


Model Name: RandomForest, Accuracy: 0.9333333333333333


2024/08/14 15:39:50 INFO mlflow.tracking._tracking_service.client: 🏃 View run SVC at: http://127.0.0.1:5000/#/experiments/622491102150390550/runs/94cd09103ece4358b47708fae5150de3.
2024/08/14 15:39:50 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/622491102150390550.


Model Name: SVC, Accuracy: 0.9333333333333333


In [35]:
# 모델 관리
from mlflow.tracking import MlflowClient

client = MlflowClient()

# 모델을 등록하고, 해당 모델의 버전을 반환
def register_model(model_name, run_id, model_uri='model'):         # 모델 등록
    model_uri = f"runs:/{run_id}/{model_uri}"
    model_version = mlflow.register_model(model_uri, model_name)
    return model_version

# 등록된 모델을 stage 단계로 승격
def promote_to_staging(model_name, run_id, model_uri):      # stage
    model_version = register_model(model_name, run_id, model_uri)

    client.set_model_version_tag(
        name=model_name,
        version=model_version.version,
        key='stage',
        value='staging'
    )
    print(f"Model: {model_name}, version: {model_version} promoted to Production...")

def promote_to_production(model_name, version):   # production
    client.set_model_version_tag(
        name=model_name,
        version=version,
        key='stage',
        value='production'
    )

    print(f"Model: {model_name}, version: {version} promoted to Production...")

def archive_model(model_name, version):           # archive: 모델 폐기 단계
    client.set_model_version_tag(
        name=model_name,
        version=version,
        key='stage',
        value='archived'
    )

    print(f"Model: {model_name}, version: {version} Archived ...")

In [34]:
# http://127.0.0.1:5000/#/experiments/622491102150390550/runs/cfbde3b3c10243d481f1eba7bc42a896
# 실험 ID: 622491102150390550
# 실행 ID: cfbde3b3c10243d481f1eba7bc42a896
# Model Name: LogisticRegression

# (1) 모델 등록
run_id = 'cfbde3b3c10243d481f1eba7bc42a896'
model_name = 'LogisticRegression'

model_version = register_model(model_name, run_id)
print(model_version)

Successfully registered model 'LogisticRegression'.
2024/08/14 16:41:41 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression, version 1


<ModelVersion: aliases=[], creation_timestamp=1723621301559, current_stage='None', description='', last_updated_timestamp=1723621301559, name='LogisticRegression', run_id='cfbde3b3c10243d481f1eba7bc42a896', run_link='', source='mlflow-artifacts:/622491102150390550/cfbde3b3c10243d481f1eba7bc42a896/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='1'>


Created version '1' of model 'LogisticRegression'.


In [38]:
# (2) 모델을 staging 단계로 승격
promote_to_staging(model_name, run_id, 'model')

Registered model 'LogisticRegression' already exists. Creating a new version of this model...
2024/08/14 16:49:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression, version 3


Model: LogisticRegression, version: <ModelVersion: aliases=[], creation_timestamp=1723621771621, current_stage='None', description='', last_updated_timestamp=1723621771621, name='LogisticRegression', run_id='cfbde3b3c10243d481f1eba7bc42a896', run_link='', source='mlflow-artifacts:/622491102150390550/cfbde3b3c10243d481f1eba7bc42a896/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='3'> promoted to Production...


Created version '3' of model 'LogisticRegression'.


In [37]:
# (3) 모델을 Production 단계로 승격
promote_to_production(model_name, '2')

Model: LogisticRegression, version: 2 promoted to Production...


In [39]:
# (4) 새로운 버전의 모델을 Production으로 승격시키고, 기존의 Production 버전은 Archived

promote_to_production(model_name, '3') # 3 staging -> production
archive_model(model_name, '2') # production -> archive

Model: LogisticRegression, version: 3 promoted to Production...
Model: LogisticRegression, version: 2 Archived ...


### 모델 Serving

- FastAPI, Flask ... => API로 언제 만들지....?
- mlflow가 해결을 해준다.
- inference: 값을 전달하고, 그 값에 대한 예측값을 return(API)

In [42]:
#  PM 결과를 보여줘야 되는 경우 있음. 성능 어때요? 결과 어때요? - PM은 모름 (눈으로 보여줘야됨)
# (1) Model Load
model_name = 'LogisticRegression'
model_version = 3

model_uri = f'models:/{model_name}/{model_version}'

loaded_model = mlflow.pyfunc.load_model(model_uri)

test_input = X_test[:10]
loaded_model.predict(test_input)



array([1, 2, 2, 1, 0, 2, 1, 0, 0, 1])

### Model API Serving
- 서버가 하나 더 필요합니다. REST API
- mlflow 설치 할 때 flask => API를 내려줄 flask 서버를 하나 더 띄워줘야 한다.

http://127.0.0.1:5000/#/experiments/622491102150390550/runs/cfbde3b3c10243d481f1eba7bc42a896

mlflow models serve -m ./mlartifacts/622491102150390550/cfbde3b3c10243d481f1eba7bc42a896/artifacts/model -p 5001 --no-conda

In [45]:
import pandas as pd

X_test_df = pd.DataFrame(X_test, columns=iris.feature_names)

data = {
    'dataframe_split' : X_test_df[:10].to_dict(orient="split")
}   # data type: dict (json: X) -> json

url = "http://127.0.0.1:5001/invocations"

headers = {"Content-Type":"application/json"}

import requests
import json

res = requests.post(url, headers=headers, data=json.dumps(data))

print("Server Response(inference) : ", res.json())

Server Response(inference) :  {'predictions': [1, 2, 2, 1, 0, 2, 1, 0, 0, 1]}
