## 1. ML flow settings

- pip install mlflow
- mlflow ui

## 2. Model Load

In [1]:
# 라이브러리 import
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

import mlflow
import mlflow.sklearn

In [2]:
iris = load_iris() # 꽃 받침과 꽃 잎 사이즈를 가지고 꽃의 종류를 결정

X = iris.data
y = iris.target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 학습 데이터와 테스트 데이터로 분리 => train_test_split()
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=123)

In [3]:
from sklearn.linear_model import LogisticRegression

LogisticRegression?

[0;31mInit signature:[0m
[0mLogisticRegression[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mpenalty[0m[0;34m=[0m[0;34m'l2'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0;34m*[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdual[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtol[0m[0;34m=[0m[0;36m0.0001[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mC[0m[0;34m=[0m[0;36m1.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfit_intercept[0m[0;34m=[0m[0;32mTrue[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mintercept_scaling[0m[0;34m=[0m[0;36m1[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mclass_weight[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrandom_state[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0msolver[0m[0;34m=[0m[0;34m'lbfgs'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_iter[0m[0;34m=[0m[0;36m100[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmulti_class[0m[0;34m=[0m[

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

model = LogisticRegression(max_iter=0)
model.fit(X_train, y_train) # train=모의고사 # 학습을 시킬 때는 학습 데이터만 제공

y_pred = model.predict(X_test) # 수능 문제를 제공
accuracy = accuracy_score(y_test, y_pred)

print(f"정확도 : {accuracy * 100}")

정확도 : 93.33333333333333


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


### 모델 학습과 모델 성능

- 심플하게 모든 것을 ML flow에게 맡긴다. => mlflow.autolog()
- autolog에서 추적하지 못하는 다른 파라미터, 메트릭, 메타데이터 등의 값을 수동으로 기록

In [5]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")
print("Tracking URI", mlflow.get_tracking_uri())

Tracking URI http://127.0.0.1:5000


In [6]:
exp = mlflow.set_experiment(experiment_name='iris_classification_experiment')

print(f"Name: {exp.name}") # option + shift + 화살표 = copy
print(f"ID: {exp.experiment_id}")
print(f"Location: {exp.artifact_location}")
print(f"Tags: {exp.tags}")
print(f"Lifecycle: {exp.lifecycle_stage}")
print(f"Create Timestamp: {exp.creation_time}")


Name: iris_classification_experiment
ID: 471758380148410695
Location: mlflow-artifacts:/471758380148410695
Tags: {}
Lifecycle: active
Create Timestamp: 1723613298348


In [7]:
import time

time.time()

1723625878.5851882

In [8]:
import mlflow.sklearn

mlflow.autolog()

mlflow.start_run() # 실험 시작
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train) # 학습 시킬 때는 학습 데이터만 제공

y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print(f"accuracy: {accuracy * 100}")

mlflow.end_run() # 실험 종료

2024/08/14 17:57:58 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/08/14 17:58:00 INFO mlflow.tracking._tracking_service.client: 🏃 View run abrasive-snake-442 at: http://127.0.0.1:5000/#/experiments/471758380148410695/runs/2aa88d9534004c08a27bec3df522ba66.
2024/08/14 17:58:00 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/471758380148410695.


accuracy: 96.66666666666667


In [9]:
import mlflow.sklearn

mlflow.autolog()

# with, end 구문을 붙이지 않아도 알아서 실험 종료가 됩니다.
with mlflow.start_run(nested=True): # 실험 시작
    model = LogisticRegression(max_iter=200)
    model.fit(X_train, y_train) # 학습 시킬 때는 학습 데이터만 제공

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"accuracy: {accuracy * 100}")

2024/08/14 17:58:00 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/08/14 17:58:02 INFO mlflow.tracking._tracking_service.client: 🏃 View run upset-skink-597 at: http://127.0.0.1:5000/#/experiments/471758380148410695/runs/3d7ab2aa8d7e456688dcbefa3dff30d1.
2024/08/14 17:58:02 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/471758380148410695.


accuracy: 96.66666666666667


In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

models = {
    "LogisticRegression": LogisticRegression(
        max_iter=200, # 최대 반복 횟수
        C=1.0, # 규제 강도(C값이 작을수록 규제가 강해짐)
        solver='lbfgs', # 최적화 알고리즘
        random_state=123
    ),
    "RandomForest": RandomForestClassifier(
        n_estimators=100, # 트리의 갯수
        max_depth=None,
        random_state=123
    ),
    "SVC": SVC(
        kernel='linear', # linear, sigmoid, poly, rbf
        random_state=123 # 재연성을 위해 설정
    )
}

In [11]:
# 위 모델들을 한번에 불러와서(반복문) => 최고의 모델을 찾아내고, 해당 파라미터를 기록합니다.

mlflow.autolog()

best_accuracy = 0
best_model = None
best_model_name = None

with mlflow.start_run():
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model_name = model_name
            best_model = model

        print(f"Model Name: {model_name}, Accuracy: {accuracy}")

        mlflow.log_param('best_model', best_model_name) # 파라미터 로그
        mlflow.log_metric('best_accuracy', best_accuracy) # 메트릭 로그

    print(f"Best Model Name: {best_model_name}, Best Accuracy: {best_accuracy}")

2024/08/14 17:58:02 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Model Name: LogisticRegression, Accuracy: 0.9666666666666667
Model Name: RandomForest, Accuracy: 0.9333333333333333


2024/08/14 17:59:06 INFO mlflow.tracking._tracking_service.client: 🏃 View run industrious-hare-447 at: http://127.0.0.1:5000/#/experiments/471758380148410695/runs/89f4341fff234e7fad0898858514490e.
2024/08/14 17:59:06 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/471758380148410695.


Model Name: SVC, Accuracy: 0.9333333333333333
Best Model Name: LogisticRegression, Best Accuracy: 0.9666666666666667


In [12]:
mlflow.autolog()

# 전체 모델에 대해서 기록을 하고 싶은데?
for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        # model
        model_path = f"{model_name}_model"
        mlflow.sklearn.log_model(model, model_path) # 모델을 artifact 디렉토리에 저장

        mlflow.log_param(f'{model_name}_param', model.get_params()) # 파라미터 로그
        mlflow.log_metric(f'{model_name}_accuracy', accuracy) # 메트릭 로그

    print(f"Model Name: {model_name}, Accuracy: {accuracy}")

2024/08/14 17:59:06 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/08/14 17:59:08 INFO mlflow.tracking._tracking_service.client: 🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/471758380148410695/runs/53a83d5e40c44e4e8c9bac7998fed68b.
2024/08/14 17:59:08 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/471758380148410695.


Model Name: LogisticRegression, Accuracy: 0.9666666666666667


2024/08/14 17:59:11 INFO mlflow.tracking._tracking_service.client: 🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/471758380148410695/runs/1b063b7b3f8c40bab3689a577c529231.
2024/08/14 17:59:11 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/471758380148410695.


Model Name: RandomForest, Accuracy: 0.9333333333333333


2024/08/14 17:59:13 INFO mlflow.tracking._tracking_service.client: 🏃 View run SVC at: http://127.0.0.1:5000/#/experiments/471758380148410695/runs/c7b4c1e5ef134c12951c4f21b7552207.
2024/08/14 17:59:13 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/471758380148410695.


Model Name: SVC, Accuracy: 0.9333333333333333


# 모델 관리

In [13]:
from mlflow.tracking import MlflowClient

client = MlflowClient()

# 모델을 등록하고, 해당 모델의 버전을 반환
def register_model(model_name,run_id, model_uri='model'): # 모델 등록
    model_uri = f'runs:/{run_id}/{model_uri}'
    model_version = mlflow.register_model(model_uri, model_name)
    return model_version # json 형태

# 등록된 모델을 stage 단계로 승격
def promote_to_staging(model_name, run_id, model_uri): # stage
    model_version = register_model(model_name, run_id, model_uri)

    client.set_model_version_tag(
        name=model_name,
        version=model_version.version, # json을 변환
        key='stage',
        value='staging'
    )

    print(f'Model: {model_name}, Version: {model_version} promoted to Staging...')

def promote_to_production(model_name, version):  # production, 눈으로 보고 판단해야해서 버전을 직접 입력함
    client.set_model_version_tag(
        name=model_name,
        version=version,
        key='stage',
        value='production'
    )

    print(f'Model: {model_name}, Version: {version} promoted to Production...')

def archive_model(model_name, version):  # archive: 모델 폐기 단계, 눈으로 보고 판단해야해서 버전을 직접 입력함
    client.set_model_version_tag(
        name=model_name,
        version=version,
        key='stage',
        value='archived'
    )

    print(f'Model: {model_name}, Version: {version} Archived...')

In [14]:
# http://127.0.0.1:5000/#/experiments/0/runs/922ddd6e306d4849be80101808e94cd6
# 실험 ID: 0
# 실행 ID: 922ddd6e306d4849be80101808e94cd6
# Model Name: LogisticRegression

# (1) 모델 등록
run_id = '922ddd6e306d4849be80101808e94cd6'
model_name = 'LogisticRegression'

model_version = register_model(model_name, run_id)
print(model_version)

Registered model 'LogisticRegression' already exists. Creating a new version of this model...
2024/08/14 17:59:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression, version 9


<ModelVersion: aliases=[], creation_timestamp=1723625953244, current_stage='None', description='', last_updated_timestamp=1723625953244, name='LogisticRegression', run_id='922ddd6e306d4849be80101808e94cd6', run_link='', source='mlflow-artifacts:/0/922ddd6e306d4849be80101808e94cd6/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='9'>


Created version '9' of model 'LogisticRegression'.


In [15]:
# (2) 모델을 staging 단계로 승격
promote_to_staging(model_name, run_id, 'model')

Registered model 'LogisticRegression' already exists. Creating a new version of this model...
2024/08/14 17:59:13 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression, version 10


Model: LogisticRegression, Version: <ModelVersion: aliases=[], creation_timestamp=1723625953265, current_stage='None', description='', last_updated_timestamp=1723625953265, name='LogisticRegression', run_id='922ddd6e306d4849be80101808e94cd6', run_link='', source='mlflow-artifacts:/0/922ddd6e306d4849be80101808e94cd6/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='10'> promoted to Staging...


Created version '10' of model 'LogisticRegression'.


In [16]:
# (3) 모델을 Production 단계로 승격
promote_to_production(model_name, '3')

Model: LogisticRegression, Version: 3 promoted to Production...


In [17]:
# (4) 새로운 버전의 모델을 Production 단계로 승격
promote_to_production(model_name, '4')

# (4) 기존 Production 버전의 모델을 archive 단계로 폐기
archive_model(model_name, '3')

Model: LogisticRegression, Version: 4 promoted to Production...
Model: LogisticRegression, Version: 3 Archived...


### 모델 Serving

- Fast API, Flask ... => API로 어떻게 만들지?
- mlflow가 해결
- inference: 값을 전달하고, 그 값에 대한 예측값을 return (API)

In [18]:
# (1) Model Load
model_name = 'LogisticRegression'
model_version = 1

model_uri = f'models:/{model_name}/{model_version}'

loaded_model = mlflow.pyfunc.load_model(model_uri)

test_input = X_test[:10]
loaded_model.predict(test_input)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [19]:
X_test[:10]

array([[ 5.53333275e-01, -1.28296331e+00,  6.49083415e-01,
         3.95774101e-01],
       [ 1.15917263e+00, -1.31979479e-01,  9.90107977e-01,
         1.18556721e+00],
       [ 6.74501145e-01, -5.92373012e-01,  1.04694540e+00,
         1.31719939e+00],
       [-2.94841818e-01, -1.31979479e-01,  1.94384000e-01,
         1.32509732e-01],
       [-1.14301691e+00,  1.24920112e+00, -1.34022653e+00,
        -1.44707648e+00],
       [ 1.89829664e-01, -1.31979479e-01,  5.92245988e-01,
         7.90670654e-01],
       [ 5.53333275e-01, -1.74335684e+00,  3.64896281e-01,
         1.32509732e-01],
       [-1.74885626e+00,  3.28414053e-01, -1.39706395e+00,
        -1.31544430e+00],
       [-1.74885626e+00, -3.62176246e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-4.16009689e-01, -1.05276654e+00,  3.64896281e-01,
         8.77547895e-04]])

### Model API Serving

- 서버가 하나 더 필요 => REST API
- mlflow 설치할 때 flask => API를 내려줄 flask 서버를 하나 더 띄워야 함

http://127.0.0.1:5000/#/experiments/0/runs/1b74db9e2ab44b41a35cbc26a09baf7f

mlflow models serve -m ./mlartifacts/0/1b74db9e2ab44b41a35cbc26a09baf7f/artifacts/model -p 5001 --no-conda
=> 로컬에서 돌리고 있는데, 추후 AWS Sage Maker에 올려서 운영 가능

In [20]:
import pandas as pd

X_test_df = pd.DataFrame(X_test, columns=iris.feature_names)

data = {
    'dataframe_split': X_test_df[:10].to_dict(orient="split")
} # dict

url = 'http://127.0.0.1:5001/invocations'

headers = {'Content-Type':'application/json'}

import requests
import json

res = requests.post(url, headers=headers, data=json.dumps(data))
print("Server Response(inference) : ", res.json())

Server Response(inference) :  {'predictions': [1, 1, 1, 0, 0, 0, 0, 0, 0, 0]}
