In [1]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.preprocessing import StandardScaler

# 데이터 준비
iris = load_iris() # 꽃 받침과 꽃 잎 사이즈를 가지고 꽃의 종류를 결정

X = iris.data
y = iris.target

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


# 학습 데이터와 테스트 데이터로 분리 => train_test_split()
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=123)


X_scaled


# 얼굴인식 => ??? => 사람의 얼굴을 수치화 => Open CV => 무인차 (Open CV)


array([[-9.00681170e-01,  1.01900435e+00, -1.34022653e+00,
        -1.31544430e+00],
       [-1.14301691e+00, -1.31979479e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-1.38535265e+00,  3.28414053e-01, -1.39706395e+00,
        -1.31544430e+00],
       [-1.50652052e+00,  9.82172869e-02, -1.28338910e+00,
        -1.31544430e+00],
       [-1.02184904e+00,  1.24920112e+00, -1.34022653e+00,
        -1.31544430e+00],
       [-5.37177559e-01,  1.93979142e+00, -1.16971425e+00,
        -1.05217993e+00],
       [-1.50652052e+00,  7.88807586e-01, -1.34022653e+00,
        -1.18381211e+00],
       [-1.02184904e+00,  7.88807586e-01, -1.28338910e+00,
        -1.31544430e+00],
       [-1.74885626e+00, -3.62176246e-01, -1.34022653e+00,
        -1.31544430e+00],
       [-1.14301691e+00,  9.82172869e-02, -1.28338910e+00,
        -1.44707648e+00],
       [-5.37177559e-01,  1.47939788e+00, -1.28338910e+00,
        -1.31544430e+00],
       [-1.26418478e+00,  7.88807586e-01, -1.22655167e+00,
      

In [2]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train) # train=모의고사 # 학습을 시킬 때는 학습 데이터만 제공

model.predict(X_test) # 예측을 시킬 때는 테스트 데이터만 제공

y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)

print(f"정확도 : {accuracy * 100}")


정확도 : 96.66666666666667


모델 학습과 모델 성능
심플하게 모든 것은 ML flow에게 맡긴다. => mlflow.autolog()
autolog에서 추적하지 못하는 다른 파라미터,메트릭,메타데이터 등등의 값을 수동으로 기록

In [3]:
mlflow.set_tracking_uri("http://127.0.0.1:5000")
print("",mlflow.get_tracking_uri())

 http://127.0.0.1:5000


In [4]:
import mlflow.sklearn

mlflow.autolog()

with mlflow.start_run(nested=True):
    model = LogisticRegression(max_iter=200)
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    print(f"정확도 : {accuracy * 100}")


2024/08/14 16:44:36 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/08/14 16:44:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run funny-eel-434 at: http://127.0.0.1:5000/#/experiments/0/runs/6d08ac38911b40fb89f39d4ae5b477db.
2024/08/14 16:44:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/0.


정확도 : 96.66666666666667


In [5]:
exp = mlflow.set_experiment(experiment_name='iris_classification_experiment')

print(f"Name: {exp.name}")
print(f"ID: {exp.experiment_id}")
print(f"Location: {exp.artifact_location}")
print(f"Tags: {exp.tags}")
print(f"Lifecycle: {exp.lifecycle_stage}")
print(f"Create Timestamp: {exp.creation_time}")


Name: iris_classification_experiment
ID: 787161780912017658
Location: mlflow-artifacts:/787161780912017658
Tags: {}
Lifecycle: active
Create Timestamp: 1723621157396


In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC

models = {
    "LogisticRegression" :LogisticRegression(
        max_iter=200, #최대 반복횟수"
        C=1.0, # 규제 강도(C값이 적을 수록 규제가 강화됨)
        solver='lbfgs', #최적화 알고리즘
        random_state=123
    ),
    "RandomForest" : RandomForestClassifier(
        n_estimators=100, #트리의 갯수
        max_depth=None,
        random_state=123
    ),
    "SVC" : SVC(
        kernel='linear', # linear, sigmoid, poly, rbf
        random_state=123
    ),
}   


In [7]:
# 위 모델들을 한번씩 불러와서 (반복문) => 최고의 모델을 찾아내고, 해당 파라미터를 기록합니다.

mlflow.autolog()

best_accuracy = 0
best_model = None
best_model_name = None

with mlflow.start_run(nested=True):
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        if accuracy > best_accuracy:
            best_accuracy = accuracy
            best_model = model
            best_model_name = model_name

        print(f"Model Name: {model_name}, Accuracy: {accuracy}")

        mlflow.log_param('best_model', best_model_name) # 파라미터 로그
        mlflow.log_metric('best_accuracy', best_accuracy) # 메트릭 로그

    print(f"Best Model Name: {best_model_name}, Best Accuracy: {best_accuracy}")
    

2024/08/14 16:44:39 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Model Name: LogisticRegression, Accuracy: 0.9666666666666667
Model Name: RandomForest, Accuracy: 0.9333333333333333


2024/08/14 16:45:44 INFO mlflow.tracking._tracking_service.client: 🏃 View run delightful-roo-311 at: http://127.0.0.1:5000/#/experiments/787161780912017658/runs/c10da89fc0864b37a25c55e6ebb2af60.
2024/08/14 16:45:44 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/787161780912017658.


Model Name: SVC, Accuracy: 0.9333333333333333
Best Model Name: LogisticRegression, Best Accuracy: 0.9666666666666667


In [23]:
mlflow.autolog()
# 전체 모델에 대해서 기록을 하고 싶은데?

for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name, nested=True):
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, y_pred)

        #모델을 mlflow에 저장
        model_path = f"model/{model_name}"
        mlflow.sklearn.log_model(model, model_path)

        mlflow.log_param(f'{model_name}_param', model.get_params())
        mlflow.log_metric(f'{model_name}_accuracy', accuracy)

        print(f"Model Name: {model_name}, Accuracy: {accuracy}")



2024/08/14 17:03:27 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.
2024/08/14 17:03:30 INFO mlflow.tracking._tracking_service.client: 🏃 View run LogisticRegression at: http://127.0.0.1:5000/#/experiments/787161780912017658/runs/d3102882f781401b967ad332d678940c.
2024/08/14 17:03:30 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/787161780912017658.


Model Name: LogisticRegression, Accuracy: 0.9666666666666667


2024/08/14 17:03:33 INFO mlflow.tracking._tracking_service.client: 🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/787161780912017658/runs/7770bf3e2da54f0f8f60b58a6768acd9.
2024/08/14 17:03:33 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/787161780912017658.


Model Name: RandomForest, Accuracy: 0.9333333333333333


2024/08/14 17:03:36 INFO mlflow.tracking._tracking_service.client: 🏃 View run SVC at: http://127.0.0.1:5000/#/experiments/787161780912017658/runs/5ac551a223d54eb1981c000b0806877a.
2024/08/14 17:03:36 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/787161780912017658.


Model Name: SVC, Accuracy: 0.9333333333333333


In [26]:
# 모델 관리
from mlflow.tracking import MlflowClient
client = MlflowClient()
# 모델을 등록하고, 해당 모델의 버전을 반환
def register_model(model_name, run_id, model_uri='model'): # 모델 등록
    model_uri = f"runs:/{run_id}/{model_uri}"
    model_version = mlflow.register_model(model_uri, model_name)
    return model_version
# 등록된 모델을 stage 단계로 승격
def promote_to_staging(model_name, run_id, model_uri): # stage
    model_version = register_model(model_name, run_id, model_uri)
    client.set_model_version_tag(
        name=model_name,
        version=model_version.version,
        key='stage',
        value='staging'
    )
    print(f"Model: {model_name}, version: {model_version} promoted to Staging...")
def promote_to_production(model_name, version): # production
    client.set_model_version_tag(
        name=model_name,
        version=version,
        key='stage',
        value='production'
    )
    print(f"Model: {model_name}, version: {version} promoted to Production...")
def archive_model(model_name, version): # archive: 모델 폐기 단계
    client.set_model_version_tag(
        name=model_name,
        version=version,
        key='stage',
        value='archived'
    )
    print(f"Model: {model_name}, version: {version} Archived ...")

In [37]:
# http://127.0.0.1:5000/#/experiments/787161780912017658/runs/cbd04b96570b4d28aa996ea6e5e43e00
# 실험ID: 787161780912017658
# 실행ID: cbd04b96570b4d28aa996ea6e5e43e00
# Model Name: LogisticRegression
# (1) 모델 등록
run_id = 'cbd04b96570b4d28aa996ea6e5e43e00'
model_name = 'LogisticRegression'

model_version = register_model(model_name, run_id)
print(model_version)

Registered model 'LogisticRegression' already exists. Creating a new version of this model...
2024/08/14 17:08:32 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression, version 4


<ModelVersion: aliases=[], creation_timestamp=1723622912516, current_stage='None', description='', last_updated_timestamp=1723622912516, name='LogisticRegression', run_id='cbd04b96570b4d28aa996ea6e5e43e00', run_link='', source='mlflow-artifacts:/787161780912017658/cbd04b96570b4d28aa996ea6e5e43e00/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='4'>


Created version '4' of model 'LogisticRegression'.


In [31]:
# (2) 모델을 staging 단계로 승격
promote_to_staging(model_name, run_id, 'model')

Registered model 'LogisticRegression' already exists. Creating a new version of this model...
2024/08/14 17:07:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: LogisticRegression, version 3


Model: LogisticRegression, version: <ModelVersion: aliases=[], creation_timestamp=1723622854436, current_stage='None', description='', last_updated_timestamp=1723622854436, name='LogisticRegression', run_id='cbd04b96570b4d28aa996ea6e5e43e00', run_link='', source='mlflow-artifacts:/787161780912017658/cbd04b96570b4d28aa996ea6e5e43e00/artifacts/model', status='READY', status_message='', tags={}, user_id='', version='3'> promoted to Staging...


Created version '3' of model 'LogisticRegression'.


In [36]:
# (3) 모델을 Production 단계로 승격
promote_to_production(model_name, '3')

Model: LogisticRegression, version: 3 promoted to Production...


In [38]:
# (4) 새로운 버전의 모델을 Production으로 승격시키고, 기존의 Production 버전은 Archived
promote_to_production(model_name, '4') # 4 staging -> production
archive_model(model_name, '3') # production -> archive

Model: LogisticRegression, version: 4 promoted to Production...
Model: LogisticRegression, version: 3 Archived ...


### 모델 Serving
- FastAPI, Flask ... => API로 언제만들지?
- mlflow가 해결해줌
- inference: 값을 전달하고, 그 값에 대한 예측값을 return (API)

In [47]:
# PM 결과를 보여줘야하는데 PM은 모름 눈으로 보여줘야함
# (1) Model Load
model_name = 'LogisticRegression'
model_version = 4

model_uri = f'models:/{model_name}/{model_version}'

loaded_model = mlflow.pyfunc.load_model(model_uri)

test_input = X_test[:10]
loaded_model.predict(test_input)

array([1, 2, 2, 1, 0, 2, 1, 0, 0, 1])

### Model API Serving
- 서버가 하나 더 필요합니다.Rest API
- mlflow 설치 할 때 flask=>API 내려줄 flask 서버를 하나 더 띄워줘야 합니다.

http://127.0.0.1:5000/#/experiments/787161780912017658/runs/d3102882f781401b967ad332d678940c

[text](../mlartifacts/)

로컬실행
mlflow models serve -m ./mlartifacts/968704052837447115/2b8120e167474469b9b9cbe753cb643b/artifacts/model -p 5001 --no-conda
=> 로컬에서 돌리고 있는데, AWS Sage Maker 올려서 운영을 하시면 됩니다.

In [48]:
import pandas as pd

X_text_df = pd.DataFrame(X_test, columns=iris.feature_names)

data = {
    'dataframe_split': X_text_df[:10].to_dict(orient='split'),
} # data type: dict -> json

url = "http://127.0.0.1:5001/invocations"

headers = {"Content-Type":"application/json"}

import requests
import json

res = requests.post(url, headers=headers, data=json.dumps(data))


print("Server response(infernece):", res.json())


Server response(infernece): {'predictions': [1, 2, 2, 1, 0, 1, 1, 0, 0, 1]}
