# MLFlow

In [11]:
import mlflow
import mlflow.sklearn
from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import train_test_split

In [12]:
random_state = 123

In [13]:
iris = load_iris()

iris.data
iris.target
iris.feature_names

df = pd.DataFrame(iris.data, columns=iris.feature_names)
df['Label'] = iris.target

In [14]:
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.2, random_state=random_state)

In [15]:
mlflow.set_tracking_uri('http://localhost:5001')
display(mlflow.get_tracking_uri())

'http://localhost:5001'

In [16]:
exp = mlflow.set_experiment(experiment_name='iris_classification')

display(exp.name)
display(exp.experiment_id)
display(exp.artifact_location)
display(exp.creation_time)

from datetime import datetime
datetime.fromtimestamp(exp.creation_time // 1000)

'iris_classification'

'790932059215726165'

'mlflow-artifacts:/790932059215726165'

1727344504598

datetime.datetime(2024, 9, 26, 18, 55, 4)

In [17]:
mlflow.autolog()

2024/10/07 20:50:24 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# 알고리즘별 성능 테스트
models = {
    "LogisticRegression": LogisticRegression(
        max_iter=0,
        C=1.0,
        solver='lbfgs',
        random_state=random_state,
    ),
    "RandomForestClassifier": RandomForestClassifier(
        n_estimators=1,
        random_state=random_state,
    ),
    "SVC": SVC(),
}

with mlflow.start_run():
    for model_name, model in models.items():
        model.fit(X_train, y_train)
        pred = model.predict(X_test)
        accuracy = accuracy_score(y_test, pred)

        mlflow.log_param('log_param_key', 'log_param_value')
        mlflow.log_metric('log_metric_key', 123)

        print(f'모델명: {model_name}, 정확도: {accuracy*100}')


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


모델명: LogisticRegression, 정확도: 36.666666666666664
모델명: RandomForestClassifier, 정확도: 93.33333333333333




모델명: SVC, 정확도: 96.66666666666667


2024/10/07 20:53:21 INFO mlflow.tracking._tracking_service.client: 🏃 View run adventurous-bass-874 at: http://localhost:5001/#/experiments/790932059215726165/runs/99703c3ab91048eb94b2d35ef5acbc6b.
2024/10/07 20:53:21 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://localhost:5001/#/experiments/790932059215726165.
