In [1]:
import mlflow
from mlflow.models import infer_signature

from sklearn import datasets
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
# 추가 선형 모델 import
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.metrics import mean_squared_error, r2_score

import pandas as pd
import numpy as np

# Load the Iris dataset and pull out the feature matrix and the target labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for evaluation; the fixed random_state keeps
# the split identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [2]:
# MLflow connection settings used by every run logged below.
TRACKING_URI = "http://127.0.0.1:5000"
# NOTE(review): "Modle" looks like a misspelling of "Model". The name is kept
# as-is because existing runs are recorded under it; rename deliberately if
# that is intended.
EXPERIMENT_NAME = "Modle Compare"

mlflow.set_tracking_uri(uri=TRACKING_URI)

# Select the experiment for subsequent runs (MLflow creates it when the name
# does not exist yet). Kept as the cell's last expression so the returned
# Experiment object is displayed.
mlflow.set_experiment(EXPERIMENT_NAME)

2025/03/09 18:59:44 INFO mlflow.tracking.fluent: Experiment with name 'Modle Compare' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/148841437545004186', creation_time=1741514384397, experiment_id='148841437545004186', last_update_time=1741514384397, lifecycle_stage='active', name='Modle Compare', tags={}>

### LogisticRegression

In [10]:

# 1. Train and evaluate a LogisticRegression classifier, recording the
#    parameters, accuracy metric and fitted model in a dedicated MLflow run.
with mlflow.start_run(run_name="logistic_regression"):
    lr_model = LogisticRegression(max_iter=200)
    lr_model.fit(X_train, y_train)

    # Predict class labels for the held-out split.
    y_pred = lr_model.predict(X_test)

    # Fraction of test samples classified correctly.
    accuracy = accuracy_score(y_test, y_pred)
    print(f"LogisticRegression 정확도: {accuracy:.4f}")

    # Record parameters and metrics in MLflow. max_iter is read back from the
    # estimator so the logged value cannot drift from the one actually used.
    mlflow.log_param("model_type", "LogisticRegression")
    mlflow.log_param("max_iter", lr_model.max_iter)
    mlflow.log_metric("accuracy", accuracy)

    # Infer the input/output signature from the test data and its
    # predictions, then store the fitted model as a run artifact.
    signature = infer_signature(X_test, y_pred)
    LogisticRegression_info = mlflow.sklearn.log_model(
        lr_model,
        "logistic_regression_model",
        signature=signature,
    )


LogisticRegression 정확도: 1.0000
🏃 View run logistic_regression at: http://127.0.0.1:5000/#/experiments/148841437545004186/runs/10b5cafb0228480f86803989c3dd8966
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/148841437545004186


### LinearRegression

In [11]:
# 2. Train and evaluate a LinearRegression model on the class labels,
#    recording metrics and the fitted model in a dedicated MLflow run.
with mlflow.start_run(run_name="linear_regression"):
    linear_model = LinearRegression()
    linear_model.fit(X_train, y_train)

    # Continuous predictions for the held-out split.
    y_pred_linear = linear_model.predict(X_test)
    # Turn the regression output into class labels: round to the nearest
    # integer, then clamp anything outside the 0..2 label range.
    y_pred_linear_rounded = np.clip(np.round(y_pred_linear).astype(int), 0, 2)

    # Accuracy is computed on the rounded labels; MSE and R² on the raw
    # continuous predictions.
    accuracy_linear = accuracy_score(y_test, y_pred_linear_rounded)
    mse = mean_squared_error(y_test, y_pred_linear)
    r2 = r2_score(y_test, y_pred_linear)

    print(f"LinearRegression 정확도: {accuracy_linear:.4f}")
    print(f"LinearRegression MSE: {mse:.4f}")
    print(f"LinearRegression R²: {r2:.4f}")

    # Record the model type and the evaluation metrics in MLflow.
    mlflow.log_param("model_type", "LinearRegression")
    for metric_name, metric_value in (
        ("accuracy", accuracy_linear),
        ("mse", mse),
        ("r2", r2),
    ):
        mlflow.log_metric(metric_name, metric_value)

    # Infer the signature from the test data and the raw (unrounded)
    # predictions, then store the fitted model as a run artifact.
    signature = infer_signature(X_test, y_pred_linear)
    LinearRegression_info = mlflow.sklearn.log_model(
        linear_model,
        "linear_regression_model",
        signature=signature,
    )


LinearRegression 정확도: 1.0000
LinearRegression MSE: 0.0371
LinearRegression R²: 0.9469
🏃 View run linear_regression at: http://127.0.0.1:5000/#/experiments/148841437545004186/runs/8a20f99dd2f24e4e910f73bed42e8353
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/148841437545004186


### Ridge

In [12]:
# 3. Train and evaluate a Ridge model on the class labels, recording
#    parameters, metrics and the fitted model in a dedicated MLflow run.
with mlflow.start_run(run_name="ridge_regression"):
    ridge_model = Ridge(alpha=1.0)
    ridge_model.fit(X_train, y_train)

    # Continuous predictions, then class labels obtained by rounding to the
    # nearest integer and clamping to the 0..2 label range.
    y_pred_ridge = ridge_model.predict(X_test)
    y_pred_ridge_rounded = np.round(y_pred_ridge).astype(int)
    y_pred_ridge_rounded = np.clip(y_pred_ridge_rounded, 0, 2)

    # Accuracy is computed on the rounded labels; MSE and R² on the raw
    # continuous predictions.
    accuracy_ridge = accuracy_score(y_test, y_pred_ridge_rounded)
    mse_ridge = mean_squared_error(y_test, y_pred_ridge)
    r2_ridge = r2_score(y_test, y_pred_ridge)

    print(f"Ridge 정확도: {accuracy_ridge:.4f}")
    print(f"Ridge MSE: {mse_ridge:.4f}")
    print(f"Ridge R²: {r2_ridge:.4f}")

    # Record parameters and metrics in MLflow. alpha is read back from the
    # estimator so the logged value cannot drift from the one actually used.
    mlflow.log_param("model_type", "Ridge")
    mlflow.log_param("alpha", ridge_model.alpha)
    mlflow.log_metric("accuracy", accuracy_ridge)
    mlflow.log_metric("mse", mse_ridge)
    mlflow.log_metric("r2", r2_ridge)

    # Infer the signature from the test data and the raw (unrounded)
    # predictions, then store the fitted model as a run artifact.
    signature = infer_signature(X_test, y_pred_ridge)
    Ridge_info = mlflow.sklearn.log_model(ridge_model, "ridge_model", signature=signature)



Ridge 정확도: 1.0000
Ridge MSE: 0.0391
Ridge R²: 0.9441
🏃 View run ridge_regression at: http://127.0.0.1:5000/#/experiments/148841437545004186/runs/bfe31efc0d4042d78860104517185a57
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/148841437545004186


### Lasso

In [13]:
# 4. Train and evaluate a Lasso model on the class labels, recording
#    parameters, metrics and the fitted model in a dedicated MLflow run.
with mlflow.start_run(run_name="lasso_regression"):
    lasso_model = Lasso(alpha=0.1)
    lasso_model.fit(X_train, y_train)

    # Continuous predictions, then class labels obtained by rounding to the
    # nearest integer and clamping to the 0..2 label range.
    y_pred_lasso = lasso_model.predict(X_test)
    y_pred_lasso_rounded = np.round(y_pred_lasso).astype(int)
    y_pred_lasso_rounded = np.clip(y_pred_lasso_rounded, 0, 2)

    # Accuracy is computed on the rounded labels; MSE and R² on the raw
    # continuous predictions.
    accuracy_lasso = accuracy_score(y_test, y_pred_lasso_rounded)
    mse_lasso = mean_squared_error(y_test, y_pred_lasso)
    r2_lasso = r2_score(y_test, y_pred_lasso)

    print(f"Lasso 정확도: {accuracy_lasso:.4f}")
    print(f"Lasso MSE: {mse_lasso:.4f}")
    print(f"Lasso R²: {r2_lasso:.4f}")

    # Record parameters and metrics in MLflow. alpha is read back from the
    # estimator so the logged value cannot drift from the one actually used.
    mlflow.log_param("model_type", "Lasso")
    mlflow.log_param("alpha", lasso_model.alpha)
    mlflow.log_metric("accuracy", accuracy_lasso)
    mlflow.log_metric("mse", mse_lasso)
    mlflow.log_metric("r2", r2_lasso)

    # Infer the signature from the test data and the raw (unrounded)
    # predictions, then store the fitted model as a run artifact.
    signature = infer_signature(X_test, y_pred_lasso)
    Lasso_info = mlflow.sklearn.log_model(lasso_model, "lasso_model", signature=signature)


Lasso 정확도: 0.9667
Lasso MSE: 0.0668
Lasso R²: 0.9045
🏃 View run lasso_regression at: http://127.0.0.1:5000/#/experiments/148841437545004186/runs/71a6c3b392fb4fe0ad1fdf91cf7f2ed6
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/148841437545004186


In [7]:
# Compare model performance: pair each model name with the accuracy computed
# in its training cell (those cells must have been run first).
accuracy_by_model = {
    "LogisticRegression": accuracy,
    "LinearRegression": accuracy_linear,
    "Ridge": accuracy_ridge,
    "Lasso": accuracy_lasso,
}
models = list(accuracy_by_model)
accuracies = list(accuracy_by_model.values())

comparison_df = pd.DataFrame({"Model": models, "Accuracy": accuracies})

# Show the table with the most accurate model first.
ranked_df = comparison_df.sort_values(by="Accuracy", ascending=False)
print("\n모델 정확도 비교:")
print(ranked_df)


모델 정확도 비교:
                Model  Accuracy
0  LogisticRegression  1.000000
1    LinearRegression  1.000000
2               Ridge  1.000000
3               Lasso  0.966667
