In [1]:
import mlflow
from mlflow.models import infer_signature

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.preprocessing import StandardScaler

In [6]:
df = pd.read_csv("../data/predictions.csv")
df.head(5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,predictions
0,41505.0,-16.526507,8.584972,-18.649853,9.505594,-13.793819,-2.832404,-16.701694,7.517344,-8.507059,...,1.190739,-1.12767,-2.358579,0.673461,-1.4137,-0.462762,-2.018575,-1.042804,364.19,1
1,44261.0,0.339812,-2.743745,-0.13407,-1.385729,-1.451413,1.015887,-0.524379,0.22406,0.899746,...,-0.213436,-0.942525,-0.526819,-1.156992,0.311211,-0.746647,0.040996,0.102038,520.12,1
2,35484.0,1.39959,-0.590701,0.168619,-1.02995,-0.539806,0.040444,-0.712567,0.002299,-0.971747,...,0.102398,0.168269,-0.166639,-0.81025,0.505083,-0.23234,0.011409,0.004634,31.0,0
3,167123.0,-0.432071,1.647895,-1.669361,-0.349504,0.785785,-0.630647,0.27699,0.586025,-0.484715,...,0.358932,0.873663,-0.178642,-0.017171,-0.207392,-0.157756,-0.237386,0.001934,1.5,0
4,168473.0,2.01416,-0.137394,-1.015839,0.327269,-0.182179,-0.956571,0.043241,-0.160746,0.363241,...,-0.238644,-0.6164,0.347045,0.061561,-0.360196,0.17473,-0.078043,-0.070571,0.89,0


In [7]:
print(df["predictions"].value_counts())

predictions
0    98
1     2
Name: count, dtype: int64


In [8]:
# dataset
X = df.drop(columns="predictions")
y = df["predictions"]

# Train, Test 데이터셋 분할
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 모델 하이퍼파라미터 정의
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "multi_class": "auto",
    "random_state": 42,
}

# 모델 학습
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# test 데이터셋 Predict
y_pred = lr.predict(X_test)

# Metrics 계산
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)

  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


In [9]:
# Tracking Server 설정
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# 새로운 MLFlow 실험을 생성
mlflow.set_experiment("creditcard_fraud_experiment")

# MLflow 실험 시작
with mlflow.start_run():
    mlflow.log_params(params)
    mlflow.log_metric("accuracy", accuracy)
    mlflow.log_metric("precision", precision)
    mlflow.log_metric("recall", recall)
    mlflow.set_tag("Training Info", "Logistic Regression for creditcard fraud")

    signature = infer_signature(X_train, lr.predict(X_train))

    model_info = mlflow.sklearn.log_model(
        sk_model=lr,
        artifact_path="model",
        signature=signature,
        input_example=X_train,
        registered_model_name="creditcard_fraud_model"
    )

2025/10/01 17:06:25 INFO mlflow.tracking.fluent: Experiment with name 'creditcard_fraud_experiment' does not exist. Creating a new experiment.
Successfully registered model 'creditcard_fraud_model'.
2025/10/01 17:06:31 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: creditcard_fraud_model, version 1


🏃 View run melodic-calf-386 at: http://127.0.0.1:5000/#/experiments/254477767552458013/runs/e0286ce238b14839ab10ea2a262680ef
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/254477767552458013


Created version '1' of model 'creditcard_fraud_model'.


In [10]:
# 예측을 위해 일반 Python 함수 모델(pyfunc)로 모델을 다시 불러옵니다.
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

feature_columns = X.columns

X_scaled = scaler.transform(X)
predictions = loaded_model.predict(X_scaled)

result = pd.DataFrame(X_scaled, columns=feature_columns)
result["actual_class"] = y
result["predicted_class"] = predictions

result.head()

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V22,V23,V24,V25,V26,V27,V28,Amount,actual_class,predicted_class
0,-1.216072,-4.338632,2.869882,-8.683089,5.816289,-5.159774,-1.488126,-8.078522,6.118347,-7.800999,...,-1.583035,-2.122807,1.063469,-3.05446,-1.021876,-2.867179,-2.441457,0.64523,1,0
1,-1.157456,0.141908,-0.742665,0.078942,-1.030672,-0.501699,0.46604,-0.319415,0.203158,0.691341,...,-1.31182,-0.422845,-2.037515,0.541818,-1.650011,-0.103587,0.39443,1.039593,1,1
2,-1.344129,0.423439,-0.056094,0.22218,-0.807007,-0.157654,-0.029291,-0.409675,0.023299,-0.998218,...,0.315361,-0.08858,-1.450096,0.946025,-0.512034,-0.143288,0.153151,-0.197442,0,0
3,1.455621,-0.063143,0.657759,-0.647588,-0.379236,0.342629,-0.370071,0.064945,0.496728,-0.558532,...,1.348679,-0.09972,-0.106535,-0.539419,-0.347007,-0.477128,0.146463,-0.272051,0,0
4,1.484334,0.586699,0.088459,-0.338328,0.046225,-0.022684,-0.535576,-0.047168,-0.108937,0.206992,...,-0.834086,0.388143,0.026845,-0.858002,0.388667,-0.263317,-0.033138,-0.273593,0,0
