In [2]:
import argparse
import mlflow
import mlflow.sklearn
import numpy
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from mlflow.models import infer_signature

# Read the credit-card transactions table; the path is relative to the notebook's working directory.
DATA_PATH = '../data/creditcard.csv'
df = pd.read_csv(DATA_PATH)

# A bare last expression lets the notebook render the (truncated) frame as rich output.
df

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,0
1,0,1.191857,0.266151,0.166480,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.167170,0.125895,-0.008983,0.014724,2.69,0
2,1,-1.358354,-1.340163,1.773209,0.379780,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,0
3,1,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.108300,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.50,0
4,2,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.206010,0.502292,0.219422,0.215153,69.99,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76995,56834,1.411662,-0.980237,0.702831,-1.306067,-1.545296,-0.697165,-0.956105,-0.138694,-1.960606,...,-0.387743,-0.749927,0.260327,0.350000,0.018392,-0.474421,0.053667,0.032403,30.00,0
76996,56835,-0.647634,0.256468,1.535691,0.755673,0.484788,-1.145051,0.037840,0.067013,-0.495018,...,-0.100845,-0.493438,0.230580,0.552368,-0.427992,0.152965,0.001078,0.042935,2.69,0
76997,56835,1.176402,0.184132,0.514655,0.449999,-0.196922,-0.187330,-0.091912,0.056878,-0.280982,...,-0.174013,-0.483428,0.149135,0.023985,0.129143,0.105657,-0.013510,0.006145,0.89,0
76998,56835,0.639332,-1.014053,-0.735564,0.189471,-0.406596,-0.808740,0.713173,-0.261931,-0.271799,...,0.178081,-0.325279,-0.429730,0.069986,0.404561,1.040671,-0.189242,0.034921,350.37,0


In [None]:
# Hyperparameters are exposed as command-line flags so the same code can run
# as a script; inside a notebook the defaults below apply.
parser = argparse.ArgumentParser()
parser.add_argument("--max_iter", type=int, default=1000)        # LogisticRegression iteration cap
parser.add_argument("--n_estimators", type=int, default=100)     # number of trees in the random forest

# parse_known_args() is used instead of parse_args() because a Jupyter kernel's
# sys.argv carries launcher arguments (e.g. "-f <connection-file>") that argparse
# would otherwise reject with SystemExit. Unrecognised arguments are ignored.
args, _ = parser.parse_known_args()

In [None]:
# "Class" is the label column; every other column is used as a feature.
y = df["Class"]
X = df.drop("Class", axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Fit the scaler on the training split only, then apply the same fitted
# transform to both splits so the test data never influences the statistics.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Configure the tracking server: runs are recorded at this MLflow server URI.
TRACKING_URI = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(uri=TRACKING_URI)

In [None]:
# Select the MLflow experiment that the runs below are recorded under
# (MLflow creates it when no experiment with this name exists yet).
mlflow.set_experiment(experiment_name="creditcard_experiment")

In [None]:
with mlflow.start_run(run_name="LogisticRegression"):
    # Train a logistic-regression classifier on the training split and
    # evaluate it on the held-out test split.
    model = LogisticRegression(max_iter=args.max_iter)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    # Recall must come from recall_score; using precision_score here would
    # log the precision value a second time under the "recall" key.
    rec = recall_score(y_test, preds)

    mlflow.log_param("model", "LogisticRegression")
    # Log the value the model was actually built with so the record stays
    # correct when --max_iter is overridden.
    mlflow.log_param("max_iter", args.max_iter)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("precision", prec)
    mlflow.log_metric("recall", rec)

    # Pair the predictions with the inputs that produced them (X_test).
    signature = infer_signature(X_test, preds)

    mlflow.sklearn.log_model(model, "model", signature=signature)

In [None]:
with mlflow.start_run(run_name="RandomForest"):
    # Train a random-forest classifier on the training split and evaluate it
    # on the held-out test split. The forest is seeded so repeated runs build
    # the same trees (same seed value as the train/test split).
    model = RandomForestClassifier(n_estimators=args.n_estimators, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    acc = accuracy_score(y_test, preds)
    prec = precision_score(y_test, preds)
    # Recall must come from recall_score; using precision_score here would
    # log the precision value a second time under the "recall" key.
    rec = recall_score(y_test, preds)

    mlflow.log_param("model", "RandomForest")
    # Log the value the model was actually built with so the record stays
    # correct when --n_estimators is overridden.
    mlflow.log_param("n_estimators", args.n_estimators)
    mlflow.log_param("random_state", 42)
    mlflow.log_metric("accuracy", acc)
    mlflow.log_metric("precision", prec)
    mlflow.log_metric("recall", rec)

    # Pair the predictions with the inputs that produced them (X_test).
    signature = infer_signature(X_test, preds)

    mlflow.sklearn.log_model(model, "model", signature=signature)