# mlflow 무작정 따라하기

## 1. 환경 준비

* 라이브러리 Import

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import *

import mlflow

* 데이터 준비

In [2]:
# Read the Graduate_apply.csv dataset from its GitHub URL and preview the
# first rows of the resulting DataFrame.
path = "https://raw.githubusercontent.com/DA4BAM/dataset/master/Graduate_apply.csv"
data = pd.read_csv(filepath_or_buffer=path)
data.head()

Unnamed: 0,admit,gre,gpa,rank
0,0,380,3.61,3
1,1,660,3.67,3
2,1,800,4.0,1
3,1,640,3.19,4
4,0,520,2.93,4


## 2. 데이터 전처리

* x, y 나누기

In [3]:
# Separate the label column (y) from the remaining feature columns (x).
target = "admit"
y = data[target]
x = data.drop(columns=target)

* 가변수화

In [4]:
# One-hot encode the categorical columns; drop_first=True omits the first
# level of each encoded column.
cat_cols = ["rank"]
x = pd.get_dummies(data=x, columns=cat_cols, drop_first=True)

* 데이터분할

In [5]:
# Hold out 20% of the rows for validation; the fixed random_state keeps the
# split repeatable between executions.
x_train, x_val, y_train, y_val = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=20,
)

## 3. mlflow 설정

* SQLite에 기록
    * mlflow.db가 생성됨

In [7]:
# Store tracking data in a local SQLite database file (mlflow.db).
mlflow_uri = "sqlite:///mlflow.db"
mlflow.set_tracking_uri(uri=mlflow_uri)

## 4. 모델링 & Tracking

In [6]:
# Train a decision tree inside an MLflow run so that its hyperparameters,
# validation accuracy, and fitted model are recorded together.
with mlflow.start_run():  # everything logged inside this block belongs to one run

    model = DecisionTreeClassifier()
    model.fit(x_train, y_train)

    pred = model.predict(x_val)

    accuracy = accuracy_score(y_val, pred)

    # Record the estimator's hyperparameters so that runs can be told apart
    # when they are compared in the MLflow UI.
    mlflow.log_params(model.get_params())
    mlflow.log_metric("accuracy", accuracy)
    # Save the fitted model with the run and register it under "Test_Model".
    mlflow.sklearn.log_model(model, "model", registered_model_name="Test_Model")

Successfully registered model 'Test_Model'.
2023/05/18 10:28:59 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: Test_Model, version 1
Created version '1' of model 'Test_Model'.


## 5. mlflow 서버 실행

* 터미널을 열고
* 경로 확인 : 현재 폴더 위치
* 아래 명령어 실행  
`mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts`

## 6. 간단한 실습
* 위 4번의 코드를 붙여넣고
* 하이퍼파라미터 max_depth = 4로 설정하여
* 한 번 더 실행해 봅시다. (다른 부분은 수정하지 않아도 됩니다.)
* 그리고 mlflow 웹에 접속하여 결과 확인하기

In [9]:
# Repeat the tracked training run with the tree depth limited to 4 so the
# result can be compared with the earlier run.
with mlflow.start_run():  # everything logged inside this block belongs to one run

    model = DecisionTreeClassifier(max_depth = 4)
    model.fit(x_train, y_train)

    pred = model.predict(x_val)

    accuracy = accuracy_score(y_val, pred)

    # Record the estimator's hyperparameters (including max_depth) so this
    # run can be distinguished from other runs in the MLflow UI.
    mlflow.log_params(model.get_params())
    mlflow.log_metric("accuracy", accuracy)
    # Save the fitted model with the run and register it under "Test_Model".
    mlflow.sklearn.log_model(model, "model", registered_model_name="Test_Model")

Registered model 'Test_Model' already exists. Creating a new version of this model...
2023/05/18 10:49:10 INFO mlflow.tracking._model_registry.client: Waiting up to 300 seconds for model version to finish creation. Model name: Test_Model, version 2
Created version '2' of model 'Test_Model'.
