# MLFLOW

* Banco de dados SQL(mysql, postgres, sqlite)

"""
pip install mlflow

sudo apt install sqlite3
"""

mlflow server --backend-store-uri sqlite:///mlrunsdb.db \
              --default-artifact-root ./mlflowruns \
              --host 0.0.0.0 \
              --port 5000

In [10]:
import pandas as pd
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import joblib
from pyexpat import model

In [4]:
URL = "https://raw.githubusercontent.com/ricardo-jr37/mlflow_tutorial/master/data/wine.data"

df = pd.read_csv(URL)

cols = ["label"]

cols.extend(["feature_" + str(i) for i in range(1,14)])

df.columns = cols

df

Unnamed: 0,label,feature_1,feature_2,feature_3,feature_4,feature_5,feature_6,feature_7,feature_8,feature_9,feature_10,feature_11,feature_12,feature_13
0,1,13.20,1.78,2.14,11.2,100,2.65,2.76,0.26,1.28,4.38,1.05,3.40,1050
1,1,13.16,2.36,2.67,18.6,101,2.80,3.24,0.30,2.81,5.68,1.03,3.17,1185
2,1,14.37,1.95,2.50,16.8,113,3.85,3.49,0.24,2.18,7.80,0.86,3.45,1480
3,1,13.24,2.59,2.87,21.0,118,2.80,2.69,0.39,1.82,4.32,1.04,2.93,735
4,1,14.20,1.76,2.45,15.2,112,3.27,3.39,0.34,1.97,6.75,1.05,2.85,1450
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
172,3,13.71,5.65,2.45,20.5,95,1.68,0.61,0.52,1.06,7.70,0.64,1.74,740
173,3,13.40,3.91,2.48,23.0,102,1.80,0.75,0.43,1.41,7.30,0.70,1.56,750
174,3,13.27,4.28,2.26,20.0,120,1.59,0.69,0.43,1.35,10.20,0.59,1.56,835
175,3,13.17,2.59,2.37,20.0,120,1.65,0.68,0.53,1.46,9.30,0.60,1.62,840


In [5]:
from sklearn.model_selection import train_test_split

X = df.iloc[:, 1:]

y = df["label"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25)

In [6]:
import mlflow

DB_URI = "sqlite:///mlrunsdb.db"
mlflow.set_tracking_uri(DB_URI)

tags = {
    "Módulo":"Modelos Produtivos 1",
    "Turma":815,
    "Objeto":"Vinhos"
}

mlflow.set_experiment(experiment_name="Classificação de Vinhos")
mlflow.set_experiment_tags(tags=tags)

In [7]:
def get_metrics(y_test:list, y_pred:list):
    ac = accuracy_score(y_test, y_pred)
    pr = precision_score(y_test, y_pred, average= "weighted")
    rc = recall_score(y_test, y_pred, average= "weighted")
    f1 = f1_score(y_test, y_pred, average= "weighted")
    
    return(ac, pr, rc, f1)


In [9]:
with mlflow.start_run(
    run_name="Tentativa 1",
    description="Classificando vinhos com knn",
    tags={"version":"v2", "env":"dev"}
) as model_run:
    
    k = 7
    knn = KNeighborsClassifier(n_neighbors=k)

    model = Pipeline([
        ("scaler", StandardScaler()),
        ("knn", knn)])

    model.fit(X_train, y_train)
    
    y_pred = model.predict(X_test)
    
    mc = confusion_matrix(y_test.values.ravel(), y_pred)
    ac, pr, rc, f1 = get_metrics(y_test, y_pred)
    
    mlflow.log_param("k",k)
    mlflow.log_param("size_train_dataset", len(X_train))
    mlflow.log_param("size_test_dataset", len(X_test))
    
    params = {
        "k":k,
        "size_train_dataset": len(X_train),
        "size_test_dataset": len(X_test)
        
    }
    
    metrica = {
        "acuracia":ac,
        "precision":pr,
        "recall":rc,
        "f1":f1
    }
    
    mlflow.log_params(params=params)
    
    mlflow.log_metrics(metrics=metrica)
    
    mlflow.sklearn.log_model(model, "model")

In [12]:
mlflow.set_tracking_uri(uri="http://localhost:5000/")

Path = "models:/Claissificação de vinho/Production"
loaded_model = mlflow.sklearn.load_model(Path)

loaded_model.predict(X_val)

array([1, 3, 1, 2, 1, 1, 3, 3, 2, 3, 2, 1, 3, 3, 2, 1, 2, 2, 3, 1, 3, 1,
       2, 2, 3, 2, 3, 1, 3, 1, 1])

In [13]:
loaded_model.score(X_val, y_val)

0.9032258064516129