# MLFlow


Precisa de um Banco de Dados SQL (mysql, postgres, sqlite)

```
pip install mlflow

sudo apt install sqlite3

```



```

mlflow server --backend-store-uri sqlite:///mlrunsdb.db \
              --default-artifact-root ./mlflowruns \
              --host 0.0.0.0 \
              --port 5000

```

In [10]:
import pandas as pd

URL = 'https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv'


df = pd.read_csv(URL)

# cols = ['label']

# cols.extend(['feature_'+str(i) for i in range(1,14)])

# df.columns = cols

df

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,Virginica
146,6.3,2.5,5.0,1.9,Virginica
147,6.5,3.0,5.2,2.0,Virginica
148,6.2,3.4,5.4,2.3,Virginica


In [11]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder




X = df.iloc[:, :-1]
y = df['variety']


le = LabelEncoder()
le.fit(y)
y = le.transform(y)

X_train, X_test, y_train, y_test = train_test_split(X,y , test_size=0.3)


X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.25)


In [12]:
import mlflow

# DB_URI = 'sqlite:///mlrunsdb.db'
mlflow.set_tracking_uri('http://18.230.88.3/')

tags = {
    "Módulo":"Modelos Produtivos 1",
    "Turma":815,
    "objeto":'iris'
}
mlflow.set_experiment(experiment_name='Classificação de Flores')
# mlflow.set_experiment_tags(tags=tags)

<Experiment: artifact_location='./mlruns/2', experiment_id='2', lifecycle_stage='active', name='Classificação de Flores', tags={'Módulo': 'Modelos Produtivos 1', 'Turma': '815', 'objeto': 'iris'}>

In [13]:
from sklearn.metrics import (accuracy_score, precision_score, recall_score, f1_score, confusion_matrix)


def get_metrics(y_test:list, y_pred:list) -> list:
    ac = accuracy_score(y_test, y_pred)
    pr = precision_score(y_test, y_pred, average='weighted')
    rc = recall_score(y_test, y_pred, average='weighted')
    f1 = f1_score(y_test, y_pred, average='weighted')
    return [ac, pr, rc, f1]


In [14]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline



with mlflow.start_run(
    run_name='API',
    description='Flores',
    # tags={"version":"v3","env":"dev"}
) as model_run:


    k = 9
    knn = KNeighborsClassifier(n_neighbors=k)
    model = Pipeline([('scaler',StandardScaler()),('knn',knn)])
    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # mc = confusion_matrix(y_test.values.ravel(), y_pred)
    ac, pr, rc, f1 = get_metrics(y_test, y_pred)


    # mlflow.log_param("k",k)
    # mlflow.log_param("size_train_dataset", len(X_train))
    # mlflow.log_param("size_test_dataset", len(X_test))

    params = {
        "k":k,
        "size_train_dataset": len(X_train),
        "size_test_dataset": len(X_test),
    }

    metrics = {
        "acuracia":ac,
        "precision":pr,
        "recall":rc,
        "f1":f1
    }

    mlflow.log_params(params=params)
    mlflow.log_metrics(metrics=metrics)

    mlflow.sklearn.log_model(model, "model")

    # mlflow.log_metric("acuracia",ac)




RestException: BAD_REQUEST: (sqlite3.IntegrityError) UNIQUE constraint failed: tags.key, tags.run_uuid
[SQL: INSERT INTO tags ("key", value, run_uuid) VALUES (?, ?, ?)]
[parameters: (('mlflow.user', 'davi', '663ef17f8329423ab0ecf96b637990f7'), ('mlflow.source.name', '/home/davi/.local/lib/python3.8/site-packages/ipykernel_launcher.py', '663ef17f8329423ab0ecf96b637990f7'), ('mlflow.source.type', 'LOCAL', '663ef17f8329423ab0ecf96b637990f7'), ('mlflow.note.content', 'Flores', '663ef17f8329423ab0ecf96b637990f7'), ('mlflow.runName', 'API', '663ef17f8329423ab0ecf96b637990f7'), ('mlflow.runName', 'monumental-shrew-565', '663ef17f8329423ab0ecf96b637990f7'))]
(Background on this error at: https://sqlalche.me/e/14/gkpj)

In [15]:
import mlflow 

mlflow.set_tracking_uri(uri='http://18.230.88.3/')

PATH = 'models:/Iris/Production'

loaded_model = mlflow.sklearn.load_model(PATH)

loaded_model.predict(X_val)

array([1, 0, 2, 2, 2, 0, 1, 1, 0, 0, 0, 0, 2, 2, 0, 1, 0, 2, 1, 2, 2, 2,
       0, 1, 2, 2, 2])

In [16]:
loaded_model.predict([[5.1,	3.5,	1.4,	0.2]])



array([0])

In [24]:
loaded_model.score(X_val,y_val)

0.9259259259259259