In [13]:
import mlflow
import numpy as np
import pandas as pd
from sklearn.preprocessing import OrdinalEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
from pycaret.classification import *

In [2]:
# Record runs against the local MLflow tracking server.
# The previous first call, set_tracking_uri("Data/sqlite:///mlflow.db"), was
# removed: the string puts a path in front of the scheme (a SQLite store would
# be spelled "sqlite:///Data/mlflow.db"), and it was overwritten by this call
# on the very next line, so it had no effect on where runs were stored.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

In [3]:
# Activate the experiment that the runs below are recorded under; the returned
# Experiment object is left as the cell's displayed value.
mlflow.set_experiment(experiment_name="drinking")

<Experiment: artifact_location='file:///M:/CEUB - Pos Graduacao/MLOPs/3- Project Management/mlruns/1', creation_time=1729341700571, experiment_id='1', last_update_time=1729341700571, lifecycle_stage='active', name='drinking', tags={}>

In [None]:
# Pull the preprocessed dataset from the tracking server into the relative
# "Data" directory, which is where the following cell loads
# 'Data/smk_drk_preprocessed.parquet' from. A relative destination replaces the
# machine-specific absolute path so the notebook runs from any checkout.
mlflow.artifacts.download_artifacts(
    artifact_uri='mlflow-artifacts:/1/44fbf414a8fc47c3a9144c6bf149f77e/artifacts/smk_drk_preprocessed.parquet',
    dst_path='Data',
)

In [4]:
# Load the preprocessed dataset into the working frame.
df = pd.read_parquet(path='Data/smk_drk_preprocessed.parquet')

In [5]:
# Names of the object-dtype columns, i.e. the ones that need encoding.
cat_features = list(df.select_dtypes(include='object').columns)

In [6]:
# Replace each categorical column in df with its ordinal codes (in place).
# A category not seen while fitting would be encoded as -99.
ordinalEncoder = OrdinalEncoder(
    handle_unknown='use_encoded_value',
    unknown_value=-99,
)
df[cat_features] = ordinalEncoder.fit_transform(df[cat_features])

In [7]:
# Min-max scaler configured to hand back pandas objects instead of bare arrays.
scaler = MinMaxScaler()
scaler.set_output(transform='pandas')

In [8]:
# Fit the scaler on the whole frame and scale every column with it.
# NOTE(review): the fit sees all rows and every column of df (the 'DRK_YN'
# target is later dropped from `scaled`, so it is scaled too) before the
# train/test split happens - confirm this is intended.
scaled = scaler.fit(df).transform(df)

In [14]:
# Separate the 'DRK_YN' target from the feature columns, then hold out 30% of
# the rows for evaluation with a fixed seed.
y = scaled['DRK_YN']
X = scaled.drop(columns='DRK_YN')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=101,
)

In [17]:
from xgboost import XGBClassifier

In [19]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, recall_score, precision_score

In [20]:
# Train an XGBoost classifier inside an MLflow run and record its parameters,
# hold-out metrics and the fitted model.
with mlflow.start_run(run_name='drinking_0'):
    mlflow.set_tag("model_name", "XGBoost")

    # Keep the hyper-parameters in one dict so the values logged to MLflow are
    # exactly the ones the model is built with.
    params = {
        'n_estimators': 100,
        'n_jobs': -1,
        'verbosity': 0,
    }
    mlflow.log_params(params)
    xgboost = XGBClassifier(**params)

    xgboost.fit(X_train, y_train, verbose=1)

    # Hard labels feed the threshold-based metrics; ROC AUC needs a continuous
    # score, so it is computed from the predicted probability of the positive
    # class rather than from the 0/1 labels.
    y_pred = xgboost.predict(X_test)
    y_score = xgboost.predict_proba(X_test)[:, 1]

    # Metric keys are kept exactly as before (including 'recal') so new runs
    # stay comparable with runs already logged under those names.
    metrics = {
        'acc': accuracy_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_score),
        'recal': recall_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
    }

    # Local copy of the booster in the working directory.
    xgboost.save_model("xgboost.json")

    mlflow.log_metrics(metrics)
    mlflow.xgboost.log_model(xgboost, "xgboost.json")

In [None]:
# CatBoostClassifier is not imported by any cell visible in this notebook, so
# it is imported here to keep the cell runnable on a fresh kernel.
from catboost import CatBoostClassifier

# Train a CatBoost classifier on GPU inside an MLflow run and record its
# parameters, hold-out metrics and the fitted model.
with mlflow.start_run(run_name='drinking_0'):
    mlflow.log_artifact('imgs/')
    mlflow.set_tag("model_name", "CatBoost")

    # Single source of truth for the hyper-parameters: the same dict is logged
    # and used to build the model.
    params = {
        'iterations': 100,
        'task_type': "GPU",
        'devices': '0:1'
    }
    mlflow.log_params(params)
    catbooster = CatBoostClassifier(**params)
    catbooster.fit(X_train, y_train, verbose=1)

    # Hard labels feed the threshold-based metrics; ROC AUC needs a continuous
    # score, so it is computed from the predicted probability of the positive
    # class rather than from the predicted labels.
    y_pred = catbooster.predict(X_test)
    y_score = catbooster.predict_proba(X_test)[:, 1]

    # Metric keys are kept exactly as before (including 'recal') so new runs
    # stay comparable with runs already logged under those names.
    metrics = {
        'acc': accuracy_score(y_test, y_pred),
        'f1': f1_score(y_test, y_pred),
        'auc': roc_auc_score(y_test, y_score),
        'recal': recall_score(y_test, y_pred),
        'precision': precision_score(y_test, y_pred),
    }

    # Local copy of the model in CatBoost's binary format.
    catbooster.save_model('catboost_model',
                          format="cbm",
                          export_parameters=None,
                          pool=None)

    mlflow.log_metrics(metrics)
    mlflow.catboost.log_model(catbooster, "catboost_model")

In [None]:
# with mlflow.start_run(run_name='drinking'):
#     mlflow.set_tag("model_name", "lightgbm")
#     params = {
#         'n_jobs': -1
#     }
#     mlflow.log_params(params)
#     lightgbm = LGBMClassifier(n_jobs=-1)

#     lightgbm.fit(X_train,y_train)

#     y_pred = lightgbm.predict(X_test)

#     metrics = {
#     'acc' : accuracy_score(y_test, y_pred),
#     'f1' : f1_score(y_test, y_pred),
#     'auc' : roc_auc_score(y_test, y_pred),
#     'recal' : recall_score(y_test, y_pred),
#     'precision' : precision_score(y_test, y_pred)}
    
#     joblib.dump(lightgbm, 'lightgbm.pkl')
#     mlflow.log_metrics(metrics)
#     mlflow.lightgbm.log_model(lightgbm, "lightgbm.pkl")