# Import Library

In [1]:
import pandas as pd 
from pycaret.classification import *
import mlflow
from mlflow.models import infer_signature

# Membuat MLflow Experiment

In [2]:
# Point the MLflow client at the tracking server on localhost:5000.
mlflow.set_tracking_uri("http://127.0.0.1:5000")

# Make "Mlflow Experiment" the active experiment for subsequent runs.
# Kept as the last expression so the cell echoes the Experiment object.
mlflow.set_experiment(
    "Mlflow Experiment"
)

<Experiment: artifact_location='./mlruns/1', creation_time=1712904501368, experiment_id='1', last_update_time=1712904501368, lifecycle_stage='active', name='Mlflow Experiment', tags={}>

# Menampilkan Dataset

In [3]:
# Read the heart-disease CSV from the notebook's working directory.
dataset = pd.read_csv(
    "heart_cleveland_upload.csv"
)

# Bare last expression: rendered by the notebook as a (truncated) table preview.
dataset

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,condition
0,69,1,0,160,234,1,2,131,0,0.1,1,1,0,0
1,69,0,0,140,239,0,0,151,0,1.8,0,2,0,0
2,66,0,0,150,226,0,0,114,0,2.6,2,0,0,0
3,65,1,0,138,282,1,2,174,0,1.4,1,1,0,1
4,64,1,0,110,211,0,2,144,1,1.8,1,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
292,40,1,3,152,223,0,0,181,0,0.0,0,0,2,1
293,39,1,3,118,219,0,0,140,0,1.2,1,0,2,1
294,35,1,3,120,198,0,0,130,1,1.6,1,0,2,1
295,35,0,3,138,183,0,0,182,0,1.4,0,0,0,0


# Proses Train Model

In [7]:
# Columns that PyCaret should treat as categorical features.
fitur_cat = [
    'sex',
    'cp',
    'fbs',
    'restecg',
    'exang',
    'slope',
    'thal',
]

# Initialise the PyCaret classification experiment on `dataset` with
# 'condition' as the target. session_id=123 is echoed as "Session id" in the
# setup summary; it is presumably what keeps the split and CV folds
# repeatable — confirm against the PyCaret docs.
setup_data = setup(
    data=dataset,
    target='condition',
    categorical_features=fitur_cat,
    session_id=123,
)

Unnamed: 0,Description,Value
0,Session id,123
1,Target,condition
2,Target type,Binary
3,Original data shape,"(297, 14)"
4,Transformed data shape,"(297, 23)"
5,Transformed train set shape,"(207, 23)"
6,Transformed test set shape,"(90, 23)"
7,Ordinal features,3
8,Numeric features,6
9,Categorical features,7


In [8]:
# Train the candidate classifiers and rank them by the 'Prec.' column.
# NOTE(review): 'tr' does not look like a classification model ID (it may be a
# regression-only ID) — confirm against the list returned by models().
best_model = compare_models(
    sort='Prec.',
    exclude=['catboost', 'xgboost', 'tr'],
)

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.8457,0.8887,0.7556,0.8968,0.815,0.6838,0.6965,1.371
ridge,Ridge Classifier,0.8414,0.0,0.7478,0.8946,0.8096,0.6756,0.6879,0.057
lda,Linear Discriminant Analysis,0.8367,0.8925,0.7378,0.8932,0.8032,0.6658,0.679,0.084
lightgbm,Light Gradient Boosting Machine,0.7974,0.8501,0.7433,0.8283,0.7705,0.5877,0.6047,0.114
rf,Random Forest Classifier,0.7883,0.8623,0.7167,0.8219,0.7544,0.5692,0.5837,0.192
nb,Naive Bayes,0.8033,0.8623,0.74,0.8144,0.7681,0.5996,0.6077,0.057
ada,Ada Boost Classifier,0.8026,0.8812,0.7778,0.8056,0.7819,0.6001,0.6101,0.099
et,Extra Trees Classifier,0.7981,0.8805,0.77,0.7988,0.7769,0.5928,0.6013,0.143
gbc,Gradient Boosting Classifier,0.7693,0.8508,0.7267,0.7822,0.7477,0.534,0.5409,0.11
dt,Decision Tree Classifier,0.7064,0.7051,0.6844,0.6881,0.6836,0.4107,0.4131,0.052


# Mengisi MLflow Experiment

## Exp 1

In [9]:
# Enable MLflow autologging for the supported libraries (the cell log reports
# lightgbm, statsmodels and sklearn) so the fit below is recorded without
# manual log_* calls. Calling it again in later cells is harmless.
mlflow.autolog()

# Experiment 1: logistic regression with tol=0.0001.
# The `with` block ends the run when it exits (also on error), so an explicit
# mlflow.end_run() inside it is redundant and has been dropped.
# run_name makes this run identifiable in the MLflow UI.
with mlflow.start_run(run_name="lr_tol_0.0001"):
    lr = create_model('lr', tol=0.0001)

2024/04/14 18:13:29 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/04/14 18:13:30 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.
2024/04/14 18:13:32 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8095,0.9091,0.9,0.75,0.8182,0.6216,0.6331
1,0.9048,0.9818,0.8,1.0,0.8889,0.8073,0.8228
2,0.8571,0.8364,0.8,0.8889,0.8421,0.7123,0.7156
3,0.8095,0.8545,0.7,0.875,0.7778,0.6147,0.6264
4,0.8571,0.8364,0.8,0.8889,0.8421,0.7123,0.7156
5,0.7619,0.75,0.5556,0.8333,0.6667,0.4928,0.5173
6,0.8571,0.9815,0.7778,0.875,0.8235,0.7042,0.7077
7,0.95,1.0,0.8889,1.0,0.9412,0.898,0.9027
8,0.85,1.0,0.6667,1.0,0.8,0.6875,0.7237
9,0.8,0.7374,0.6667,0.8571,0.75,0.5876,0.6005


## Exp 2

In [10]:
# Enable MLflow autologging for the supported libraries (the cell log reports
# lightgbm, statsmodels and sklearn) so the fit below is recorded without
# manual log_* calls. Safe to call even if autologging is already on.
mlflow.autolog()

# Experiment 2: logistic regression with a looser tolerance, tol=0.01.
# The `with` block ends the run when it exits (also on error), so an explicit
# mlflow.end_run() inside it is redundant and has been dropped.
# run_name makes this run identifiable in the MLflow UI.
with mlflow.start_run(run_name="lr_tol_0.01"):
    lr = create_model('lr', tol=0.01)

2024/04/14 18:13:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/04/14 18:13:54 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.
2024/04/14 18:13:55 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8095,0.9091,0.9,0.75,0.8182,0.6216,0.6331
1,0.9048,0.9818,0.8,1.0,0.8889,0.8073,0.8228
2,0.8571,0.8364,0.8,0.8889,0.8421,0.7123,0.7156
3,0.8095,0.8545,0.7,0.875,0.7778,0.6147,0.6264
4,0.8571,0.8364,0.8,0.8889,0.8421,0.7123,0.7156
5,0.7619,0.75,0.5556,0.8333,0.6667,0.4928,0.5173
6,0.8571,0.9815,0.7778,0.875,0.8235,0.7042,0.7077
7,0.95,1.0,0.8889,1.0,0.9412,0.898,0.9027
8,0.85,1.0,0.6667,1.0,0.8,0.6875,0.7237
9,0.8,0.7374,0.6667,0.8571,0.75,0.5876,0.6005


## Exp 3

In [11]:
# Enable MLflow autologging for the supported libraries (the cell log reports
# lightgbm, statsmodels and sklearn) so the fit below is recorded without
# manual log_* calls. Safe to call even if autologging is already on.
mlflow.autolog()

# Experiment 3: logistic regression with tol=0.005.
# The `with` block ends the run when it exits (also on error), so an explicit
# mlflow.end_run() inside it is redundant and has been dropped.
# run_name makes this run identifiable in the MLflow UI.
with mlflow.start_run(run_name="lr_tol_0.005"):
    lr = create_model('lr', tol=0.005)

2024/04/14 18:14:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for lightgbm.
2024/04/14 18:14:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for statsmodels.
2024/04/14 18:14:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for sklearn.


Unnamed: 0_level_0,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC
Fold,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
0,0.8095,0.9091,0.9,0.75,0.8182,0.6216,0.6331
1,0.9048,0.9818,0.8,1.0,0.8889,0.8073,0.8228
2,0.8571,0.8364,0.8,0.8889,0.8421,0.7123,0.7156
3,0.8095,0.8545,0.7,0.875,0.7778,0.6147,0.6264
4,0.8571,0.8364,0.8,0.8889,0.8421,0.7123,0.7156
5,0.7619,0.75,0.5556,0.8333,0.6667,0.4928,0.5173
6,0.8571,0.9815,0.7778,0.875,0.8235,0.7042,0.7077
7,0.95,1.0,0.8889,1.0,0.9412,0.898,0.9027
8,0.85,1.0,0.6667,1.0,0.8,0.6875,0.7237
9,0.8,0.7374,0.6667,0.8571,0.75,0.5876,0.6005
