In [10]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings


import mlflow
import mlflow.sklearn
import mlflow.xgboost

# Suppress every Python warning for the rest of this kernel session.
# NOTE(review): this is a blanket filter, so deprecation notices from the
# libraries imported above are hidden as well.
warnings.filterwarnings('ignore')

# 1. Create an Imbalanced Binary Classification Dataset

In [2]:

# Synthetic two-class dataset with a 90% / 10% class split.
# flip_y=0 disables random label flipping; random_state fixes the draw.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

# Show the per-class sample counts to confirm the imbalance.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100], dtype=int64))

# 2. Split data into train and test

In [3]:
# Hold out 30% of the samples for evaluation; stratify=y keeps the class
# ratio the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)


# 3. Handle Class imbalance using SMOTETomek and then train using XGBoost

In [5]:
from imblearn.combine import SMOTETomek

# Resample the training partition only; the test partition is left untouched.
smt = SMOTETomek(random_state=42)
X_train_res, y_train_res = smt.fit_resample(X_train, y_train)

# Show the per-class counts after resampling.
np.unique(y_train_res, return_counts=True)

(array([0, 1]), array([619, 619], dtype=int64))

# 4. Track all Models in one place

In [8]:
# Experiment roster. Every entry is a 5-tuple:
#   (run name, hyper-parameters, unfitted estimator, (X, y) to fit on, (X, y) to evaluate on)
# Only the last entry fits on the SMOTETomek-resampled training data; all
# entries are evaluated on the same untouched test partition.
models = [
    (
        "Logistic Regression",
        {"C": 1, "solver": 'liblinear'},
        LogisticRegression(),
        (X_train, y_train),
        (X_test, y_test),
    ),
    (
        "Random Forest",
        {"n_estimators": 30, "criterion": 'gini', "max_depth": 3},
        RandomForestClassifier(),
        (X_train, y_train),
        (X_test, y_test),
    ),
    (
        "XGB Classifier",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (X_train, y_train),
        (X_test, y_test),
    ),
    (
        "XGB Classifier with SMOTE",
        {"use_label_encoder": False, "eval_metric": 'logloss'},
        XGBClassifier(),
        (X_train_res, y_train_res),
        (X_test, y_test),
    ),
]

# 5. Read Model and Predict  

In [9]:
# Fit every entry of `models` and collect one classification report per model.
# `reports[i]` corresponds to `models[i]`.
reports = []

# The train/test tuples are unpacked into loop-local names (X_fit, y_fit,
# X_eval, y_eval). Reusing the notebook-level names X_train / y_train /
# X_test / y_test here would leave them pointing at the data of the last
# entry (the resampled set) once the loop finishes, silently corrupting any
# cell that is run or re-run afterwards.
for model_name, params, model, (X_fit, y_fit), (X_eval, y_eval) in models:
    # Hyper-parameters are kept next to the estimator in `models` and applied
    # here, just before fitting.
    model.set_params(**params)
    model.fit(X_fit, y_fit)

    y_pred = model.predict(X_eval)

    # output_dict=True returns a nested dict instead of a formatted string,
    # so individual metrics can be looked up by key later.
    report = classification_report(y_eval, y_pred, output_dict=True)
    reports.append(report)

# 6. Initialize Dagshub

In [14]:
import dagshub

# Connect this session to the DagsHub repository, with MLflow integration
# requested via mlflow=True. This call may ask for browser authorization.
dagshub.init(
    repo_owner='VenkyJishu',
    repo_name='MLOPS',
    mlflow=True,
)




Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=c32cb2b9-c7d7-45e0-bfec-9a47cfae5df5&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=24c839ba6386278971cbb9375da38b00bbdcba9a0a2c6ed2ed6a345fc0f6991b




Output()

# 7. Log Models to DagsHub using MLflow

In [18]:
import mlflow
import os
from getpass import getpass

# MLflow reads the tracking server location and HTTP basic-auth credentials
# from these environment variables.
os.environ['MLFLOW_TRACKING_USERNAME'] = 'VenkyJishu'  # your DagsHub user name
os.environ['MLFLOW_TRACKING_URI'] = 'https://dagshub.com/VenkyJishu/MLOPS.mlflow'  # shown under "Remote" in the DagsHub repo

# SECURITY: the access token must never be written into the notebook.
# Use the value already exported in the environment, otherwise prompt for it
# without echoing. Any token that was ever committed in this file should be
# revoked and regenerated.
if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
    os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass('DagsHub access token: ')

mlflow.set_experiment("Anomoly Detection-V2")
# To log to a local tracking server instead:
# mlflow.set_tracking_uri("http://127.0.0.1:5000/")

# `reports` is matched to `models` by position; refuse to log if they are out
# of sync (for example, `models` was edited without re-running the training cell).
if len(models) != len(reports):
    raise ValueError(
        f"models ({len(models)}) and reports ({len(reports)}) differ in length; "
        "re-run the training cell before logging."
    )

# One MLflow run per model: parameters, headline metrics and the fitted model.
for (model_name, params_v2, model, _train_set, _test_set), rep in zip(models, reports):
    with mlflow.start_run(run_name=model_name):
        mlflow.log_param('dagshub_venky', model_name)
        mlflow.log_params(params_v2)
        mlflow.log_metrics(
            {
                'accuracy': rep['accuracy'],
                'recall_class_0': rep['0']['recall'],
                'recall_class_1': rep['1']['recall'],
                'f1_score_macro_avg': rep['macro avg']['f1-score'],
            }
        )

        # XGBoost models are logged with the XGBoost flavor, everything else
        # with the scikit-learn flavor.
        if "XGB" in model_name:
            mlflow.xgboost.log_model(model, "model")
        else:
            mlflow.sklearn.log_model(model, "model")

2024/10/09 17:36:17 INFO mlflow.tracking._tracking_service.client: 🏃 View run Logistic Regression at: https://dagshub.com/VenkyJishu/MLOPS.mlflow/#/experiments/0/runs/5c2500798d7e4c279dd4227364b4d27e.
2024/10/09 17:36:17 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/VenkyJishu/MLOPS.mlflow/#/experiments/0.
2024/10/09 17:36:27 INFO mlflow.tracking._tracking_service.client: 🏃 View run Random Forest at: https://dagshub.com/VenkyJishu/MLOPS.mlflow/#/experiments/0/runs/078abaded4174f2f853dfe1541f08b63.
2024/10/09 17:36:27 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com/VenkyJishu/MLOPS.mlflow/#/experiments/0.
2024/10/09 17:36:38 INFO mlflow.tracking._tracking_service.client: 🏃 View run XGB Classifier at: https://dagshub.com/VenkyJishu/MLOPS.mlflow/#/experiments/0/runs/6556c99cdc3e4c57835641a5db127ac1.
2024/10/09 17:36:38 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: https://dagshub.com