In [23]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
warnings.filterwarnings('ignore')

In [24]:
# Step 1: Build a synthetic, imbalanced two-class dataset.
# weights=[0.9, 0.1] skews the classes and flip_y=0 disables label noise;
# random_state pins the generated samples so the cell is repeatable.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

# Last expression of the cell: the distinct labels and how often each occurs.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100], dtype=int64))

In [25]:
# Hold out 30% of the samples for evaluation. Passing stratify=y asks the
# splitter to keep the class ratio in both partitions, and the fixed
# random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [26]:
# Hyperparameters for the logistic-regression baseline.
# NOTE(review): recent scikit-learn releases deprecate `multi_class`; "auto"
# is its default, so confirm whether the key can be dropped for the installed
# version.
params = {
    "solver": "lbfgs",
    "max_iter": 1000,
    "multi_class": "auto",
    "random_state": 8888,
}

# Train the model. It is fitted exactly once: a second fit on the same data
# only repeats the work, and no random forest is created here because this
# cell evaluates the logistic regression alone.
lr = LogisticRegression(**params)
lr.fit(X_train, y_train)

# Predict on the test set
y_pred = lr.predict(X_test)

# Text report with per-class precision / recall / f1 on the held-out data.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.97      0.96       270
           1       0.62      0.50      0.56        30

    accuracy                           0.92       300
   macro avg       0.79      0.73      0.76       300
weighted avg       0.91      0.92      0.92       300



In [31]:


# Train the model
# A fixed random_state makes the forest repeatable across re-runs; without it
# every execution can produce a different model and different metrics.
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

# Predict on the test set.
# Note: this rebinds y_pred, so from here on it holds the random-forest
# predictions rather than the logistic-regression ones.
y_pred = rf.predict(X_test)

# Text report with per-class precision / recall / f1 on the held-out data.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.98      1.00      0.99       270
           1       0.96      0.83      0.89        30

    accuracy                           0.98       300
   macro avg       0.97      0.91      0.94       300
weighted avg       0.98      0.98      0.98       300



In [32]:
# Build the classification report for the current y_pred as a nested dict
# (output_dict=True) so single metrics can be looked up by key instead of
# being parsed out of the printed text.
report_dict = classification_report(
    y_true=y_test,
    y_pred=y_pred,
    output_dict=True,
)
report_dict

{'0': {'precision': 0.9817518248175182,
  'recall': 0.9962962962962963,
  'f1-score': 0.9889705882352942,
  'support': 270},
 '1': {'precision': 0.9615384615384616,
  'recall': 0.8333333333333334,
  'f1-score': 0.8928571428571429,
  'support': 30},
 'accuracy': 0.98,
 'macro avg': {'precision': 0.9716451431779899,
  'recall': 0.9148148148148149,
  'f1-score': 0.9409138655462186,
  'support': 300},
 'weighted avg': {'precision': 0.9797304884896126,
  'recall': 0.98,
  'f1-score': 0.9793592436974791,
  'support': 300}}

In [33]:
import mlflow

In [34]:
# Point the client at the tracking server BEFORE selecting the experiment.
# set_experiment() resolves the name against the currently active tracking
# URI, so calling it first would target whatever store was active previously.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000/")
mlflow.set_experiment("RF Experiment")

with mlflow.start_run():
    # Log the hyperparameters of the estimator that is actually stored in this
    # run (the random forest), so params, metrics and model describe the same
    # classifier.
    mlflow.log_params(rf.get_params())
    # NOTE(review): report_dict is expected to come from the random-forest
    # predictions, which holds when the cells are run top to bottom.
    mlflow.log_metrics({
        'accuracy': report_dict['accuracy'],
        'recall_class_0': report_dict['0']['recall'],
        'recall_class_1': report_dict['1']['recall'],
        'f1_score_macro': report_dict['macro avg']['f1-score']
    })
    mlflow.set_tag("tag1", "AJ")
    # "RF" is the artifact path under which the model is saved inside the run.
    mlflow.sklearn.log_model(rf, "RF")

2024/08/29 15:03:25 INFO mlflow.tracking.fluent: Experiment with name 'RF Experiment' does not exist. Creating a new experiment.
2024/08/29 15:03:28 INFO mlflow.tracking._tracking_service.client: 🏃 View run merciful-zebra-974 at: http://127.0.0.1:5000/#/experiments/5/runs/6af7614df30b4c7186428e84083fe591.
2024/08/29 15:03:28 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/5.


In [30]:
import mlflow
import pandas as pd

# URI of a previously logged model: run ID plus the artifact path inside it.
# NOTE(review): this run ID is hard-coded and is not the one shown in this
# notebook's logging output; confirm it still exists on the tracking server.
logged_model = 'runs:/f6d5240320174f56974e9200341c5e51/Logistic Regression'

# Load the logged model through MLflow's generic pyfunc interface.
loaded_model = mlflow.pyfunc.load_model(logged_model)

# Score the held-out features, wrapped in a pandas DataFrame.
loaded_model.predict(pd.DataFrame(X_test))

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0])

In [36]:
# Name of the entry in the MLflow Model Registry.
model_name = 'RF-Smote'

# Run ID of the run whose model should be registered; strip stray whitespace
# that tends to come along when the ID is pasted.
run_id = input('input').strip()

# The last URI segment must be the artifact path the model was logged under
# ("RF" in the logging cell), not the literal text "model_name" and not the
# registry name.
model_uri = f'runs:/{run_id}/RF'

# Registering needs no active run, so the finished run is not re-opened.
registered_version = mlflow.register_model(model_uri=model_uri, name=model_name)

input 6af7614df30b4c7186428e84083fe591


Registered model 'RF-Smote' already exists. Creating a new version of this model...
2024/08/29 15:07:18 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RF-Smote, version 2
Created version '2' of model 'RF-Smote'.
2024/08/29 15:07:18 INFO mlflow.tracking._tracking_service.client: 🏃 View run merciful-zebra-974 at: http://127.0.0.1:5000/#/experiments/5/runs/6af7614df30b4c7186428e84083fe591.
2024/08/29 15:07:18 INFO mlflow.tracking._tracking_service.client: 🧪 View experiment at: http://127.0.0.1:5000/#/experiments/5.
