In [1]:
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import classification_report
import warnings
# Silence every Python warning for the rest of the session. Note that this
# also hides deprecation notices raised by the libraries imported above.
warnings.filterwarnings('ignore')

In [2]:
# Step 1: synthesize a binary classification problem with a 90/10 class skew.
# Of the 10 features, 2 are informative and the other 8 are redundant;
# flip_y=0 turns off random label flipping and the fixed seed makes the
# data reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=2,
    n_redundant=8,
    weights=[0.9, 0.1],
    flip_y=0,
    random_state=42,
)

# Sanity check: show each class label alongside its sample count.
np.unique(y, return_counts=True)

(array([0, 1]), array([900, 100]))

In [4]:
# Hold out 20% of the rows for evaluation. Stratifying on y keeps the class
# proportions the same in the training and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

In [7]:
# Hyperparameters for the baseline model, collected in one mapping so the
# exact same values can be passed around as a unit.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases;
# confirm against the installed version before upgrading.
params = dict(
    solver='lbfgs',
    max_iter=1000,
    random_state=42,
    multi_class='auto',
)

# Build the logistic-regression baseline and fit it on the training split
# (fit() returns the estimator itself, so the chained form is equivalent).
lr = LogisticRegression(**params).fit(X_train, y_train)

# Predict labels for the held-out test split.
y_pred = lr.predict(X_test)

# Per-class precision / recall / F1 rendered as a plain-text table.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.93      0.96      0.95       180
           1       0.50      0.40      0.44        20

    accuracy                           0.90       200
   macro avg       0.72      0.68      0.69       200
weighted avg       0.89      0.90      0.89       200



In [8]:
# Same report as above, but as a nested dict so that individual scores can
# be looked up by key instead of parsed out of text.
report_dict = classification_report(
    y_test,
    y_pred,
    output_dict=True,
)

In [9]:
# Display the dict form of the classification report.
report_dict

{'0': {'precision': 0.9347826086956522,
  'recall': 0.9555555555555556,
  'f1-score': 0.945054945054945,
  'support': 180.0},
 '1': {'precision': 0.5,
  'recall': 0.4,
  'f1-score': 0.4444444444444444,
  'support': 20.0},
 'accuracy': 0.9,
 'macro avg': {'precision': 0.7173913043478262,
  'recall': 0.6777777777777778,
  'f1-score': 0.6947496947496947,
  'support': 200.0},
 'weighted avg': {'precision': 0.891304347826087,
  'recall': 0.9,
  'f1-score': 0.8949938949938949,
  'support': 200.0}}

In [10]:
import mlflow

In [None]:
# Point the client at the tracking server BEFORE selecting the experiment.
# set_experiment() looks up (or creates) the experiment in whichever tracking
# store is active at call time, so calling it first would register the
# experiment in the default store and leave the run below pointing at an
# experiment id that belongs to the wrong backend.
mlflow.set_tracking_uri('http://127.0.0.1:5000')
mlflow.set_experiment('first experiment')

# Record the hyperparameters, headline metrics and fitted model in one run.
with mlflow.start_run():
    mlflow.log_params(params)
    # 'accuracy' is the overall score; precision / recall / f1-score are the
    # values for class '1', the minority class in this dataset.
    mlflow.log_metrics({
        'accuracy': report_dict['accuracy'],
        'precision': report_dict['1']['precision'],
        'recall': report_dict['1']['recall'],
        'f1-score': report_dict['1']['f1-score']
    })
    # Store the fitted estimator with the run under the name 'model'.
    mlflow.sklearn.log_model(lr, 'model')

2025/10/10 17:53:02 INFO mlflow.tracking.fluent: Experiment with name 'first experiment' does not exist. Creating a new experiment.
