In [2]:
pip install xgboost

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import mlflow
import mlflow.sklearn
from mlflow.tracking import MlflowClient
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from typing import Any

In [2]:
def create_mlflow_experiment(experiment_name : str,artifact_location :str):
  try:
    exp_id=mlflow.create_experiment(name=experiment_name,artifact_location=artifact_location)
  except:
    print(f'experiment {experiment_name} already exists')
    exp_id=mlflow.get_experiment_by_name(experiment_name).experiment_id
  return exp_id

In [3]:
exp_id=create_mlflow_experiment(experiment_name='Stroke_Prediction',artifact_location='mlflow_artifacts')
exp_id

experiment Stroke_Prediction already exists


  return FileStore(store_uri, store_uri)


'958305138287745628'

In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
import xgboost as xgb

In [5]:
df=pd.read_csv('datasetWithoutScale.csv')

In [6]:
X = df.drop('stroke', axis=1)
y = df['stroke']

In [7]:
if 'Unnamed: 0' in X.columns:
    X = X.drop('Unnamed: 0', axis=1)

In [8]:
X_train, X_test, y_train, y_test = train_test_split( X, y,test_size=0.2,random_state=42, stratify=y)
if 'Unnamed: 0' in X_train.columns:
    X_train = X_train.drop('Unnamed: 0', axis=1)
if 'Unnamed: 0' in X_test.columns:
    X_test = X_test.drop('Unnamed: 0', axis=1)
numerical_cols = ['age', 'avg_glucose_level', 'bmi', 'age_bmi_interaction']

In [None]:
with mlflow.start_run(run_name='logging_Logistic_Regression_auto',experiment_id=exp_id) as run :
    scaler = StandardScaler()
    X_train[numerical_cols] = scaler.fit_transform(X_train[numerical_cols])
    X_test[numerical_cols] = scaler.transform(X_test[numerical_cols])
    mlflow.autolog()
    mlflow.sklearn.autolog()
    log_reg = LogisticRegression(solver='liblinear',class_weight='balanced',random_state=42,max_iter=500)
    log_reg.fit(X_train, y_train)
    y_pred = log_reg.predict(X_test)

2025/11/21 21:59:06 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


In [9]:
from sklearn.metrics import (
    accuracy_score, f1_score, log_loss, precision_score, recall_score, roc_auc_score, confusion_matrix
)
import mlflow
import xgboost as xgb

In [None]:
with mlflow.start_run(run_name='logging_XGB_manual', experiment_id=exp_id) as run:
    scale_pos_weight_value = 4861 / 249
    xgb_model = xgb.XGBClassifier(
        objective='binary:logistic',
        eval_metric='logloss',
        use_label_encoder=False,
        scale_pos_weight=scale_pos_weight_value,
        random_state=42,
        n_estimators=100
    )
    xgb_model.fit(
        X_train, y_train, 
        eval_set=[(X_test, y_test)],
        verbose=False
    )
    y_pred = xgb_model.predict(X_test)
    y_pred_prob = xgb_model.predict_proba(X_test)
    
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_pred_prob[:, 1]) if y_pred_prob is not None else None
    ll = log_loss(y_test, y_pred_prob) if y_pred_prob is not None else None
    score = xgb_model.score(X_test, y_test)
    cm = confusion_matrix(y_test, y_pred)

    # Log metric
    mlflow.log_metric("test_accuracy_score", acc)
    mlflow.log_metric("test_f1_score", f1)
    mlflow.log_metric("test_precision_score", precision)a
    mlflow.log_metric("test_recall_score", recall)
    if roc_auc is not None:
        mlflow.log_metric("test_roc_auc", roc_auc)
    if ll is not None:
        mlflow.log_metric("test_log_loss", ll)
    mlflow.log_metric("test_score", score)
    mlflow.sklearn.log_model(xgb_model, artifact_path="xgb_model")

Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)


In [10]:
from sklearn.model_selection import GridSearchCV

In [15]:
with mlflow.start_run(run_name='logging_GridSearchCV',experiment_id=exp_id) as run :
    mlflow.autolog()
    mlflow.sklearn.autolog()
    scale_pos_weight_value = 4861 / 249
    param_grid = {
        'max_depth': [3, 4, 5],
        'learning_rate': [0.05, 0.1],  
        'n_estimators': [100, 200, 300]
    }
    xgb_base = xgb.XGBClassifier(
        objective='binary:logistic',
        eval_metric='logloss',
        use_label_encoder=False,
        scale_pos_weight=scale_pos_weight_value,
        random_state=42
    )
    grid_search = GridSearchCV(
    estimator=xgb_base,
    param_grid=param_grid,
    scoring='f1',       
    cv=5,               
    verbose=2,         
    n_jobs=-1          
)
    grid_search.fit(X_train, y_train)
    y_pred = grid_search.predict(X_test)

2025/11/21 22:06:10 INFO mlflow.tracking.fluent: Autologging successfully enabled for xgboost.


Fitting 5 folds for each of 18 candidates, totalling 90 fits


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
2025/11/21 22:06:21 INFO mlflow.sklearn.utils: Logging the 5 best runs, 13 runs will be omitted.


In [16]:
pip install lightgbm

Collecting lightgbm
  Downloading lightgbm-4.6.0-py3-none-win_amd64.whl.metadata (17 kB)
Downloading lightgbm-4.6.0-py3-none-win_amd64.whl (1.5 MB)
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   ---------------------------------------- 0.0/1.5 MB ? eta -:--:--
   -------------- ------------------------- 0.5/1.5 MB 3.1 MB/s eta 0:00:01
   ------------------------------------ --- 1.3/1.5 MB 3.8 MB/s eta 0:00:01
   ---------------------------------------- 1.5/1.5 MB 3.4 MB/s  0:00:00
Installing collected packages: lightgbm
Successfully installed lightgbm-4.6.0
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [11]:
import lightgbm as lgb

In [21]:
with mlflow.start_run(run_name='logging_lightgbm_manual', experiment_id=exp_id) as run:
    
    model = lgb.LGBMClassifier(
        is_unbalance=True,
        boosting_type='gbdt',
        n_estimators=1000,
        learning_rate=0.02,
        max_depth=7,
        num_leaves=40,
        min_child_samples=15,
        subsample=0.85,
        colsample_bytree=0.85,
        reg_alpha=0.1,
        reg_lambda=0.1,
        random_state=42,
        verbose=-1
    )

    # Train
    model.fit(X_train, y_train)

    # Probabilities
    y_pred_prob_full = model.predict_proba(X_test)      # shape = (n_samples, 2)
    y_proba = y_pred_prob_full[:, 1]                    # positive class prob

    # Thresholded predictions
    threshold = 0.28
    y_pred = (y_proba >= threshold).astype(int)

    # Metrics
    acc = accuracy_score(y_test, y_pred)
    f1 = f1_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    roc_auc = roc_auc_score(y_test, y_proba)
    ll = log_loss(y_test, y_pred_prob_full)
    score = model.score(X_test, y_test)
    cm = confusion_matrix(y_test, y_pred)

    # Log metrics
    mlflow.log_metric("test_accuracy_score", acc)
    mlflow.log_metric("test_f1_score", f1)
    mlflow.log_metric("test_precision_score", precision)
    mlflow.log_metric("test_recall_score", recall)
    mlflow.log_metric("test_roc_auc", roc_auc)
    mlflow.log_metric("test_log_loss", ll)
    mlflow.log_metric("test_score", score)

    # Log model
    mlflow.sklearn.log_model(model, artifact_path="lgb_model")




In [None]:
import mlflow
import mlflow.sklearn
mlflow.set_experiment("Stroke_Prediction")
log_reg_run_id = "a9f0a841d5b84dd49bbdf82b7069f744"  
model_name = "LogisticRegression_Stroke"
result = mlflow.register_model(
    model_uri=f"runs:/{log_reg_run_id}/model",   
    name=model_name
)
print(f"Model Registered: {model_name}")
print(f"New Version: {result.version}")


  return FileStore(store_uri)
Successfully registered model 'LogisticRegression_Stroke'.


Model Registered: LogisticRegression_Stroke
New Version: 1


Created version '1' of model 'LogisticRegression_Stroke'.


In [23]:
mlflow.set_experiment("Stroke_Prediction")
log_reg_run_id = "7b1011da2dd047f4b8764714f9c1531c"  
model_name = "lightgbm_Stroke"
result = mlflow.register_model(
    model_uri=f"runs:/{log_reg_run_id}/model",   
    name=model_name
)
print(f"Model Registered: {model_name}")
print(f"New Version: {result.version}")


Successfully registered model 'lightgbm_Stroke'.


Model Registered: lightgbm_Stroke
New Version: 1


Created version '1' of model 'lightgbm_Stroke'.


In [12]:
loaded_model = mlflow.pyfunc.load_model("models:/LogisticRegression_Stroke/1")

  return FileStore(store_uri)


In [None]:
from mlflow.tracking import MlflowClient
client=MlflowClient()
model_name="LogisticRegression_Stroke"
version_to_promote=1 
print(f"attemping to transition model {model_name} version {version_to_promote} to 'prodcution' stage ...")
try:
    client.transition_model_version_stage( #translate it into production
        name=model_name,
        version=version_to_promote,
        stage="Production",
         archive_existing_versions=True # set to true to automatically archive any other model currently in production
    )
    print(f"Model {model_name} Version {version_to_promote} succefully transitioned to 'Production'")
except Exception as e:
    print(f"Error transitioning model stage : {e}")
    print("Please ensure the model name and version are correct and the MLflow tracking server is running")
print("\n after successful transition , you can now run your inference script")
print("you can also verify the stage change in the MLflow UI under the 'Models' tab")
    

attemping to transition model LogisticRegression_Stroke version 1 to 'prodcution' stage ...
Model LogisticRegression_Stroke Version 1 succefully transitioned to 'Production'

 after successful transition , you can now run your inference script
you can also verify the stage change in the MLflow UI under the 'Models' tab


  client.transition_model_version_stage( #translate it into production


In [17]:
print(f"attemping to load model '{model_name}' for inference ")
model_version=1
try:
    predictions_version=loaded_model.predict(X_train.iloc[[85]])
    print(f"\n predicitons from version {model_version} of '{model_name}' : ")
    print(predictions_version)
except Exception as e:
    print(f"couldn't load latest model. please ensure'{model_name}' is registered and has at least one version")
    print(f'Error: {e}')

attemping to load model 'LogisticRegression_Stroke' for inference 

 predicitons from version 1 of 'LogisticRegression_Stroke' : 
[0]
