In [1]:
# ====================================================================
# Part 1: Setup DagsHub Connection
# ====================================================================

import getpass
import os

import dagshub
import mlflow
import pandas as pd
import xgboost as xgb
from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# --- DagsHub Initialization ---
# Repository coordinates live in one place so that dagshub.init() and the
# MLflow tracking URI below can never point at different repositories.
DAGSHUB_REPO_OWNER = 'adhishpawar'
DAGSHUB_REPO_NAME = 'mlflow-experiment-tracking'

# This line helps DagsHub to auto-configure some settings for you.
dagshub.init(repo_owner=DAGSHUB_REPO_OWNER, repo_name=DAGSHUB_REPO_NAME, mlflow=True)

# --- Set DagsHub Credentials as Environment Variables ---
# SECURITY: an access token was previously hard-coded in this cell. Any token
# that has been saved in a notebook must be treated as leaked: revoke it in the
# DagsHub settings and issue a new one.
# The token is now taken from the MLFLOW_TRACKING_PASSWORD environment variable
# when it is already set (by the shell, a .env loader, or an earlier step), and
# is otherwise requested interactively so it never lands in the notebook file.
os.environ['MLFLOW_TRACKING_USERNAME'] = 'adhishpawar'
if not os.environ.get('MLFLOW_TRACKING_PASSWORD'):
    os.environ['MLFLOW_TRACKING_PASSWORD'] = getpass.getpass('DagsHub access token: ')
if not os.environ['MLFLOW_TRACKING_PASSWORD']:
    # An empty prompt answer would only surface later as an opaque HTTP 401.
    raise RuntimeError(
        "No DagsHub access token provided. Set MLFLOW_TRACKING_PASSWORD or enter it when prompted."
    )
os.environ['MLFLOW_TRACKING_URI'] = (
    f'https://dagshub.com/{DAGSHUB_REPO_OWNER}/{DAGSHUB_REPO_NAME}.mlflow'
)


# --- Set the Tracking URI for MLflow ---
# This tells MLflow to send all data to your DagsHub server instead of localhost.
mlflow.set_tracking_uri(os.environ['MLFLOW_TRACKING_URI'])


print("✅ DagsHub connection is configured. Ready to start experiment.")


# ====================================================================
# Part 2: Train, Log, and Register the Model (Now on DagsHub)
# ====================================================================
#
# Generates a synthetic binary-classification dataset, fits an XGBoost
# classifier, logs its hyper-parameters, test accuracy and model artifact to
# the active MLflow tracking server, then registers the logged model.

# Settings a reader may want to tune, named instead of inlined.
SEED = 42                 # shared by data generation and the train/test split
TEST_FRACTION = 0.2       # share of samples held out for evaluation
ARTIFACT_PATH = "models"  # A folder for your model files (inside the run)

# Start a new run. This run will now appear in DagsHub.
with mlflow.start_run() as run:
    print("🚀 Starting a new run on DagsHub...")

    # --- 1. Data and Parameters ---
    X, y = make_classification(n_samples=5000, n_features=15, random_state=SEED)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_FRACTION, random_state=SEED
    )
    # A few training rows, passed to log_model as its input example.
    X_train_df = pd.DataFrame(X_train)
    input_example = X_train_df.head(5)

    params = {
        'objective': 'binary:logistic',
        'max_depth': 6,
        'learning_rate': 0.05,
        'n_estimators': 150
    }

    # --- 2. Training and Logging ---
    model = xgb.XGBClassifier(**params)
    model.fit(X_train, y_train)
    acc = accuracy_score(y_test, model.predict(X_test))

    print(f"Logging parameters: {params}")
    mlflow.log_params(params)

    print(f"Logging metric (accuracy): {acc}")
    mlflow.log_metric("accuracy", acc)

    print("Logging the model artifact...")
    mlflow.xgboost.log_model(
        xgb_model=model,
        artifact_path=ARTIFACT_PATH,
        input_example=input_example
    )

    # --- 3. Register the Model in DagsHub's Registry ---
    # NOTE(review): the registry name mentions SMOTE, but no resampling step is
    # visible in this cell; the name is kept unchanged so existing registry
    # entries still match. Confirm whether it should be renamed.
    model_name = "XGB-Smote-Production"
    # Built from ARTIFACT_PATH so the URI always matches where log_model wrote.
    model_uri = f"runs:/{run.info.run_id}/{ARTIFACT_PATH}"

    print(f"Registering the model as '{model_name}'...")
    registered_model = mlflow.register_model(model_uri=model_uri, name=model_name)

    print("\n--- Experiment Complete ---")
    print(f"✅ Run logged to DagsHub with ID: {run.info.run_id}")
    print(f"✅ Model '{registered_model.name}' version {registered_model.version} registered in DagsHub.")



✅ DagsHub connection is configured. Ready to start experiment.
🚀 Starting a new run on DagsHub...
Logging parameters: {'objective': 'binary:logistic', 'max_depth': 6, 'learning_rate': 0.05, 'n_estimators': 150}
Logging metric (accuracy): 0.902
Logging the model artifact...


  output_schema = _infer_schema(prediction)
  self.get_booster().save_model(fname)


Registering the model as 'XGB-Smote-Production'...


Successfully registered model 'XGB-Smote-Production'.
2025/11/15 19:33:00 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: XGB-Smote-Production, version 1
Created version '1' of model 'XGB-Smote-Production'.



--- Experiment Complete ---
✅ Run logged to DagsHub with ID: 5a9198e322cb48f18541691846be6b42
✅ Model 'XGB-Smote-Production' version 1 registered in DagsHub.
