<a href="https://colab.research.google.com/github/VinodPungle/dagshubdemo/blob/main/Vinod_Dagshub.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
import xgboost as xgb
from sklearn.metrics import classification_report
import pickle
!pip install mlflow dagshub
import mlflow
import dagshub


In [6]:
# Load and prepare data
data = '/content/heart.csv'
df = pd.read_csv(data)
X = df.drop('target', axis=1)
y = df.target


In [7]:
# Split data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=10)

# Handle missing values
from sklearn.impute import SimpleImputer
fill = SimpleImputer(missing_values=0, strategy="mean")
X_train = fill.fit_transform(X_train)
X_test = fill.transform(X_test)


In [10]:
# Initialize DagsHub
import dagshub
dagshub.init(repo_owner='vinod.pungle', repo_name='dagshubdemo', mlflow=True)

In [11]:
# Define models and their parameters
models = {
    'logistic_regression': {
        'model': LogisticRegression(),
        'params': {
            "solver": "lbfgs",
            "max_iter": 45,
            "multi_class": "auto",
            "random_state": 123
        }
    },
    'random_forest': {
        'model': RandomForestClassifier(),
        'params': {
            'n_estimators': 100,
            'max_depth': 10,
            'random_state': 123
        }
    },
    'xgboost': {
        'model': xgb.XGBClassifier(),
        'params': {
            'n_estimators': 100,
            'max_depth': 3,
            'learning_rate': 0.1,
            'random_state': 123
        }
    },
    'svm': {
        'model': SVC(),
        'params': {
            'kernel': 'rbf',
            'C': 1.0,
            'random_state': 123
        }
    }
}


In [14]:
#!pip install mlflow
import mlflow

# Train and log each model
mlflow.set_experiment("Multi_Classifier_Diabetes_Experiment")

for model_name, model_info in models.items():
    print(f"\nTraining {model_name}...")

    with mlflow.start_run(run_name=model_name):
        # Set tags
        mlflow.set_tag("author", "Vinod")
        mlflow.set_tag("model_type", model_name)

        # Train model
        model = model_info['model']
        model.set_params(**model_info['params'])
        model.fit(X_train, y_train)

        # Make predictions
        y_pred = model.predict(X_test)

        # Get metrics
        report_dict = classification_report(y_test, y_pred, output_dict=True)

        # Log parameters
        mlflow.log_params(model_info['params'])

        # Log metrics
        metrics = {
            'accuracy': report_dict['accuracy'],
            'recall_class_0': report_dict['0']['recall'],
            'recall_class_1': report_dict['1']['recall'],
            'f1_score_macro': report_dict['macro avg']['f1-score']
        }
        mlflow.log_metrics(metrics)

        # Save and log model
        filename = f'{model_name}_model.pkl'
        pickle.dump(model, open(filename, 'wb'))
        mlflow.log_artifact(filename, model_name)

        print(f"{model_name} Results:")
        print(f"Accuracy: {metrics['accuracy']:.4f}")
        print(f"F1 Score (macro): {metrics['f1_score_macro']:.4f}")

print("\nAll models have been trained and logged to MLflow")


Training logistic_regression...


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


logistic_regression Results:
Accuracy: 0.8361
F1 Score (macro): 0.8339
🏃 View run logistic_regression at: https://dagshub.com/vinod.pungle/dagshubdemo.mlflow/#/experiments/0/runs/6824dc30e50d4ffe9c25bdd0c6297d89
🧪 View experiment at: https://dagshub.com/vinod.pungle/dagshubdemo.mlflow/#/experiments/0

Training random_forest...
random_forest Results:
Accuracy: 0.7705
F1 Score (macro): 0.7699
🏃 View run random_forest at: https://dagshub.com/vinod.pungle/dagshubdemo.mlflow/#/experiments/0/runs/3ca79e6912a94949b2ecb0ca4a434d19
🧪 View experiment at: https://dagshub.com/vinod.pungle/dagshubdemo.mlflow/#/experiments/0

Training xgboost...
xgboost Results:
Accuracy: 0.7705
F1 Score (macro): 0.7699
🏃 View run xgboost at: https://dagshub.com/vinod.pungle/dagshubdemo.mlflow/#/experiments/0/runs/f6b4d449aa314323a186a4161440dbdd
🧪 View experiment at: https://dagshub.com/vinod.pungle/dagshubdemo.mlflow/#/experiments/0

Training svm...
svm Results:
Accuracy: 0.5902
F1 Score (macro): 0.5788
🏃 View run