In [34]:
!pip install lazypredict
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, classification_report
from lazypredict.Supervised import LazyClassifier, LazyRegressor



In [35]:
# Load a dataset (example: Breast Cancer dataset)
from sklearn.datasets import load_breast_cancer
data = load_breast_cancer()

# Convert to DataFrame
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [36]:
# Initialize LazyClassifier
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)

# Run LazyPredict
models, predictions = clf.fit(X_train, X_test, y_train, y_test)

# Display Results
print(models)

 97%|█████████▋| 30/31 [00:03<00:00,  5.93it/s]

[LightGBM] [Info] Number of positive: 286, number of negative: 169
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000350 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4540
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.628571 -> initscore=0.526093
[LightGBM] [Info] Start training from score 0.526093


100%|██████████| 31/31 [00:03<00:00,  8.19it/s]

                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
BernoulliNB                        0.98               0.98     0.98      0.98   
PassiveAggressiveClassifier        0.98               0.98     0.98      0.98   
SVC                                0.98               0.98     0.98      0.98   
Perceptron                         0.97               0.97     0.97      0.97   
AdaBoostClassifier                 0.97               0.97     0.97      0.97   
LogisticRegression                 0.97               0.97     0.97      0.97   
SGDClassifier                      0.96               0.97     0.97      0.97   
ExtraTreeClassifier                0.96               0.97     0.97      0.97   
CalibratedClassifierCV             0.97               0.97     0.97      0.97   
RandomForestClassifier             0.96               0.96     0.96      0.96   
LGBMClassifier              




In [37]:
# Initialize LazyRegressor
reg = LazyRegressor(verbose=0, ignore_warnings=True, custom_metric=None)

# Run LazyPredict
models, predictions = reg.fit(X_train, X_test, y_train, y_test)

# Display Results
print(models)


 98%|█████████▊| 41/42 [00:15<00:01,  1.16s/it]

[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000308 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4540
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[LightGBM] [Info] Start training from score 0.628571


100%|██████████| 42/42 [00:15<00:00,  2.66it/s]

                               Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                         
ExtraTreesRegressor                          0.86       0.90  0.15        0.16
AdaBoostRegressor                            0.84       0.88  0.17        0.35
HistGradientBoostingRegressor                0.83       0.88  0.17        0.56
GradientBoostingRegressor                    0.82       0.87  0.18        1.03
LGBMRegressor                                0.82       0.87  0.18        0.42
NuSVR                                        0.82       0.86  0.18        0.24
RandomForestRegressor                        0.81       0.86  0.18        1.88
SVR                                          0.80       0.85  0.19        0.08
MLPRegressor                                 0.79       0.85  0.19        1.80
KNeighborsRegressor                          0.79       0.85  0.19        0.06
BaggingRegressor                             0.78   




In [39]:
# Install the necessary libraries
!pip install lazypredict

import pandas as pd
from sklearn.model_selection import train_test_split
from lazypredict.Supervised import LazyClassifier
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import (
    precision_score, recall_score, accuracy_score,
    balanced_accuracy_score, roc_auc_score, f1_score
)

# Load the breast cancer dataset
data = load_breast_cancer()

# Convert to DataFrame
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target)

# Split into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize LazyClassifier
clf = LazyClassifier(verbose=0, ignore_warnings=True, custom_metric=None)

# Run LazyPredict for classification
models_summary, predictions = clf.fit(X_train, X_test, y_train, y_test)

# Prepare a dictionary to store all metrics for each model
metrics_dict = {
    "Model": [],
    "Accuracy": [],
    "Balanced Accuracy": [],
    "ROC AUC": [],
    "F1 Score": [],
    "Precision": [],
    "Recall": []
}

# Calculate and store metrics for each model
for model_name in models_summary.index:
    # Train each model individually to get predictions
    model = clf.models[model_name]  # Get the model from LazyClassifier
    model.fit(X_train, y_train)  # Fit the model
    y_pred = model.predict(X_test)  # Predict on the test set

    # Append model name
    metrics_dict["Model"].append(model_name)

    # Calculate metrics
    metrics_dict["Accuracy"].append(accuracy_score(y_test, y_pred))
    metrics_dict["Balanced Accuracy"].append(balanced_accuracy_score(y_test, y_pred))
    metrics_dict["ROC AUC"].append(roc_auc_score(y_test, y_pred))
    metrics_dict["F1 Score"].append(f1_score(y_test, y_pred))
    metrics_dict["Precision"].append(precision_score(y_test, y_pred, zero_division=1))
    metrics_dict["Recall"].append(recall_score(y_test, y_pred, zero_division=1))

# Convert metrics dictionary to DataFrame
metrics_df = pd.DataFrame(metrics_dict).set_index("Model")

# Display the DataFrame for an organized, side-by-side view of all metrics
print("Detailed Model Metrics:")
print(metrics_df)



100%|██████████| 31/31 [00:03<00:00,  8.59it/s]

[LightGBM] [Info] Number of positive: 286, number of negative: 169
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000321 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4540
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.628571 -> initscore=0.526093
[LightGBM] [Info] Start training from score 0.526093





[LightGBM] [Info] Number of positive: 286, number of negative: 169
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000301 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 4540
[LightGBM] [Info] Number of data points in the train set: 455, number of used features: 30
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.628571 -> initscore=0.526093
[LightGBM] [Info] Start training from score 0.526093
Detailed Model Metrics:
                               Accuracy  Balanced Accuracy  ROC AUC  F1 Score  \
Model                                                                           
BernoulliNB                        0.98               0.98     0.98      0.99   
PassiveAggressiveClassifier        0.98               0.98     0.98      0.99   
SVC                                0.98               0.98     0.98      0.99   
Perceptron                         0.97               0.97     0.97      0.98   
AdaB