In [2]:

# In a blog I posted a while back, I described how users can loop through a bunch of Scikit-Learn ML models
# with the intention of trying multiple models in one go, to quickly and easily see the predictive capabilities
# of each model. LazyPredict takes this concept one step further, but auto-looping through models.

# LazyPredict is a Python library that simplifies the machine learning process by providing a unified 
# interface to multiple algorithms. With LazyPredict, users can quickly and easily evaluate multiple models and 
# identify the most promising options for a particular task. The library includes a variety of algorithms, from 
# linear models to tree-based models and even neural networks. By automating much of the machine learning process, 
# LazyPredict can help streamline the model selection and tuning process, saving significant time and effort for 
# data scientists and machine learning practitioners. 


In [1]:

import lazypredict
from lazypredict.Supervised import LazyRegressor
from sklearn import datasets
from sklearn.utils import shuffle 
import numpy as np

from sklearn.model_selection import train_test_split
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
import seaborn as sb
import scipy
from scipy.stats import spearmanr
from sklearn import datasets
import statsmodels.api as sm
import numpy as np
import pandas as pd



mtcars = sm.datasets.get_rdataset("mtcars", "datasets", cache=True).data
df = pd.DataFrame(mtcars)
print(df.columns)
df.head()


X = df[['cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear', 'carb']]
y = df['mpg']

X_train ,X_test ,y_train ,y_test = train_test_split(X,y,test_size=.2,random_state=42)

reg = LazyRegressor(verbose=0,ignore_warnings=False,custom_metric=None)
train,test = reg.fit(X_train,X_test,y_train,y_test)
train


Index(['mpg', 'cyl', 'disp', 'hp', 'drat', 'wt', 'qsec', 'vs', 'am', 'gear',
       'carb'],
      dtype='object')


100%|██████████████████████████████████████████████████████████████████████████████████| 42/42 [00:02<00:00, 15.43it/s]


Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
KernelRidge,18.19,-10.46,21.4,0.03
GaussianProcessRegressor,6.41,-2.61,12.01,0.03
MLPRegressor,2.92,-0.28,7.16,0.15
RANSACRegressor,2.56,-0.04,6.45,0.11
QuantileRegressor,2.55,-0.03,6.42,0.04
DummyRegressor,2.5,-0.0,6.33,0.02
HistGradientBoostingRegressor,2.5,-0.0,6.33,0.07
LGBMRegressor,2.5,-0.0,6.33,0.04
SVR,1.82,0.46,4.66,0.01
NuSVR,1.81,0.46,4.66,0.02


In [3]:

import lazypredict
from lazypredict.Supervised import LazyClassifier

from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from pylab import rcParams
import seaborn as sb
import scipy
from scipy.stats import spearmanr
from sklearn import datasets
import statsmodels.api as sm
import numpy as np
import pandas as pd

# Load the Wine Quality dataset
url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/wine-quality/winequality-red.csv'
wine_data = pd.read_csv(url, sep=';')
print(wine_data.columns)
wine_data.head()


Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')


Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4,5
1,7.8,0.88,0.0,2.6,0.1,25.0,67.0,1.0,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.09,15.0,54.0,1.0,3.26,0.65,9.8,5
3,11.2,0.28,0.56,1.9,0.07,17.0,60.0,1.0,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.08,11.0,34.0,1.0,3.51,0.56,9.4,5


In [6]:

X = wine_data.drop('quality', axis=1)
y = wine_data['quality']

X_train ,X_test ,y_train ,y_test = train_test_split(X,y,test_size=.2,random_state=42)


clf = LazyClassifier(verbose=0,ignore_warnings=False,custom_metric=None)
train,test = clf.fit(X_train,X_test,y_train,y_test)
train


  7%|█████▋                                                                             | 2/29 [00:00<00:05,  4.72it/s]

ROC AUC couldn't be calculated for AdaBoostClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for BaggingClassifier
multi_class must be in ('ovo', 'ovr')


 14%|███████████▍                                                                       | 4/29 [00:00<00:03,  7.34it/s]

ROC AUC couldn't be calculated for BernoulliNB
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for CalibratedClassifierCV
multi_class must be in ('ovo', 'ovr')
CategoricalNB model failed to execute
Negative values in data passed to CategoricalNB (input X)
ROC AUC couldn't be calculated for DecisionTreeClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for DummyClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for ExtraTreeClassifier
multi_class must be in ('ovo', 'ovr')


 41%|█████████████████████████████████▉                                                | 12/29 [00:01<00:01, 12.12it/s]

ROC AUC couldn't be calculated for ExtraTreesClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for GaussianNB
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for KNeighborsClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for LabelPropagation
multi_class must be in ('ovo', 'ovr')


 48%|███████████████████████████████████████▌                                          | 14/29 [00:01<00:01, 12.55it/s]

ROC AUC couldn't be calculated for LabelSpreading
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for LinearDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')


 55%|█████████████████████████████████████████████▏                                    | 16/29 [00:01<00:01, 10.55it/s]

ROC AUC couldn't be calculated for LinearSVC
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for LogisticRegression
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for NearestCentroid
multi_class must be in ('ovo', 'ovr')
NuSVC model failed to execute
specified nu is infeasible
ROC AUC couldn't be calculated for PassiveAggressiveClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for Perceptron
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for QuadraticDiscriminantAnalysis
multi_class must be in ('ovo', 'ovr')


 76%|██████████████████████████████████████████████████████████████▏                   | 22/29 [00:02<00:00,  8.94it/s]

ROC AUC couldn't be calculated for RandomForestClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for RidgeClassifier
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for RidgeClassifierCV
multi_class must be in ('ovo', 'ovr')
ROC AUC couldn't be calculated for SGDClassifier
multi_class must be in ('ovo', 'ovr')


 90%|█████████████████████████████████████████████████████████████████████████▌        | 26/29 [00:02<00:00, 11.02it/s]

ROC AUC couldn't be calculated for SVC
multi_class must be in ('ovo', 'ovr')
StackingClassifier model failed to execute
StackingClassifier.__init__() missing 1 required positional argument: 'estimators'
XGBClassifier model failed to execute
Invalid classes inferred from unique values of `y`.  Expected: [0 1 2 3 4 5], got [3 4 5 6 7 8]


100%|██████████████████████████████████████████████████████████████████████████████████| 29/29 [00:03<00:00,  9.16it/s]

ROC AUC couldn't be calculated for LGBMClassifier
multi_class must be in ('ovo', 'ovr')





Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LGBMClassifier,0.7,0.36,,0.69,0.6
ExtraTreesClassifier,0.69,0.34,,0.67,0.34
LabelPropagation,0.62,0.33,,0.62,0.07
LabelSpreading,0.62,0.33,,0.62,0.12
RandomForestClassifier,0.66,0.33,,0.64,0.67
BaggingClassifier,0.65,0.32,,0.63,0.2
GaussianNB,0.55,0.31,,0.54,0.03
ExtraTreeClassifier,0.6,0.3,,0.59,0.02
BernoulliNB,0.58,0.29,,0.57,0.02
QuadraticDiscriminantAnalysis,0.56,0.29,,0.55,0.02


In [None]:

# There you go! With just a few lines of code, we have results for our regressor models and our classifier models!
    