* https://github.com/shankarpandala/lazypredict?tab=readme-ov-file
* https://lazypredict.readthedocs.io/en/latest/usage.html

In [2]:
%pip install lazypredict 

Note: you may need to restart the kernel to use updated packages.


In [3]:
from lazypredict.Supervised import LazyRegressor
from sklearn import datasets
from sklearn.utils import shuffle
import numpy as np

In [4]:
diabetes = datasets.load_diabetes()

In [5]:
type(diabetes) # <class 'sklearn.utils.Bunch'> 

sklearn.utils._bunch.Bunch

In [6]:
diabetes.DESCR

'.. _diabetes_dataset:\n\nDiabetes dataset\n----------------\n\nTen baseline variables, age, sex, body mass index, average blood\npressure, and six blood serum measurements were obtained for each of n =\n442 diabetes patients, as well as the response of interest, a\nquantitative measure of disease progression one year after baseline.\n\n**Data Set Characteristics:**\n\n:Number of Instances: 442\n\n:Number of Attributes: First 10 columns are numeric predictive values\n\n:Target: Column 11 is a quantitative measure of disease progression one year after baseline\n\n:Attribute Information:\n    - age     age in years\n    - sex\n    - bmi     body mass index\n    - bp      average blood pressure\n    - s1      tc, total serum cholesterol\n    - s2      ldl, low-density lipoproteins\n    - s3      hdl, high-density lipoproteins\n    - s4      tch, total cholesterol / HDL\n    - s5      ltg, possibly log of serum triglycerides level\n    - s6      glu, blood sugar level\n\nNote: Each of thes

In [7]:
diabetes.data # <class 'numpy.ndarray'> / X values - features

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990749, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06833155, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286131, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04688253,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452873, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00422151,  0.00306441]], shape=(442, 10))

In [8]:
diabetes.target # <class 'numpy.ndarray'>  / y values - target

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [9]:
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)

In [10]:
X = X.astype(np.float32)

In [11]:
# train - test split
offset = int(X.shape[0] * 0.9) # %90 train-%10 test split

X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

In [12]:
X_train

array([[-0.00551456, -0.04464164,  0.05630714, ...,  0.03799897,
         0.05078203,  0.0569118 ],
       [-0.02004471, -0.04464164, -0.08488624, ..., -0.05167075,
        -0.08237869, -0.04664087],
       [-0.06726771, -0.04464164, -0.05901875, ..., -0.03949338,
         0.00200444,  0.02377494],
       ...,
       [-0.02367725, -0.04464164, -0.06979687, ..., -0.03949338,
        -0.08913335, -0.05078298],
       [ 0.07440129, -0.04464164,  0.114509  , ..., -0.00259226,
        -0.00061174, -0.0052198 ],
       [ 0.01991321,  0.05068012,  0.01427248, ...,  0.03430886,
         0.04666178,  0.09004866]], shape=(397, 10), dtype=float32)

In [13]:
y_train

array([272.,  90.,  86., 232., 281.,  88., 235., 151., 152.,  98.,  85.,
       270., 113., 263., 225., 178., 152., 275., 125., 167., 102.,  63.,
        72.,  72., 150., 283.,  87., 200., 179.,  47., 259., 253., 129.,
       258., 132., 161., 178., 154., 144., 179.,  53., 140., 182.,  97.,
       292., 172.,  91.,  48., 116., 185., 170., 129., 104.,  85.,  97.,
        84., 170.,  87., 186., 273., 230., 141., 134.,  96.,  94.,  71.,
       214., 235., 252.,  72., 276., 197.,  91., 277., 145.,  52., 131.,
        91.,  65., 102.,  93., 191.,  83., 201.,  96., 118., 168.,  71.,
       258.,  84., 108., 185., 198., 262.,  78., 172.,  72.,  40., 243.,
        93., 279., 306., 111., 217.,  67., 197., 144.,  42.,  43., 246.,
       113.,  59., 128., 225., 156., 219.,  64., 163.,  99.,  52., 103.,
        90.,  96., 275., 129.,  48., 264.,  77., 182., 212., 268.,  81.,
        85., 242., 121., 195., 293.,  83.,  91., 190., 109., 146., 177.,
       185., 233.,  65., 113., 310.,  74., 139.,  6

In [14]:
X[0]

array([-0.00551456, -0.04464164,  0.05630714, -0.03665608, -0.04835136,
       -0.04296262, -0.07285395,  0.03799897,  0.05078203,  0.0569118 ],
      dtype=float32)

In [15]:
y[0]

np.float64(272.0)

In [16]:
reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)

In [None]:
# Install required packages for progress bars in Jupyter
%pip install ipywidgets jupyter-widgets
%pip install --upgrade jupyter

In [18]:
# After kernel restart, re-import required libraries
from lazypredict.Supervised import LazyRegressor
from sklearn import datasets
from sklearn.utils import shuffle
import numpy as np

# Reload data (after kernel restart)
diabetes = datasets.load_diabetes()
X, y = shuffle(diabetes.data, diabetes.target, random_state=13)
X = X.astype(np.float32)

# Recreate train-test split
offset = int(X.shape[0] * 0.9)
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]

print("Data reloaded successfully!")
print(f"Training set shape: {X_train.shape}")
print(f"Test set shape: {X_test.shape}")

Data reloaded successfully!
Training set shape: (397, 10)
Test set shape: (45, 10)


In [19]:
# Initialize LazyRegressor with verbose=0 to minimize output
reg = LazyRegressor(verbose=0, ignore_warnings=True, custom_metric=None)

try:
    models, predictions = reg.fit(X_train, X_test, y_train, y_test)
    print("LazyRegressor completed successfully!")
except Exception as e:
    print(f"Error occurred: {e}")
    print("If you still get IProgress error, try restarting the kernel again.")

  0%|          | 0/42 [00:00<?, ?it/s]

[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000169 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 640
[LightGBM] [Info] Number of data points in the train set: 397, number of used features: 10
[LightGBM] [Info] Start training from score 151.722922
LazyRegressor completed successfully!


In [23]:
# Display results
print("Model Performance Results:")
print("=" * 50)
print(models)

Model Performance Results:
                               Adjusted R-Squared  R-Squared   RMSE  \
Model                                                                 
ExtraTreesRegressor                          0.38       0.52  54.22   
OrthogonalMatchingPursuitCV                  0.37       0.52  54.39   
Lasso                                        0.37       0.52  54.46   
LassoLars                                    0.37       0.52  54.46   
LarsCV                                       0.37       0.51  54.54   
LassoCV                                      0.37       0.51  54.59   
PassiveAggressiveRegressor                   0.37       0.51  54.74   
LassoLarsIC                                  0.36       0.51  54.83   
SGDRegressor                                 0.36       0.51  54.85   
RidgeCV                                      0.36       0.51  54.91   
Ridge                                        0.36       0.51  54.91   
BayesianRidge                                0.36 

In [21]:
# Show best performing models
print("Top 5 Best Performing Models:")
print("=" * 40)
print(models.head())

Top 5 Best Performing Models:
                             Adjusted R-Squared  R-Squared  RMSE  Time Taken
Model                                                                       
ExtraTreesRegressor                        0.38       0.52 54.22        0.12
OrthogonalMatchingPursuitCV                0.37       0.52 54.39        0.01
Lasso                                      0.37       0.52 54.46        0.01
LassoLars                                  0.37       0.52 54.46        0.01
LarsCV                                     0.37       0.51 54.54        0.01


In [22]:
# Show model predictions summary
print("Predictions shape:", predictions.shape)
print("Sample predictions:")
predictions.head()

Predictions shape: (42, 4)
Sample predictions:


Unnamed: 0_level_0,Adjusted R-Squared,R-Squared,RMSE,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
ExtraTreesRegressor,0.38,0.52,54.22,0.12
OrthogonalMatchingPursuitCV,0.37,0.52,54.39,0.01
Lasso,0.37,0.52,54.46,0.01
LassoLars,0.37,0.52,54.46,0.01
LarsCV,0.37,0.51,54.54,0.01
