## Automated Machine Learning Model Testing

In [2]:
import os

# Report the kernel's current working directory.
cwd = os.getcwd()
print(cwd)

/Users/justynrodrigues/Documents/python/ml


In [3]:
import pandas as pd
import numpy as np
from lazypredict.Supervised import LazyClassifier, LazyRegressor
from sklearn.model_selection import train_test_split



In [7]:
# Install the dependency once per environment if it is missing:
#   %pip install lazypredict

# Example dataset: the Titanic training set from Kaggle.
# https://www.kaggle.com/c/titanic/data?select=train.csv

# Directory that holds train.csv. Set the TITANIC_DATA_DIR environment variable
# to run this notebook on another machine; when it is unset, the original
# author's local directory is used, so existing behaviour is unchanged.
DATA_DIR = os.environ.get(
    'TITANIC_DATA_DIR',
    '/Users/justynrodrigues/Documents/python/data',
)

data = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'))

# Preview the first rows to confirm the file loaded as expected.
data.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.28,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.92,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S


Here, we will try to predict whether a passenger survived the Titanic, so this is a classification problem.  

Lazypredict can also do basic data preprocessing, such as filling NA values and creating dummy variables. That means we can test the models immediately after reading the data, without getting any errors. However, we can also pass in our own preprocessed data, which makes the model testing more accurate because the inputs are closer to what our final models will use.  

For this example, we will not do any preprocessing and will let Lazypredict do all the work.

In [8]:
# Columns used as model features; 'Survived' is the prediction target.
FEATURE_COLUMNS = ['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked']
SEED = 7  # one seed shared by the train/test split and by LazyClassifier

X = data[FEATURE_COLUMNS]
y = data['Survived']

# Hold out 20% of the rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=SEED,
)

# Build the LazyClassifier (this is a classification task, not regression).
# The variable keeps the name `reg` because later cells read `reg.models`.
reg = LazyClassifier(verbose=False, ignore_warnings=True, random_state=SEED)

# Both the train and the test splits are passed so the models can be evaluated.
# NOTE(review): `predictions` was not requested in the constructor, so the
# second return value may not hold per-model predictions; confirm before use.
models, predictions = reg.fit(X_train, X_test, y_train, y_test)

# Display the per-model score table.
models

100%|██████████| 29/29 [00:01<00:00, 25.23it/s]


Unnamed: 0_level_0,Accuracy,Balanced Accuracy,ROC AUC,F1 Score,Time Taken
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
LGBMClassifier,0.8,0.78,0.78,0.8,0.08
ExtraTreesClassifier,0.78,0.77,0.77,0.78,0.12
XGBClassifier,0.79,0.76,0.76,0.78,0.07
BaggingClassifier,0.79,0.76,0.76,0.78,0.04
NearestCentroid,0.77,0.75,0.75,0.77,0.01
ExtraTreeClassifier,0.76,0.75,0.75,0.76,0.02
RandomForestClassifier,0.77,0.74,0.74,0.76,0.16
LogisticRegression,0.76,0.74,0.74,0.76,0.02
LinearSVC,0.76,0.74,0.74,0.76,0.04
CalibratedClassifierCV,0.76,0.74,0.74,0.76,0.1


In [9]:
# Look up the fitted pipeline stored under the 'LGBMClassifier' key and display it.
lgbm_pipeline = reg.models['LGBMClassifier']
lgbm_pipeline

Pipeline(steps=[('preprocessor',
                 ColumnTransformer(transformers=[('numeric',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer()),
                                                                  ('scaler',
                                                                   StandardScaler())]),
                                                  Index(['Pclass', 'Age', 'SibSp', 'Parch', 'Fare'], dtype='object')),
                                                 ('categorical_low',
                                                  Pipeline(steps=[('imputer',
                                                                   SimpleImputer(fill_value='missing',
                                                                                 strategy='constant')),
                                                                  ('encoding',
                          

In [10]:
# Predict on the held-out test features with the stored LGBMClassifier pipeline.
y_pred = reg.models['LGBMClassifier'].predict(X_test)
y_pred

array([0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 0, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0,
       0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       0, 1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1,
       0, 0, 1])