Inspired by Deep Learning for Computer Vision with Python [Rosebrock]  
Chapter 9  
SGD with Regularizers

In [None]:
from os.path import expanduser
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import SGDClassifier

from imutils import paths
from dlcv_utils import SimplePreprocessor, SimpleDatasetLoader

In [None]:
# Data preparation: find the images, load them through the preprocessing
# pipeline, flatten every image to a 1-D feature vector, encode the text
# labels as integers, and split into training and testing sets.

# path to input dataset
DATASET = expanduser('~/dlcv/code/datasets/animals')

# get the list of image paths
image_paths = list(paths.list_images(DATASET))
if not image_paths:
    # Raise instead of calling quit(): quit() is a helper injected by the
    # `site` module for the interactive shell and is not meant for use in
    # programs. An exception stops the cell and says which directory was empty.
    raise FileNotFoundError(f'Error! No files found in {DATASET}')

# initialize the image preprocessor
# (presumably resizes every image to 32x32 -- confirm in dlcv_utils)
sp = SimplePreprocessor(width=32, height=32)

# load the dataset from disk
# NOTE(review): verbose=500 looks like a progress-report interval -- confirm
sdl = SimpleDatasetLoader(preprocessors=[sp])
(data, labels) = sdl.load(image_paths, verbose=500)
print('data shape', data.shape)

# reshape the data matrix: the loader returns a 4-D array, which is flattened
# here to one row of height * width * channels values per image
num_files, height, width, channels = data.shape
print(f'{num_files=}')
print(f'{height=}')
print(f'{width=}')
print(f'{channels=}')
data = data.reshape((num_files, height * width * channels))
print('data shape', data.shape)

# encode the labels as integers (from text)
le = LabelEncoder()
labels = le.fit_transform(labels)

# split the data into training and testing sets; the fixed random_state keeps
# the split identical between runs
(features_train, features_test, labels_train, labels_test) = train_test_split(
    data, labels, test_size=0.25, random_state=42)  # 25% for testing

In [None]:
# Compare regularizers: train one SGD classifier per penalty and report the
# accuracy of each on the held-out test set.

# Hyperparameters shared by every run; only the penalty differs between models.
sgd_params = dict(
    loss='log_loss',           # logistic regression, a probabilistic classifier
    max_iter=60,               # upper bound on epochs
    tol=1e-3,                  # stopping criterion
    random_state=12,           # seed used when shuffling the data
    learning_rate='constant',  # keep the step size fixed...
    eta0=0.01,                 # ...at this value
)

for r in (None, 'l1', 'l2'):
    print(f'training model with {r} penalty')

    # fit a fresh classifier using the current regularization term
    model = SGDClassifier(penalty=r, **sgd_params)
    model.fit(features_train, labels_train)

    # score() yields the accuracy on the test split as a fraction
    acc = model.score(features_test, labels_test)
    print(f'{r} penalty accuracy: {acc*100:.2f}%\n')