# Image Classifier

Refer to 01_intro.ipynb and 02_production.ipynb for details.

In [12]:
from fastai.vision.all import *

## Quick Model

In [2]:
# Root folder of the dataset; each image's label is the name of the folder it sits in.
path = Path('data')

# Hold out 20% of the items for validation; the fixed seed keeps the split stable across runs.
holdout_splitter = RandomSplitter(valid_pct=0.2, seed=42)

# Inputs are images and targets are categories. Every image is resized to 128x128
# up front, and the label is read from the parent folder name.
faces = DataBlock(
    blocks=(ImageBlock, CategoryBlock),
    get_items=get_image_files,
    get_y=parent_label,
    splitter=holdout_splitter,
    item_tfms=Resize(128),
)
dls = faces.dataloaders(path)

In [3]:
# Baseline: fine-tune an ImageNet-pretrained resnet18.
# fine_tune first runs one epoch with only the last layer trainable, then
# unfreezes the rest of the network and trains for 4 more epochs
# (hence the two result tables below).
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

epoch,train_loss,valid_loss,error_rate,time
0,1.148599,0.426977,0.140845,00:05


epoch,train_loss,valid_loss,error_rate,time
0,0.731895,0.231788,0.070423,00:06
1,0.54903,0.121886,0.042254,00:06
2,0.427828,0.090708,0.056338,00:06
3,0.335212,0.089578,0.056338,00:05


## Improving the Model with Resizing and Augmentations

In [7]:
# Experiment: random resized crop, so a different part of each image is
# focused on in every epoch.
# Outcome: error rate is worse than the baseline, which suggests that cropping
# throws away important parts of the image.
crop_tfm = RandomResizedCrop(128, min_scale=0.5)
faces = faces.new(item_tfms=crop_tfm)
dls = faces.dataloaders(path)
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

epoch,train_loss,valid_loss,error_rate,time
0,1.19486,0.392972,0.126761,00:05


epoch,train_loss,valid_loss,error_rate,time
0,0.644075,0.271315,0.098592,00:05
1,0.537257,0.263642,0.098592,00:06
2,0.420853,0.23992,0.126761,00:06
3,0.348629,0.235017,0.126761,00:05


In [6]:
# Experiment: random cropping produced worse results, so resize by
# squishing/stretching instead. Nothing is cropped away, so every pixel of the
# original image is represented in some way.
# Outcome: error rate is similar to the baseline.
squish_tfm = Resize(128, method=ResizeMethod.Squish)
faces = faces.new(item_tfms=squish_tfm)
dls = faces.dataloaders(path)
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

epoch,train_loss,valid_loss,error_rate,time
0,1.053709,0.608596,0.183099,00:05


epoch,train_loss,valid_loss,error_rate,time
0,0.571915,0.360179,0.140845,00:05
1,0.403532,0.219778,0.084507,00:05
2,0.287008,0.128684,0.070423,00:05
3,0.223565,0.093168,0.056338,00:05


In [8]:
# Experiment: keep the squish resize and add batch-level data augmentation
# (rotation, flipping, warping, brightness changes, contrast changes).
# Outcome: the best results so far.
squish_tfm = Resize(128, method=ResizeMethod.Squish)
faces = faces.new(item_tfms=squish_tfm, batch_tfms=aug_transforms())
dls = faces.dataloaders(path)
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

epoch,train_loss,valid_loss,error_rate,time
0,1.321217,0.45829,0.183099,00:05


epoch,train_loss,valid_loss,error_rate,time
0,0.622619,0.364884,0.126761,00:05
1,0.568374,0.235509,0.070423,00:05
2,0.468294,0.186839,0.056338,00:06
3,0.418736,0.156898,0.042254,00:06


In [9]:
# Experiment: squishing was not clearly better than a normal resize, so combine
# the normal resize with the same augmentations.
# Outcome: results declined compared with squish + augmentation.
plain_resize_tfm = Resize(128)
faces = faces.new(item_tfms=plain_resize_tfm, batch_tfms=aug_transforms())
dls = faces.dataloaders(path)
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

epoch,train_loss,valid_loss,error_rate,time
0,1.237507,0.429924,0.15493,00:06


epoch,train_loss,valid_loss,error_rate,time
0,0.60174,0.329062,0.169014,00:05
1,0.505071,0.16353,0.056338,00:06
2,0.453461,0.132568,0.056338,00:05
3,0.399957,0.142141,0.070423,00:05


In [4]:
# Experiment: go back to squish resizing (so the full image is kept) with
# augmentation, but this time train a deeper network.
# Resnet variants available: resnet18, resnet34, resnet50, resnet101, resnet152.
# Outcome: resnet34 produces slightly worse results.
faces = faces.new(item_tfms=Resize(128, ResizeMethod.Squish), batch_tfms=aug_transforms())
# Reduce the batch size so the larger model can be trained on the GPU.
# Batch size is a property of the DataLoaders, so it has to be set here;
# cnn_learner has no batch-size parameter, and passing `bs` to it does not
# change how the data is batched.
dls = faces.dataloaders(path, bs=32)
learn = cnn_learner(dls, resnet34, metrics=error_rate)
learn.fine_tune(4)

epoch,train_loss,valid_loss,error_rate,time
0,1.237555,0.303075,0.126761,00:05


epoch,train_loss,valid_loss,error_rate,time
0,0.747875,0.215964,0.098592,00:06
1,0.579102,0.157076,0.084507,00:06
2,0.466799,0.116483,0.042254,00:06
3,0.386042,0.112962,0.056338,00:05


In [9]:
# Go back to resnet18 with a normal resize plus augmentation transforms, and
# seed the random number generators first so the run is reproducible.
# NOTE(review): in the recorded runs above, squish + augmentation ended with a
# lower error rate (0.042) than normal resize + augmentation (0.070); confirm
# that the normal resize is really the configuration intended here.
set_seed(42, reproducible=True)
plain_resize_tfm = Resize(128)
faces = faces.new(item_tfms=plain_resize_tfm, batch_tfms=aug_transforms())
dls = faces.dataloaders(path)
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

epoch,train_loss,valid_loss,error_rate,time
0,1.142999,0.469191,0.15493,00:05


epoch,train_loss,valid_loss,error_rate,time
0,0.707933,0.300509,0.140845,00:05
1,0.601721,0.162827,0.042254,00:06
2,0.523511,0.149664,0.042254,00:06
3,0.431885,0.1399,0.042254,00:05


## Cleaning

In [14]:
# Build the interactive cleaning widget from the trained learner and display it.
# The rendered widget offers a category dropdown ('female' / 'male') and a
# Train / Valid dropdown; pick the images to delete or relabel there, then run
# the cell below, which reads the selections via cleaner.delete() and
# cleaner.change().
# NOTE(review): the selections are manual widget state, so the cleaning step
# cannot be reproduced from code alone on a fresh kernel.
from fastai.vision.widgets import *
cleaner = ImageClassifierCleaner(learn)
cleaner

VBox(children=(Dropdown(options=('female', 'male'), value='female'), Dropdown(options=('Train', 'Valid'), valu…

In [20]:
# Apply the selections made in the cleaner widget above.

# Remove every image that was marked for deletion from disk.
for delete_idx in cleaner.delete():
    cleaner.fns[delete_idx].unlink()

# Move every relabelled image into the folder of its newly chosen category.
for move_idx, new_cat in cleaner.change():
    shutil.move(str(cleaner.fns[move_idx]), path/new_cat)

## Saving and Loading

In [21]:
# Retrain the seeded configuration (resnet18, normal resize + augmentation
# transforms), this time on the cleaned data.
# Outcome: about 4% error after the final epoch, though it was about 3% after
# the 2nd and 3rd epochs.
set_seed(42, reproducible=True)
plain_resize_tfm = Resize(128)
faces = faces.new(item_tfms=plain_resize_tfm, batch_tfms=aug_transforms())
dls = faces.dataloaders(path)
learn = cnn_learner(dls, resnet18, metrics=error_rate)
learn.fine_tune(epochs=4)

epoch,train_loss,valid_loss,error_rate,time
0,1.235793,0.827627,0.333333,00:05


epoch,train_loss,valid_loss,error_rate,time
0,0.700897,0.210412,0.101449,00:05
1,0.58156,0.126296,0.028986,00:05
2,0.463669,0.133857,0.028986,00:05
3,0.396687,0.146669,0.043478,00:05


In [22]:
# Export the trained learner to a pickle file so it can be reloaded for inference.
# The next cell loads it back from the current working directory.
learn.export(fname="image_classifier.pkl")

In [26]:
# Load the exported learner and use it to classify a single image.
#
# The export location gets its own name instead of rebinding `path`: `path` is
# the dataset root used by the training and cleaning cells, so overwriting it
# here would make those cells look in the wrong folder if they are re-run
# afterwards.
# NOTE: load_learner unpickles the file, so only load exports you trust.
export_dir = Path()
learn_inf = load_learner(export_dir/'image_classifier.pkl')
print("Classes:", learn_inf.dls.vocab)

# predict returns the decoded class, its index in the vocab, and the per-class probabilities.
predicted_class, predicted_class_index, pred_probs = learn_inf.predict("data/male/image.jpeg")
print("Predicted Class:", predicted_class)

Classes: (#2) ['female','male']


Predicted Class: male
