In [2]:
# Load (or reload, if it is already loaded) IPython's autoreload extension.
%reload_ext autoreload
# Mode 2: re-import edited modules before each cell runs, so changes made to the
# local `mylib` package are picked up without restarting the kernel.
%autoreload 2

In [3]:
import os

import mylib.data_loaders as data_loaders
import mylib.data_transformers as data_transformers
import mylib.models_repo as models_repo
import mylib.optimizer_repo as optimizer_repo
import mylib.scheduler_repo as scheduler_repo
import mylib.trainer as trainer

In [4]:
from torchvision.datasets.folder import ImageFolder

In [5]:
# Render matplotlib figures inline, directly below the cell that produces them.
%matplotlib inline

In [6]:
# Root folder of the dogs-vs-cats dataset; the notebook reads its `train/` and `valid/` sub-folders.
# Set the DOGSCATS_DATA_PATH environment variable to point at your own copy instead of editing
# this cell; when it is unset, the original location is used.
data_path = os.environ.get("DOGSCATS_DATA_PATH", "/home/as/datasets/fastai.dogscats")
num_classes = 2       # Cats & Dogs
img_size  = 224       # H and W are expected to be at least 224 for PyTorch model zoo models
scale_img_size = 300  # During data augmentation, we first scale the image to this value,
                      # then we take a Random Crop of size (img_size x img_size) from within that image
batch_size = 256      # Set as per your GPU RAM

# The random crop is taken from inside the scaled image, so the scaled size must not be smaller.
assert scale_img_size >= img_size, "scale_img_size must be >= img_size for the random crop"

Let's get the transformers

In [7]:
# Normaliser provided by the transformers module.
norm = data_transformers.pytorch_zoo_normaliser

# Two training pipelines built from the same sizes and normaliser; only the last
# argument differs (False -> `trans`, True -> `trans_aug`).
# NOTE(review): the flag presumably switches augmentation on - confirm in data_transformers.
trans, trans_aug = (
    data_transformers.get_transformer(img_size, scale_img_size, norm, flag)
    for flag in (False, True)
)

# Pipeline applied to the validation (and test) images.
trans_valid = data_transformers.get_test_valid_transformer(
    img_size,
    scale_img_size,
    norm,
)

Let's create the datasets with the given transformers. Note that ImageFolder is a torchvision utility class that reads images organised into one sub-folder per class.

In [8]:
# Both training datasets read the same `train` folder and differ only in the
# transform attached to them: `trans` (vanilla) vs `trans_aug` (augmented).
train_images, train_images_aug = (
    ImageFolder(f'{data_path}/train', transform=pipeline)
    for pipeline in (trans, trans_aug)
)

# The `valid` folder is read with the valid/test transform.
valid_images = ImageFolder(
    f'{data_path}/valid',
    transform=trans_valid,
)

In [9]:
# Report how many images each dataset holds.
n_train, n_valid = len(train_images), len(valid_images)
print('Number of train instances', n_train)
print('Number of valid instances', n_valid)

Number of train instances 23000
Number of valid instances 2000


In [10]:
# Show the class names found by ImageFolder and the integer label assigned to each.
class_names = train_images.classes
class_index = train_images.class_to_idx
print('Classes', class_names)
print('Class index', class_index)

Classes ['cats', 'dogs']
Class index {'cats': 0, 'dogs': 1}


Let's create the loaders. We will iterate over these during training; they will give us our batches.

In [11]:
# One loader per dataset, all using the same batch size. They are created in the
# order: vanilla train, augmented train, validation.
train_loader, train_loader_aug, valid_loader = (
    data_loaders.get_data_loader(dataset, batch_size)
    for dataset in (train_images, train_images_aug, valid_images)
)

### Let's try with a vanilla pretrained ResNet, with no augmentation

We just replace the last FC layer to account for the num_classes, that's all

In [12]:
# Experiment switch: left off so that running the whole notebook skips this baseline.
# Flip it to True to train on the non-augmented loader.
run_vanilla_no_aug = False

if run_vanilla_no_aug:
    # Model from the local model repository, sized for `num_classes` outputs.
    model = models_repo.model_resnet_vanilla(num_classes)

    # Loss function and SGD optimizer; only the parameters of `model.fc` are passed in.
    # NOTE(review): 0.01 is presumably the learning rate - confirm in optimizer_repo.sgd.
    criteria, optimizer = optimizer_repo.sgd(
        model,
        0.01,
        model.fc.parameters(),
        momentum=0.9,
        weight_decay=1e-4,
    )

    # Step scheduler for the optimizer above.
    scheduler = scheduler_repo.step_lr(optimizer, step_size=5, gamma=0.1)

    # List the type of every top-level child module before training.
    print('Layers in the model')
    for layer in model.children():
        print(type(layer))
    print('Training starts')

    # Train on the vanilla loader, validate on the validation loader.
    # NOTE(review): the trailing 5 is presumably the number of epochs - confirm in trainer.train.
    best_model = trainer.train(
        model, criteria, optimizer, scheduler,
        train_loader, valid_loader, 5,
    )

### Let's try with a vanilla pretrained ResNet, with data augmentation

In [13]:
# Experiment switch: left off so that running the whole notebook skips this run.
# Flip it to True to train the vanilla ResNet on the augmented loader.
run_vanilla_with_aug = False

if run_vanilla_with_aug:
    # Model from the local model repository, sized for `num_classes` outputs.
    model = models_repo.model_resnet_vanilla(num_classes)

    # Loss function and SGD optimizer; only the parameters of `model.fc` are passed in.
    # NOTE(review): 0.01 is presumably the learning rate - confirm in optimizer_repo.sgd.
    fc_params = model.fc.parameters()
    criteria, optimizer = optimizer_repo.sgd(model, 0.01, fc_params, momentum=0.9, weight_decay=1e-4)

    # Step scheduler for the optimizer above.
    scheduler = scheduler_repo.step_lr(
        optimizer,
        step_size=5,
        gamma=0.1,
    )

    # List the type of every top-level child module before training.
    print('Layers in the model')
    for child in model.children():
        print(type(child))
    print('Training starts')

    # Train on the *augmented* loader, validate on the validation loader.
    # NOTE(review): the trailing 5 is presumably the number of epochs - confirm in trainer.train.
    best_model = trainer.train(
        model, criteria, optimizer, scheduler,
        train_loader_aug, valid_loader, 5,
    )

In [15]:
# Build the ResNet-34 variant with debug output enabled; the call prints one line
# per layer (its type followed by two numbers).
# NOTE(review): the two numbers look like total vs. trainable parameter-tensor
# counts per layer - confirm in models_repo.resnet34_extra_layers.
model, params_to_optimize = models_repo.resnet34_extra_layers(
    num_classes,
    top_layers_to_freeze=6,
    debug=True,
)

<class 'torch.nn.modules.conv.Conv2d'> 1 0
<class 'torch.nn.modules.batchnorm.BatchNorm2d'> 2 0
<class 'torch.nn.modules.activation.ReLU'> 0 0
<class 'torch.nn.modules.pooling.MaxPool2d'> 0 0
<class 'torch.nn.modules.container.Sequential'> 18 0
<class 'torch.nn.modules.container.Sequential'> 27 0
<class 'torch.nn.modules.container.Sequential'> 39 39
<class 'torch.nn.modules.container.Sequential'> 21 21
<class 'mylib.models_repo.AdaptiveConcatPool2d'> 0 0
<class 'mylib.models_repo.Flatten'> 0 0
<class 'torch.nn.modules.batchnorm.BatchNorm1d'> 2 2
<class 'torch.nn.modules.linear.Linear'> 2 2
<class 'torch.nn.modules.dropout.Dropout'> 0 0
<class 'torch.nn.modules.activation.ReLU'> 0 0
<class 'torch.nn.modules.batchnorm.BatchNorm1d'> 2 2
<class 'torch.nn.modules.dropout.Dropout'> 0 0
<class 'torch.nn.modules.linear.Linear'> 2 2
<class 'torch.nn.modules.activation.LogSoftmax'> 0 0


In [17]:
# Rebuild the model (without debug output) together with the parameters it hands
# back for optimisation.
model, params_to_optimize = models_repo.resnet34_extra_layers(
    num_classes,
    top_layers_to_freeze=6,
)

# Loss function and SGD optimizer restricted to the returned parameters.
criteria, optimizer = optimizer_repo.sgd(
    model,
    0.01,
    params_to_optimize=params_to_optimize,
)

# Step scheduler with the repository defaults.
scheduler = scheduler_repo.step_lr(optimizer)

# Train on the augmented loader for 10 epochs, validating after each one.
num_epochs = 10
best_model = trainer.train(
    model, criteria, optimizer, scheduler,
    train_loader_aug, valid_loader, num_epochs,
)

A Jupyter Widget


Epoch 1/10


90it [00:44,  2.03it/s]
8it [00:03,  2.30it/s]

Training Loss  : 0.0004949418514481057 , Acc: 0.9455652173913044
Validation Loss: 0.0001471586711704731 , Acc: 0.9855
############################### Better model found

Epoch 2/10



90it [00:44,  2.02it/s]
8it [00:03,  2.34it/s]

Training Loss  : 0.0001759736776837836 , Acc: 0.9830434782608696
Validation Loss: 0.00012492886278778315 , Acc: 0.989
############################### Better model found

Epoch 3/10



90it [00:44,  2.02it/s]
8it [00:03,  2.29it/s]

Training Loss  : 0.0001429175592796958 , Acc: 0.9861739130434782
Validation Loss: 0.0001228288346901536 , Acc: 0.987

Epoch 4/10



90it [00:44,  2.00it/s]
8it [00:03,  2.29it/s]

Training Loss  : 0.00012823270982050377 , Acc: 0.9880434782608696
Validation Loss: 0.0001183404135517776 , Acc: 0.99
############################### Better model found

Epoch 5/10



90it [00:45,  1.99it/s]
8it [00:03,  2.28it/s]

Training Loss  : 0.00010639017542986118 , Acc: 0.9893478260869565
Validation Loss: 0.000108808150049299 , Acc: 0.99

Epoch 6/10



90it [00:45,  1.98it/s]
8it [00:03,  2.27it/s]

Training Loss  : 8.904299650179303e-05 , Acc: 0.9918260869565217
Validation Loss: 8.126656338572503e-05 , Acc: 0.991
############################### Better model found

Epoch 7/10



90it [00:45,  1.98it/s]
8it [00:03,  2.29it/s]

Training Loss  : 8.315881250588143e-05 , Acc: 0.9919565217391304
Validation Loss: 0.00010625039716251195 , Acc: 0.988

Epoch 8/10



90it [00:45,  1.98it/s]
8it [00:03,  2.30it/s]

Training Loss  : 9.118514948362566e-05 , Acc: 0.9909130434782609
Validation Loss: 0.00010382538381963969 , Acc: 0.9905

Epoch 9/10



90it [00:45,  1.98it/s]
8it [00:03,  2.31it/s]

Training Loss  : 7.924229720526415e-05 , Acc: 0.992304347826087
Validation Loss: 0.00011187818879261613 , Acc: 0.9905

Epoch 10/10



90it [00:46,  1.95it/s]
8it [00:03,  2.29it/s]

Training Loss  : 8.160859210740612e-05 , Acc: 0.9925652173913043
Validation Loss: 0.00010679071117192507 , Acc: 0.9875




