In [1]:
from cards_class import CardsDataset
import torch
import torch.backends.cudnn as cudnn
import torch.optim
from torch.utils.data import DataLoader
from model import SSD300, MultiBoxLoss
from utils import unpackBoundigBox, showBatch, collate_fn, train, save_checkpoint, adjust_learning_rate
import sys

In [2]:
# Dataset locations, given relative to the notebook's working directory.
labels_path = 'data/general_labels/classes.txt'
anotations_path = 'data/anotations'
images_path = 'data/images'

# Destination for saved checkpoints; while this is None the training loop
# never writes one. Example target: 'data/models/cards_2.pth.tar'
model_path = None

# Checkpoint file to resume from; None means a new model is built from scratch.
checkpoint = None

In [3]:
# Build the training set from the image, annotation and label-map locations.
train_dataset = CardsDataset(images_path, anotations_path, labels_path)

# One class per entry of the label map (the map shown below includes the
# 'background' entry, so it is counted as well).
num_classes = len(train_dataset.labels)

# Quick summary of what was loaded.
print('Dataset lenght:\n', len(train_dataset))
print('Classes:\n', train_dataset.labels)
print('Number of classes:\n', num_classes)

Dataset lenght:
 961
Classes:
 {0: 'background', 1: 'a_heart', 2: 'a_diamond', 3: 'a_club', 4: 'a_spade', 5: '2_heart', 6: '2_diamond', 7: '2_club', 8: '2_spade', 9: '3_heart', 10: '3_diamond', 11: '3_club', 12: '3_spade', 13: '4_heart', 14: '4_diamond', 15: '4_club', 16: '4_spade', 17: '5_heart', 18: '5_diamond', 19: '5_club', 20: '5_spade', 21: '6_heart', 22: '6_diamond', 23: '6_club', 24: '6_spade', 25: '7_heart', 26: '7_diamond', 27: '7_club', 28: '7_spade', 29: '8_heart', 30: '8_diamond', 31: '8_club', 32: '8_spade', 33: '9_heart', 34: '9_diamond', 35: '9_club', 36: '9_spade', 37: '10_heart', 38: '10_diamond', 39: '10_club', 40: '10_spade', 41: 'j_heart', 42: 'j_diamond', 43: 'j_club', 44: 'j_spade', 45: 'q_heart', 46: 'q_diamond', 47: 'q_club', 48: 'q_spade', 49: 'k_heart', 50: 'k_diamond', 51: 'k_club', 52: 'k_spade', 53: 'joker'}
Number of classes:
 54


In [4]:
# Prefer the GPU whenever CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print('Availabele GPU:\n', device)

Availabele GPU:
 cpu


### Learning Parameters

In [5]:
# --- Data loading ---
batch_size = 4       # samples per training batch
workers = 4          # worker processes used by the DataLoader

# --- Schedule ---
epochs = 40          # total number of training epochs
print_freq = 50      # print training status every `print_freq` batches

# --- Optimizer ---
lr = 1e-4            # base learning rate
momentum = 0.9
weight_decay = 5e-4

# The learning rate is multiplied by `decay_lr_to` when training reaches each
# of these epochs (70% and 90% of the way through the run).
decay_lr_at = [int(epochs * fraction) for fraction in (.7, .9)]
decay_lr_to = 0.1

# Gradient clipping value, or None to disable. Clip if gradients are exploding,
# which may happen at larger batch sizes (sometimes at 32) - you will recognize
# it by a sorting error in the MultiBox loss calculation.
grad_clip = None

# Let cuDNN benchmark convolution algorithms and keep the fastest one.
cudnn.benchmark = True

print('Number of epochs:\n', epochs)
print('Decaying learning rate at epochs:\n', decay_lr_at)

Number of epochs:
 40
Decaying learning rate at epochs:
 [28, 36]


In [6]:
# Initialize a fresh model and optimizer, or resume both from a checkpoint.
if checkpoint is None:
    start_epoch = 0
    model = SSD300(n_classes=num_classes)

    # Split the trainable parameters so that biases can use twice the base
    # learning rate, as in the original Caffe repo.
    biases = []
    not_biases = []
    for param_name, param in model.named_parameters():
        if not param.requires_grad:
            continue
        if param_name.endswith('.bias'):
            biases.append(param)
        else:
            not_biases.append(param)

    optimizer = torch.optim.SGD(params=[{'params': biases, 'lr': 2 * lr}, {'params': not_biases}],
                                lr=lr, momentum=momentum, weight_decay=weight_decay)

else:
    # map_location remaps the stored tensors onto the device selected for this
    # session, so a checkpoint written on a GPU machine can still be resumed
    # on a CPU-only one (without it torch.load tries to restore the tensors to
    # the device they were saved from).
    # NOTE(security): torch.load unpickles arbitrary Python objects - only load
    # checkpoint files from a trusted source.
    # NOTE: `checkpoint` is rebound here from a file path to the loaded dict.
    checkpoint = torch.load(checkpoint, map_location=device)
    # Training continues with the epoch after the one stored in the checkpoint.
    start_epoch = checkpoint['epoch'] + 1
    print('Loaded checkpoint from epoch %d; resuming at epoch %d.' % (checkpoint['epoch'], start_epoch))
    model = checkpoint['model']
    optimizer = checkpoint['optimizer']

# Move the model and the loss (built from the model's prior boxes) to the
# selected device.
model = model.to(device)
criterion = MultiBoxLoss(priors_cxcy=model.priors_cxcy).to(device)


# Batches are reshuffled every epoch and assembled by the project's collate_fn;
# drop_last discards a trailing batch that is smaller than batch_size.
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=workers,
    collate_fn=collate_fn,
    drop_last=True,
)

# Epochs
for epoch in range(start_epoch, epochs):
    # Decay learning rate at particular epochs
    if epoch in decay_lr_at:
        adjust_learning_rate(optimizer, decay_lr_to)

    # One epoch's training
    train(train_loader=train_loader, model=model,
          criterion=criterion, optimizer=optimizer,
          epoch=epoch, print_freq=print_freq, grad_clip=grad_clip)

    # Save a checkpoint every second epoch, and always after the final epoch:
    # with an even `epochs` the last epoch index is odd, so saving on even
    # epochs alone would discard the last epoch of training.
    is_final_epoch = epoch == epochs - 1
    if model_path is not None and (epoch % 2 == 0 or is_final_epoch):
        save_checkpoint(model_path, epoch, model, optimizer)

Loaded VGGBase base model.
Epoch: [0][0/240]	Batch Time 12.497 (12.497)	Data Time 1.717 (1.717)	Loss 28.4215 (28.4215)	
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x12cdd33a0>
Traceback (most recent call last):
  File "/Users/acano/.virtualenvs/py3/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1203, in __del__
    self._shutdown_workers()
  File "/Users/acano/.virtualenvs/py3/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 1177, in _shutdown_workers
    w.join(timeout=_utils.MP_STATUS_CHECK_INTERVAL)
  File "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/process.py", line 149, in join
    res = self._popen.wait(timeout)
  File "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3.8/lib/python3.8/multiprocessing/popen_fork.py", line 44, in wait
    if not wait([self.sentinel], timeout):
  File "/usr/local/opt/python@3.8/Frameworks/Python.framework/Versions/3

KeyboardInterrupt: 