In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.models as models
import torchvision.datasets as dset
import torchvision.transforms as transforms

In [2]:
# Prefer the first CUDA device when one is visible; otherwise fall back to the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Local COCO 2014 locations, named once so they are easy to repoint on another machine.
COCO_IMAGE_DIR = '/Users/Devin/Documents/ml/datasets/coco/train2014'
COCO_ANN_FILE = '/Users/Devin/Documents/ml/datasets/coco/annotations/instances_train2014.json'

# VGG16 with pretrained weights; its convolutional `features` stack is reused below.
vgg16 = models.vgg16(pretrained=True)

# Per-channel normalization applied after ToTensor (the mean/std that torchvision
# documents for its pretrained models).
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                 std=[0.229, 0.224, 0.225])
coco_dset = dset.CocoDetection(
    root=COCO_IMAGE_DIR,
    annFile=COCO_ANN_FILE,
    transform=transforms.Compose([
        # `Resize` supersedes the deprecated (and since removed) `Scale`.
        # Bilinear interpolation is its default, matching the old `interpolation=2`.
        transforms.Resize((240, 240)),
        transforms.ToTensor(),
        normalize,
    ]),
)

# batch_size=1: convert_coco_target reads each category id as a single-element
# tensor, so annotations are consumed one image at a time.
dataloader = torch.utils.data.DataLoader(coco_dset, batch_size=1, shuffle=True, num_workers=2)

loading annotations into memory...
Done (t=11.14s)
creating index...
index created!


In [3]:
class CocoVGG(nn.Module):
    """Multi-label classifier head on top of a frozen convolutional backbone.

    The backbone's ``features`` module is reused as-is and frozen; only the
    freshly initialised fully connected classifier is trainable.

    Args:
        vgg_base: A model exposing a ``features`` module whose output flattens
            to ``7*7*512`` values per sample (the size the first linear layer
            is built for).
        num_classes: Number of output logits. Defaults to 91 because COCO 2014
            uses 80 classes but its category indices span 91 slots.
    """

    def __init__(self, vgg_base, num_classes=91):
        super(CocoVGG, self).__init__()
        self.features = vgg_base.features
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(7*7*512, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )

        # Freeze the backbone so gradients are only computed for the classifier.
        for param in self.features.parameters():
            param.requires_grad = False

    def forward(self, x):
        """Return raw (pre-sigmoid) logits of shape ``(batch, num_classes)``."""
        x = self.features(x)
        x = x.view(x.size(0), -1)  # flatten the feature map to one vector per sample
        return self.classifier(x)
    
def convert_coco_target(coco_target, num_classes=91):
    """Turn one image's COCO annotations into a multi-hot label row.

    Args:
        coco_target: Iterable of annotation dicts, each with a ``'category_id'``
            entry. The id may be a single-element tensor (as read via ``.item()``)
            or a plain integer.
        num_classes: Width of the label vector. Defaults to 91 to cover the
            COCO category index range.

    Returns:
        A float tensor of shape ``(1, num_classes)`` where column ``k - 1`` is
        1.0 if category id ``k`` occurs in ``coco_target`` (ids are 1-indexed)
        and 0.0 otherwise. Ids outside ``1..num_classes`` are ignored.
    """
    present_ids = set()
    for annotation in coco_target:
        cat_id = annotation['category_id']
        # Tensors are unwrapped with .item(); plain ints pass straight through.
        present_ids.add(int(cat_id.item() if hasattr(cat_id, 'item') else cat_id))

    target = torch.zeros(1, num_classes)
    for cat_id in present_ids:
        if 1 <= cat_id <= num_classes:
            target[0, cat_id - 1] = 1.0  # shift 1-indexed ids to 0-indexed columns
    return target

In [4]:
fine_model = CocoVGG(vgg16)

# CocoVGG freezes its feature extractor, so hand Adam only the parameters that
# can actually receive gradients instead of the full parameter list.
trainable_params = [p for p in fine_model.parameters() if p.requires_grad]
optimizer = optim.Adam(trainable_params, lr=0.001)

# Multi-label objective: every class gets an independent present/absent decision,
# so one image may carry several positive labels at once.
criterion = nn.MultiLabelSoftMarginLoss()

In [None]:
# Follow whichever device currently holds the model so that inputs and targets
# always match it, whether the model sits on the CPU or has been moved to a GPU.
model_device = next(fine_model.parameters()).device
fine_model.train()  # make sure the dropout layers are active while fitting

for epoch in range(0, 10):
    running_loss = 0.0
    for i, data in enumerate(dataloader):
        data_input, coco_target = data
        data_input = data_input.to(model_device)
        # Multi-hot label row built from this image's annotations.
        new_target = convert_coco_target(coco_target).to(model_device)

        optimizer.zero_grad()
        preds = fine_model(data_input)

        loss = criterion(preds, new_target)
        loss.backward()
        optimizer.step()

        # Accumulate the loss so the running mean can be reported.
        running_loss += loss.item()

        # Every 100 iterations print the mean loss over the epoch so far.
        if i % 100 == 0:
            print(running_loss/(i+1))
    print('Epoch: %d | loss: %.3f' % (epoch + 1, running_loss / len(dataloader)))

0.10444652289152145
0.1588288023072528
0.16218551138362938
0.16951249996140064
