In [1]:
# import dependencies
import torch
from torch.autograd import Variable as V
import torchvision
import torchvision.models as models
import torchvision.datasets as dset
import torchvision.transforms as transforms
import os

from functools import partial
import pickle

In [2]:
# Per the original note, the pretrained weights were pickled under Python 2.7.
# Rebind pickle.load and pickle.Unpickler so that every later call defaults to
# encoding="latin1" when decoding such files.
for _attr in ("load", "Unpickler"):
    setattr(pickle, _attr, partial(getattr(pickle, _attr), encoding="latin1"))

In [3]:
# define our transformation function
# `transforms.Scale` was renamed to `transforms.Resize` and is no longer shipped
# by current torchvision releases. Use the new name when it exists and fall back
# to the old one so legacy installs keep working.
_resize = getattr(transforms, "Resize", None) or getattr(transforms, "Scale")
centre_crop = transforms.Compose([
        _resize(256),                # int size: rescale, keeping aspect ratio
        transforms.CenterCrop(224),  # central 224x224 patch
        transforms.ToTensor(),
        # per-channel mean and std handed to Normalize
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

In [4]:
# load the data with cocoAPI
# The dataset directory defaults to the original absolute location, but can be
# redirected without editing the notebook by exporting COCO_DATASET_DIR before
# the kernel starts. It must contain `train2017/` and
# `annotations/captions_train2017.json`.
coco_dir = os.environ.get('COCO_DATASET_DIR', '/home/raille/coco-features/coco-dataset')
cap = dset.CocoCaptions(root=os.path.join(coco_dir, 'train2017'),
                        annFile=os.path.join(coco_dir, 'annotations', 'captions_train2017.json'),
                        transform=centre_crop)

loading annotations into memory...
Done (t=1.35s)
creating index...
index created!


In [5]:
# Wrap the dataset in a DataLoader that yields batches of `batch_size` items.
# Shuffling stays off: the extraction loop derives each batch's destination
# rows in `features` from the batch index.
batch_size = 4
dataloders = torch.utils.data.DataLoader(
    dataset=cap,
    batch_size=batch_size,
    num_workers=4,
    shuffle=False,
)

In [8]:
# load the model
arch = 'resnet18'

model_file = 'whole_%s_places365.pth.tar' % arch
# Download only when the checkpoint is actually missing. The previous check used
# os.W_OK (write permission), so an existing read-only file was treated as
# absent and fetched again.
if not os.path.isfile(model_file):
    weight_url = 'http://places2.csail.mit.edu/models_places365/whole_%s_places365.pth.tar' % arch
    # os.system returns the command's exit status; stop here on failure rather
    # than letting torch.load fail later on a missing file.
    if os.system('wget ' + weight_url) != 0:
        raise RuntimeError('could not download %s' % weight_url)

useGPU = torch.cuda.is_available()

# NOTE(review): torch.load unpickles the file, and unpickling can execute
# arbitrary code. The checkpoint above is fetched over plain http, so only run
# this against a source you trust.
# Without a GPU, map_location keeps every storage on the CPU so that a model
# saved from a GPU can still be deserialized; with a GPU the default is used.
# The pickle module is passed explicitly on both paths, as the CPU path did.
map_location = None if useGPU else (lambda storage, loc: storage)
model = torch.load(model_file, map_location=map_location, pickle_module=pickle)
if useGPU:
    model = model.cuda()

# put the model in eval mode (no dropout / batchnorm...)
_ = model.eval()



In [9]:
# create a custom forward function to get only the final features just before the fc
def my_forward(x):
    """Run `x` through the global `model` up to and including `avgpool`.

    The submodules are applied in this order: conv1, bn1, relu, maxpool,
    layer1, layer2, layer3, layer4, avgpool. Nothing after `avgpool` is called.

    Returns the pooled activations flattened to 2-D, with one row per entry
    along the first dimension of `x`.
    """
    pipeline = (
        model.conv1, model.bn1, model.relu, model.maxpool,
        model.layer1, model.layer2, model.layer3, model.layer4,
        model.avgpool,
    )

    out = x
    for stage in pipeline:
        out = stage(out)

    # collapse everything but the leading dimension into a single axis
    return out.view(out.size(0), -1)

In [15]:
# Preallocate the buffer that gathers all features: len(cap) rows by 512
# columns, created on the GPU when one is in use and on the CPU otherwise.
tensor_type = torch.cuda.FloatTensor if useGPU else torch.FloatTensor
features = tensor_type(len(cap), 512)

In [None]:
# run the pretrained model on the data and put the collected feature in the features tensor
for i, (inputs, captions) in enumerate(dataloders):
    # move the batch to the GPU when one is in use, then wrap it as a Variable
    inputs = V(inputs.cuda() if useGPU else inputs)

    # batch i fills rows [i*batch_size, (i+1)*batch_size) of `features`;
    # .data takes the underlying tensor out of the Variable returned by the model
    row_start = i * batch_size
    features[row_start:row_start + batch_size] = my_forward(inputs).data

In [None]:
# Write the extracted feature tensor to disk as-is.
raw_features_file = 'raw-features-scaled.pt'
torch.save(features, raw_features_file)

In [None]:
# PCA
# Perform the standardization of the data (in place, one feature column at a time)
for col in range(features.shape[1]):
    column = features[:, col]  # a view: the in-place ops below write into `features`
    column.sub_(torch.mean(column))
    col_std = torch.std(column)
    # A constant column has zero std; dividing by it would fill the column with
    # NaN/inf and corrupt the SVD, so leave it at its centred value (all zeros).
    if col_std > 0:
        column.div_(col_std)

# Perform SVD
# The third factor is bound to `V_right`, not `V`: in this notebook `V` is the
# alias for torch.autograd.Variable, and overwriting it would break any re-run
# of the feature-extraction cell.
U, S, V_right = torch.svd(features)

# Keep only the first k principal components
k = 128
# Scale the first k left singular vectors by their singular values. This gives
# the same result as torch.mm(U[:, 0:k], torch.diag(S)[0:k, 0:k]) without
# building the full diagonal matrix.
PC_k = U[:, 0:k] * S[0:k]

# save the reduced matrix
torch.save(PC_k, 'PCA-features-scaled.pt')