Extract features for different datasets.

In [1]:
# Order GPUs by PCI bus ID and expose only device 3 to this kernel.
# NOTE(review): the device index is hard-coded; adjust it for the machine at hand.
%set_env CUDA_DEVICE_ORDER=PCI_BUS_ID
%set_env CUDA_VISIBLE_DEVICES=3

env: CUDA_DEVICE_ORDER=PCI_BUS_ID
env: CUDA_VISIBLE_DEVICES=3


In [2]:
from config import domainData
from config import num_classes as NUM_CLASSES
from torchvision import datasets, models, transforms
import pickle as pkl
import numpy as np
import logit

In [3]:
import torch
import torch.nn as nn
from torch.autograd import Variable

In [7]:
def get_model(model):
    """Build a pretrained torchvision backbone with its classification head trimmed.

    Args:
        model: Architecture name; one of 'resnet50', 'alexnet' or 'vgg16'.

    Returns:
        A ``(net, feature_size)`` tuple: the trimmed network and the feature
        width this notebook records for it (2048 for resnet50, 4096 for
        alexnet and vgg16).

    Raises:
        ValueError: If ``model`` is not one of the supported names. Previously
            this case silently returned ``(None, None)`` and only failed later
            when the result was used.
    """
    if model == 'resnet50':
        net = models.resnet50(pretrained=True)
        # Replace the final fully connected layer with the project's flatten
        # module (presumably a reshape to (batch, -1) -- confirm in logit.py).
        net.fc = logit.flatten()
        feature_size = 2048
    elif model == 'alexnet':
        net = models.alexnet(pretrained=True)
        # Drop the last two modules of the classifier head.
        net.classifier = nn.Sequential(*list(net.classifier.children())[:-2])
        feature_size = 4096
    elif model == 'vgg16':
        net = models.vgg16(pretrained=True)
        # Drop the last two modules of the classifier head.
        net.classifier = nn.Sequential(*list(net.classifier.children())[:-2])
        feature_size = 4096
    else:
        raise ValueError(
            "Unsupported model %r; expected 'resnet50', 'alexnet' or 'vgg16'" % (model,)
        )
    return net, feature_size

In [8]:
# Backbone used for feature extraction; see get_model for the supported names.
model = 'vgg16'

# get_model already returns the network with its classification head trimmed.
# The former follow-up lines rebuilt `net` from a `resnet` variable that is not
# defined at notebook scope (NameError on a fresh kernel) and would have
# overwritten the model selected above, so they are removed.
net, feature_size = get_model(model)

In [9]:
# Run configuration: device switch, loader batch size, class count and data roots.
use_gpu = True
batch_size = 1
num_classes = NUM_CLASSES['office']
# Domain keys noted for this project: 'amazon', 'dslr', 'webcam'
train_dir, val_dir = domainData['amazon'], domainData['webcam']

In [11]:
# Move the extractor to the GPU when requested.
net = net.cuda() if use_gpu else net
# Switch to evaluation mode so Dropout and BatchNorm layers behave
# deterministically; without this the extracted features are computed in
# training mode and vary from run to run.
net = net.eval()

In [12]:
# Preprocessing applied to every image: resize, centre-crop to 224, convert to
# a tensor, then normalise each channel with the given mean / std.
data_transforms = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# One ImageFolder per split, both sharing the same preprocessing pipeline.
image_datasets = {
    split: datasets.ImageFolder(root, data_transforms)
    for split, root in (('train', train_dir), ('val', val_dir))
}

In [13]:
# One shuffled, single-process DataLoader per split; the last partial batch is kept.
dataloaders = {
    split: torch.utils.data.DataLoader(
        image_datasets[split],
        batch_size=batch_size,
        shuffle=True,
        num_workers=0,
        drop_last=False,
    )
    for split in ('train', 'val')
}

In [14]:
# Use the 'val' split (built from val_dir) as the dataset to extract features from.
dataset = image_datasets['val']

In [15]:
# Number of samples in the selected dataset; echoed as the cell output.
total_data_len = len(dataset)
total_data_len

795

In [16]:
# Pull the first (image, label) pair to inspect what the dataset yields.
img, lbl = dataset[0]

In [17]:
# Show the types of the sample and its label, the image tensor size and the label value.
print(type(img), img.size(), type(lbl), lbl)


<class 'torch.FloatTensor'> torch.Size([3, 224, 224]) <class 'int'> 0


# Module-level accumulators filled by extract_feat().
imgs = []
lbls = []
def extract_feat():
    """Run every sample of ``dataset`` through ``net`` one at a time.

    Reads the notebook globals ``dataset``, ``total_data_len``, ``net`` and
    ``use_gpu``. For each sample it appends the network output (with the batch
    dimension removed) to the global list ``imgs`` and the label to the global
    list ``lbls``. Returns None.
    """
    global imgs, lbls
    for index in range(total_data_len):
        sample, label = dataset[index]
        if use_gpu:
            sample = sample.cuda()
        # The network expects a batch dimension, so wrap the sample as a batch of one.
        batch = Variable(sample.unsqueeze(0))
        feature = net(batch)
        imgs.append(feature.squeeze(0).data)
        lbls.append(label)
# Time the per-sample extraction pass.
%time extract_feat()

In [18]:
# Module-level results filled by extract_feat2(); None until it has run.
imgs = None
lbls = None
def extract_feat2():
    """Extract features for the 'val' DataLoader and store them as NumPy arrays.

    Reads the notebook globals ``dataloaders``, ``net`` and ``use_gpu``. On
    return the global ``imgs`` holds the network outputs of all batches stacked
    along the first axis, and the global ``lbls`` holds the matching labels as
    a column of shape ``(num_samples, 1)``. Rows follow the order in which the
    loader yields the samples. If the loader yields nothing, both globals are
    left untouched.

    Batches are collected in lists and stacked once at the end, instead of
    re-stacking the growing arrays on every iteration. Labels are flattened per
    batch, so they stay aligned one-to-one with the feature rows for any batch
    size, including a shorter final batch.
    """
    global imgs
    global lbls
    feature_batches = []
    label_batches = []
    for img, lbl in dataloaders['val']:
        img = img.cuda() if use_gpu else img
        out = net(Variable(img))
        feature_batches.append(out.cpu().data.numpy())
        # Flatten so batches of different lengths can be joined sample-wise.
        label_batches.append(np.asarray(lbl).reshape(-1))
    if not feature_batches:
        return
    imgs = np.vstack(feature_batches)
    # Keep the (num_samples, 1) layout the batch_size=1 run produced.
    lbls = np.concatenate(label_batches).reshape(-1, 1)
# Time the DataLoader-based extraction pass.
%time extract_feat2()

CPU times: user 1min 11s, sys: 5.71 s, total: 1min 17s
Wall time: 13.9 s


In [35]:
# Sanity check: one row per sample, one column per feature.
imgs.shape

(795, 4096)

In [38]:
# Spot-check the label stored at the last row.
# NOTE(review): index 794 is hard-coded for the 795-sample dataset; lbls[-1] would follow the data.
print(lbls[794])

[28]


In [39]:
# Bundle the extracted arrays under the keys used in the pickle file.
features = dict(features=imgs, labels=lbls)

In [40]:
# Serialize the feature bundle with the highest pickle protocol available.
# NOTE(review): the file name is hard-coded and does not follow `model` or
# `val_dir`; confirm it matches the run before saving.
with open("webcam_alexnet_4096.pkl", "wb") as out_file:
    pkl.dump(features, out_file, protocol=pkl.HIGHEST_PROTOCOL)

In [33]:
# List the files in the working directory.
!ls -l

total 263268
-rw-r--r--  1 utkrsh gpu     5196 Mar 13 09:55 advDomain.py
-rw-r--r--  1 utkrsh gpu 44628150 Apr  7 11:06 amazon_4096.yml
-rw-r--r--  1 utkrsh gpu 46176525 Apr  7  2018 amazon_alexnet_4096.pkl
-rw-r--r--  1 utkrsh gpu 23099661 Apr  7 10:25 amazon_resnet50.pkl
-rw-r--r--  1 utkrsh gpu     2774 Feb 26 16:01 checkVisDA.py
-rw-r--r--  1 utkrsh gpu    64058 Mar 10 23:45 classifier_dump.pth
-rw-r--r--  1 utkrsh gpu     9352 Feb 23 18:28 classifier.ipynb
-rw-r--r--  1 utkrsh gpu      679 Apr  6 13:22 config.py
-rw-r--r--  1 utkrsh gpu    13780 Apr  7 11:35 feature_extract.ipynb
-rw-r--r--  1 utkrsh gpu 44757600 Mar 10 23:45 features_dump.pth
-rw-r--r--  1 utkrsh gpu     8993 Mar  9 10:27 grl_combineDmnTrns.py
-rw-r--r--  1 utkrsh gpu    13656 Feb 23 18:28 grl.ipynb
-rw-r--r--  1 utkrsh gpu 44956012 Feb 23 18:28 grl_model.pth
-rw-r--r--  1 utkrsh gpu 45219745 Feb 28 21:26 grl_model_with_transform.pth
-rw-r--r--  1 utkrsh gpu     8511 Mar  3 23:25 grl_only.py
-rw-r