In [1]:
import os
import sys
import torch
import fnmatch
import numpy as np
import deepdish as dd
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
from torch.autograd import Variable
from torchvision.models import vgg16
from scipy.misc import imread, imresize

### gpu usage

In [2]:
# Restrict CUDA to the device with index 1. This is set before any CUDA query
# below so the mask is in place when PyTorch first looks for devices.
os.environ.update({"CUDA_VISIBLE_DEVICES": "1"})
# Flag consulted by later cells to decide whether to move tensors/models to the GPU.
use_gpu = torch.cuda.is_available()

### set hyperparameters

In [3]:
# Side length of the square network input (presumably pixels; the extraction
# function below resizes frames to 224 x 224 on its own).
img_size = 224
# S, B and C size the last classifier layer, which has (B*5 + C) * S * S outputs.
# NOTE(review): this matches a YOLO-style grid / boxes-per-cell / classes layout — confirm.
S, B, C = 8, 2, 20
# Width of the first fully connected layer, whose activation is extracted later.
n_features = 1000

### load the trained model

In [4]:
# Rebuild the architecture the checkpoint expects: a VGG16 backbone whose
# classifier is replaced by Linear -> LeakyReLU -> Dropout -> Linear -> Sigmoid,
# ending in (B*5 + C) * S * S outputs.
model = vgg16(pretrained=False)
model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, n_features),
            nn.LeakyReLU(0.1, inplace=True),
            nn.Dropout(),
            nn.Linear(n_features, (B*5+C) * S * S),
            nn.Sigmoid(),
        )
# map_location keeps every stored tensor on the CPU while the file is read, so
# the checkpoint also loads on a machine without a visible GPU; the model is
# moved to the GPU afterwards when one is available.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
model.load_state_dict(
    torch.load(
        './results/model_100iters_S8_1000feas_vot20classes_fixlossbug.pth',
        map_location=lambda storage, loc: storage,
    )
)
# Inference mode: disables Dropout so extracted activations are deterministic.
model.eval()
if use_gpu:
    model.cuda()

### load bounding boxes

In [5]:
# Load the bounding-box list from HDF5 via deepdish; the cells below show it
# holds one (n_frames, 4) array per sequence.
bboxes_path = './routine_generate_vot2017_train/normal_bboxes_all_sqrtwh_list.h5'
bboxes = dd.io.load(bboxes_path)

In [6]:
# Number of per-sequence bbox arrays in the loaded list (recorded output: 20).
len(bboxes)

20

In [7]:
# Show the shape of each sequence's bbox array: one row per frame, four values per row.
for seq_boxes in bboxes:
    print(seq_boxes.shape)

(325, 4)
(196, 4)
(105, 4)
(725, 4)
(339, 4)
(225, 4)
(76, 4)
(350, 4)
(175, 4)
(151, 4)
(742, 4)
(345, 4)
(160, 4)
(131, 4)
(326, 4)
(355, 4)
(292, 4)
(366, 4)
(1377, 4)
(248, 4)


In [8]:
# NOTE(review): repeats the len(bboxes) check from an earlier cell; this cell can be removed.
len(bboxes)

20

### extract features

In [18]:
def extract_features(vot_folder, model, n_features, save_path, bboxes, padzero, use_gpu):
    """Extract one activation vector per frame and save it together with four bbox columns.

    For every sequence name listed in ``list_20classes.txt`` inside
    ``vot_folder``, the frames ``00000001.jpg`` .. ``<n_frames>.jpg`` are read,
    resized to 224 x 224 and passed through ``model``. A forward hook copies
    the output of ``model.classifier[1]`` into a buffer of ``n_features``
    values. That vector is concatenated with four more values (the frame's
    scaled bbox or zeros, see ``padzero``), the rows of one sequence are
    stacked into a 2-D tensor, and the list of per-sequence tensors is written
    to an HDF5 file in ``save_path`` with deepdish.

    Args:
        vot_folder: Directory holding ``list_20classes.txt`` and one
            sub-directory of ``.jpg`` frames per listed name.
        model: Network with a ``classifier`` container whose element ``1``
            outputs ``n_features`` values for a single image.
        n_features: Length of the captured activation vector.
        save_path: Directory the ``.h5`` file is written to.
        bboxes: Indexable so that ``bboxes[k][i]`` is the 4-vector of frame
            ``i`` in sequence ``k``; it is divided element-wise by
            ``[width, height, sqrt(width), sqrt(height)]`` of that frame
            (presumably the columns are x, y, sqrt(w), sqrt(h) — TODO confirm).
        padzero: When ``True`` the output file is
            ``20classes_combo_padzero_1000features.h5`` and only sequence 0
            keeps its bboxes; all other sequences get four zeros. Otherwise
            every frame keeps its bbox and the file is
            ``20classes_combo_padgt_1000features.h5``.
        use_gpu: Move each input image to the GPU before the forward pass.

    Raises:
        ValueError: If a listed sequence directory contains no ``.jpg`` file.
            Previously this silently re-used the previous sequence's rows, or
            raised ``NameError`` for the first sequence.

    Note:
        The output file names contain "1000features" regardless of ``n_features``.
    """
    zero_padding = torch.FloatTensor([0, 0, 0, 0])

    list_path = os.path.join(vot_folder, 'list_20classes.txt')
    # Context manager so the list file is closed once the names are read.
    with open(list_path) as list_file:
        CLASSES = [line.rstrip('\n') for line in list_file]
    n_classes = len(CLASSES)

    # Loop-invariant pieces: the image transform, the buffer the hook fills and
    # the hook itself are created once instead of once per frame.
    to_tensor = transforms.Compose([transforms.ToTensor(), ])
    features = torch.zeros(n_features)

    def copy_data(m, i, o):
        # Flatten the single-image activation so its shape matches the 1-D buffer.
        features.copy_(o.data.view(-1))

    # Element '1' of the classifier; its output is what gets captured.
    layer = model.classifier._modules.get('1')
    hook = layer.register_forward_hook(copy_data)

    whole_combo_list = []
    try:
        for index, CLASS in enumerate(CLASSES):
            sys.stdout.write('%s/%s\r' % (index + 1, n_classes))
            sys.stdout.flush()
            subdirpath = os.path.join(vot_folder, CLASS)
            n_data = len(fnmatch.filter(os.listdir(subdirpath), '*.jpg'))
            if n_data == 0:
                raise ValueError('no .jpg frames found in %s' % subdirpath)

            combo_list = []
            for i in range(n_data):
                # Frames are numbered from 1 with eight zero-padded digits.
                jpgpath = os.path.join(subdirpath, '{0:08}'.format(i + 1) + '.jpg')
                img = imread(jpgpath)
                height, width, _ = img.shape

                # Convert explicitly so the division is float32 Tensor / Tensor
                # instead of relying on ndarray / Tensor operator dispatch.
                scale = torch.Tensor([width, height, np.sqrt(width), np.sqrt(height)])
                bbox = torch.from_numpy(np.asarray(bboxes[index][i], dtype=np.float32)) / scale

                img = imresize(img, (224, 224))
                img = to_tensor(img)
                img = Variable(img[None, :, :, :], volatile=True)
                if use_gpu:
                    img = img.cuda()

                # The forward pass triggers the hook, which overwrites `features`.
                model(img)

                # NOTE(review): `index` is the sequence index, so with padzero only
                # sequence 0 keeps its bboxes. If the intent was "first frame of
                # every sequence", the test should be on `i` — behaviour kept as is.
                if padzero is True and index != 0:
                    tail = zero_padding
                else:
                    tail = bbox
                # torch.cat allocates a new tensor, so reusing `features` is safe.
                combo_list.append(torch.cat([features, tail], dim=0))

            # One (n_frames, n_features + 4) tensor per sequence.
            whole_combo_list.append(torch.stack(combo_list))
    finally:
        # Always detach the hook, even if a frame fails to load.
        hook.remove()

    suffix = 'padzero' if padzero is True else 'padgt'
    dd.io.save(os.path.join(save_path, '20classes_combo_%s_1000features.h5' % suffix), whole_combo_list)

In [19]:
# Directory that receives the generated .h5 files.
save_path = './routine_generate_vot2017_train/'
# Dataset root, located directly under save_path.
vot_folder = save_path + 'vot2017/'
# First extraction run uses the zero-padded variant.
padzero = True

In [20]:
# Extraction run using the current `padzero` setting from the config cell.
extract_features(
    vot_folder=vot_folder,
    model=model,
    n_features=n_features,
    save_path=save_path,
    bboxes=bboxes,
    padzero=padzero,
    use_gpu=use_gpu,
)

1/20



20/20

In [21]:
# Second extraction run: every frame keeps its bbox columns instead of zeros.
padzero = False
extract_features(
    vot_folder=vot_folder,
    model=model,
    n_features=n_features,
    save_path=save_path,
    bboxes=bboxes,
    padzero=padzero,
    use_gpu=use_gpu,
)

1/20



20/20

In [22]:
# NOTE(review): leftover sanity-check cell with no effect on the analysis; safe to delete.
print('hello')

hello


### load the saved feature + bbox combo lists

In [103]:
# Load the two saved combo lists (feature columns followed by four bbox columns).
# NOTE(review): these file names lack the "_1000features" suffix that
# extract_features writes, so they refer to files from a different run —
# confirm which files are intended before relying on a fresh Run All.
padzero_file = os.path.join(save_path, '20classes_combo_padzero.h5')
padgt_file = os.path.join(save_path, '20classes_combo_padgt.h5')
bboxes_padzeros = dd.io.load(padzero_file)
bboxes_padgt = dd.io.load(padgt_file)

In [104]:
# Number of per-sequence tensors in the loaded zero-padded list (recorded output: 20).
len(bboxes_padzeros)

20

In [106]:
# Shape of the first sequence's tensor. The recorded output is 325 x 4100,
# which does not match n_features + 4 = 1004 from this notebook's settings.
# NOTE(review): presumably produced by an earlier 4096-feature run — confirm.
bboxes_padzeros[0].size()

torch.Size([325, 4100])

### normalize bboxes

In [None]:
# Scale each sequence's bboxes by the image size of that sequence.
# NOTE(review): the original cell read `save_path + image_name`, but
# `image_name` is not defined anywhere in this notebook. The first frame of
# each sequence folder is used instead, following the naming used by
# extract_features; this assumes all frames of a sequence share one
# resolution — confirm.
# NOTE(review): extract_features divides the last two columns by
# sqrt(width), sqrt(height), while this cell divides by width, height —
# confirm which scaling matches the loaded "sqrtwh" list.
file = os.path.join(vot_folder, 'list_20classes.txt')
# Context manager so the list file is closed once the names are read.
with open(file) as class_list:
    CLASSES = [line.rstrip('\n') for line in class_list]
# Object array with one slot per sequence: the sequences have different frame
# counts, so they cannot be packed into a single rectangular array.
normalized_bbox = np.empty(len(bboxes), dtype=object)
for index, CLASS in enumerate(CLASSES):
    sys.stdout.write('%s/%s\r' % (index + 1, len(CLASSES)))
    sys.stdout.flush()
    img = imread(os.path.join(vot_folder, CLASS, '00000001.jpg'))
    height, width, _ = img.shape
    normalized_bbox[index] = bboxes[index] / np.array([width, height, width, height])