In [1]:
import os

# Select which GPU this notebook may use. This runs before torch is imported in
# a later cell. `setdefault` keeps '3' as the default while letting a value
# already exported in the environment (shell, scheduler, launcher) take
# precedence instead of being silently overwritten.
os.environ.setdefault('CUDA_VISIBLE_DEVICES', '3')

In [2]:
from glob import glob
from tqdm import tqdm_notebook as tqdm

In [3]:
import torch as T
import torch.nn as nn
from torch.autograd import Variable as V

import torchvision.models as models
import torchvision.transforms as transforms

In [4]:
# Per-frame preprocessing: turn the image array into a tensor, then normalize
# each of the three channels with a fixed mean / std.
# NOTE(review): these constants look like the statistics torchvision documents
# for its pretrained models -- confirm against the torchvision docs.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]

trs = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
])

In [5]:
class VisualFeatureExtractor(nn.Module):
    """Pretrained ResNet-101 with its last child module removed.

    The wrapped network chains every child of ``models.resnet101`` except the
    final one, so ``forward`` returns the output of the second-to-last child
    instead of the classification scores.

    The module puts itself in evaluation mode on construction: a freshly
    built ``nn.Module`` is in training mode, in which BatchNorm layers
    normalize with per-batch statistics and update their running averages on
    every forward pass. For feature extraction that makes the output depend
    on the batch and on processing order. Call ``.train()`` explicitly if
    fine-tuning is intended.
    """

    def __init__(self):
        super(VisualFeatureExtractor, self).__init__()

        resnet = models.resnet101(pretrained=True)
        # Keep all children but the last one and run them in sequence.
        self.model = nn.Sequential(*list(resnet.children())[:-1])

        # Use the stored BatchNorm statistics (and disable any dropout)
        # rather than training-mode behaviour.
        self.eval()

    def forward(self, img):
        """Run ``img`` through the truncated network.

        Args:
            img: batch tensor accepted by the wrapped ResNet layers
                (presumably ``(N, 3, H, W)`` -- confirm against callers).

        Returns:
            The output tensor of the last retained layer.
        """
        out = self.model(img)

        return out

# Build the extractor, then move it onto the GPU.
vfe = VisualFeatureExtractor()
vfe = vfe.cuda()

In [6]:
from PIL import Image
import cv2
import numpy as np

In [7]:
def _load_frame(frame_path):
    """Read one frame from disk and return it as a (1, 3, 224, 224) float tensor."""
    # The context manager closes the file handle once the pixels are copied.
    # Converting to RGB guarantees exactly three channels, so RGBA, grayscale
    # or palette PNGs do not break the 3-channel normalization in `trs`.
    with Image.open(frame_path) as pil_img:
        frame = np.array(pil_img.convert('RGB'))
    frame = cv2.resize(frame, (224, 224))

    # `trs` yields a (3, 224, 224) tensor; add the leading batch dimension.
    return trs(frame).unsqueeze(0).float()


# One sub-directory per video; sorted so the processing order is reproducible.
paths = sorted(glob('Dataset/Youtube/Input/*'))
# Maps video directory name -> list of per-frame feature arrays, in frame order.
feats = dict()

# Make sure BatchNorm / dropout layers run in inference mode, and skip autograd
# bookkeeping: no gradients are needed for feature extraction.
vfe.eval()
with T.no_grad():
    for path in tqdm(paths):
        v = os.path.basename(path)
        feats[v] = []
        # Frames are expected to be named 1.png .. N.png, where N is the number
        # of PNG files in the directory; a gap in the numbering raises on open.
        leng = len(glob(os.path.join(path, '*.png')))

        for i in range(1, leng + 1):
            img = _load_frame(os.path.join(path, '%d.png' % i))
            feat = vfe(img.cuda())
            # Drop the singleton dimensions so each frame is a flat vector.
            feat = np.squeeze(feat.cpu().numpy())

            feats[v].append(feat)

HBox(children=(IntProgress(value=0, max=40), HTML(value='')))




In [8]:
import pickle

# Serialize the collected features to disk. The context manager ensures the
# file is flushed and closed even if pickling raises part-way through.
with open('features.pkl', 'wb') as fout:
    pickle.dump(feats, fout)