In [4]:
from torchvision import transforms
import torchvision
import torch
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.nn import Identity
from tqdm.notebook import tqdm
import numpy as np
import os

In [5]:
# Root folder handed to ImageFolder further down; its sub-folders become the
# dataset classes, which this notebook uses as video ids.
FRAME_DIR = "training_set/N-Frames"
# Output root: extract_features writes <FEATURES_DIR>/<model_name>/<video_id>.csv.
FEATURES_DIR = "training_set/Features"

# Used as the DataLoader batch size so that one batch lines up with one video.
# Assumes every video folder holds exactly this many frames — TODO confirm.
NUM_FRAMES_PER_VIDEO = 8

In [6]:
# ResNet-152 with pretrained weights; used below purely as a frozen feature extractor.
# NOTE(review): recent torchvision releases deprecate `pretrained=` in favour of
# `weights=` — confirm against the installed version before upgrading.
resnet152 = torchvision.models.resnet152(pretrained=True)

In [8]:
# Sanity check: later cells call .cuda() unconditionally, so this should display True.
torch.cuda.is_available()

True

In [7]:

# Swap the final fully connected layer for a pass-through so the forward pass
# returns whatever used to be fed into `fc` instead of class scores.
resnet152.fc = Identity()
# Move the weights onto the GPU (cuda() hands back the same module) ...
resnet152 = resnet152.cuda()
# ... and switch to inference mode; eval() returns the module, so the cell
# still displays the network's repr.
resnet152.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): Bottleneck(
      (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
      (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (downsample): Sequential(
        (0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 

In [5]:
# Input pipeline expected by the pretrained ResNet: resize to 256, keep the
# central 224x224 crop, convert to a tensor, then normalise each RGB channel.
preprocess = transforms.Compose(
    [
        transforms.Resize(size=256),
        transforms.CenterCrop(size=224),
        transforms.ToTensor(),
        transforms.Normalize(
            [0.485, 0.456, 0.406],  # per-channel mean
            [0.229, 0.224, 0.225],  # per-channel std
        ),
    ]
)

In [6]:
# Every sub-folder of FRAME_DIR becomes one class; each image is run through
# `preprocess` when it is loaded.
images = ImageFolder(root=FRAME_DIR, transform=preprocess)

In [7]:
# Sequential (unshuffled) batches of NUM_FRAMES_PER_VIDEO images. Keeping the
# dataset order is what lets a batch coincide with a single video folder;
# shuffle=False is the DataLoader default and is only spelled out for clarity.
batched_images = DataLoader(
    dataset=images,
    batch_size=NUM_FRAMES_PER_VIDEO,
    shuffle=False,
)

In [8]:
# Reverse lookup for ImageFolder's class_to_idx: position i holds the video id
# (folder name, as a string) whose class index is i.
idx_to_video_id = [
    str(folder_name)
    for folder_name, _ in sorted(
        images.class_to_idx.items(), key=lambda pair: pair[1]
    )
]

In [10]:
def extract_features(model, model_name, data, idx_to_id):
    """Run ``model`` over every batch of ``data`` and save one feature file per video.

    Each batch is expected to hold the frames of exactly one video. The model
    output for the batch is written to
    ``{FEATURES_DIR}/{model_name}/{video_id}.csv`` with one row per frame.
    Videos whose file already exists are skipped, so an interrupted run can
    simply be restarted.

    Note: ``np.savetxt`` is called with its default delimiter, so despite the
    ``.csv`` extension the values are space-separated; read them back with
    ``np.loadtxt``.

    Args:
        model: Callable ``torch.nn.Module`` mapping an image batch to a 2-D
            feature tensor. It should already be in eval mode.
        model_name: Name of the sub-folder of ``FEATURES_DIR`` to write into.
        data: Iterable of ``(images, labels)`` batches, e.g. a ``DataLoader``
            over an ``ImageFolder``.
        idx_to_id: Sequence (or mapping) from integer label to video id.

    Raises:
        AssertionError: If a batch mixes frames from more than one video.
    """
    output_dir = f"{FEATURES_DIR}/{model_name}"
    # np.savetxt does not create missing directories, so a fresh checkout
    # would otherwise fail on the very first video.
    os.makedirs(output_dir, exist_ok=True)

    # Send inputs to wherever the model's weights live; a parameter-free model
    # falls back to CUDA, which is what this function always used before.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    for batch, c in tqdm(data):
        # int() turns the 0-d label tensor into a plain index/key.
        video_id = idx_to_id[int(c[0])]
        feature_path = f"{output_dir}/{video_id}.csv"
        if os.path.exists(feature_path):
            continue  # already extracted on a previous run

        # make sure the batch contains frames from only one video
        assert bool((c == c[0]).all()), (
            f"batch mixes frames from several videos (labels {c.tolist()}); "
            "each video must contribute exactly one full batch of frames"
        )

        with torch.no_grad():
            features = model(batch.to(device)).cpu()

        # Write to a temporary name first and rename afterwards: an interrupted
        # run must not leave a truncated file that the skip check above would
        # later mistake for a finished video.
        partial_path = f"{feature_path}.tmp"
        np.savetxt(partial_path, features.numpy())
        os.replace(partial_path, feature_path)

In [11]:
# Extract features for every video; results land under FEATURES_DIR/ResNet152.
extract_features(
    model=resnet152,
    model_name="ResNet152",
    data=batched_images,
    idx_to_id=idx_to_video_id,
)

HBox(children=(FloatProgress(value=0.0, max=590.0), HTML(value='')))


