# Setup

In [None]:
# Create the folder that will hold the annotation (split) file.
!mkdir -p "data/ucf-101/annotations/"
# Download the list of all videos and store it under the name "trainlist01.txt",
# so that loading "fold 1 / train" later in this notebook yields the whole dataset.
# NOTE(review): -R "index.html*" looks redundant for a single-file download with -O — confirm.
!wget -R "index.html*" -O "data/ucf-101/annotations/trainlist01.txt" "https://isis-data.science.uva.nl/mettes/zero-shot-actions/data/ucf-101/annotations/all_videos.txt"
# Fetch the UCF-101 archive into data/ucf-101/ (-nc skips the download if the file is already there).
# NOTE(review): --no-check-certificate disables TLS certificate verification for this download.
!wget -nc --no-check-certificate -P "data/ucf-101/" "https://www.crcv.ucf.edu/data/UCF101/UCF101.rar"
# Extract the archive; the files land in data/ucf-101/UCF-101/.
!unrar x "data/ucf-101/UCF101.rar" "data/ucf-101"
# Rename the extracted folder to the path used as `videos_path` below.
!mv "data/ucf-101/UCF-101" "data/ucf-101/videos"
# Delete the archive once it has been extracted.
!rm -rf "data/ucf-101/UCF101.rar"
# Install PyAV (presumably the video-decoding backend torchvision needs here — TODO confirm).
!pip install av

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c03.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c04.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c05.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c06.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g07_c07.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g08_c01.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g08_c02.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g08_c03.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_PlayingGuitar_g08_c04.avi      62%  OK 
Extracting  data/ucf-101/UCF-101/PlayingGuitar/v_Play

# Scene

In [None]:
import torch
import numpy as np
import os
from torchvision import datasets as dset
from torchvision import transforms as trn
from torchvision import models as mdl
from torch.nn import functional as F
from tqdm.notebook import tqdm
from pathlib import Path
from google.colab import files

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [None]:
videos_path = "data/ucf-101/videos"
anno_path = "data/ucf-101/annotations"

# Frame pre-processing pipeline (adapted from the Places-CNN script).
centre_crop = trn.Compose([
        # reorder (T, H, W, C) into (T, C, H, W)
        trn.Lambda(lambda x: x.permute(0, 3, 1, 2)),
        # drop ONLY the leading T axis (we have a single frame per clip);
        # a bare squeeze() would also drop any other axis that happens to be 1
        trn.Lambda(lambda x: x.squeeze(0)),
        trn.Resize((256,256)),
        trn.CenterCrop(224),
        # scale in [0, 1] of type float
        trn.Lambda(lambda x: x / 255.),
        # per-channel mean / std normalisation
        trn.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Hacky way of loading the entire dataset: the Setup cell stored the list of
# ALL videos as "annotations/trainlist01.txt", so asking for fold 1 / train
# returns every video while still going through the stock torch Dataset.
ucf_data = dset.UCF101( root = videos_path,
                        annotation_path = anno_path,
                        frames_per_clip = 1,      # one frame per clip
                        step_between_clips = 5,   # a clip every 5 frames
                        fold = 1,
                        train = True,
                        transform = centre_crop
                       )

HBox(children=(FloatProgress(value=0.0, max=833.0), HTML(value='')))




In [None]:
def custom_collate(batch):
    """Collate a batch while discarding the middle element of every sample.

    Each sample in ``batch`` is unpacked as a 3-tuple ``(video, _, label)``;
    only ``video`` and ``label`` are kept (the dropped element is presumably
    the audio track — verify against the dataset). The remaining pairs are
    handed to PyTorch's default collate function.
    """
    video_label_pairs = [(sample[0], sample[2]) for sample in map(tuple, batch) if len(sample) == 3 or _bad_arity(sample)]
    return torch.utils.data.dataloader.default_collate(video_label_pairs)


def _bad_arity(sample):
    """Raise the same unpacking error a 3-name tuple unpack would raise."""
    video, _, label = sample
    return True

# Batched loader over all clips. `shuffle` is left at its default, so batches
# follow the dataset's clip order; the per-video aggregation further down
# indexes predictions by clip position and relies on that order.
data_loader = torch.utils.data.DataLoader(
    ucf_data,
    batch_size=256,
    collate_fn=custom_collate,
    drop_last=False,  # keep the final, possibly smaller, batch
)

In [None]:
arch = 'densenet161'

# load the pre-trained weights
model_file = f'{arch}_places365.pth.tar'

!wget -nc http://places2.csail.mit.edu/models_places365/"$arch"_places365.pth.tar

--2021-04-05 16:58:12--  http://places2.csail.mit.edu/models_places365/densenet161_places365.pth.tar
Resolving places2.csail.mit.edu (places2.csail.mit.edu)... 128.30.195.26
Connecting to places2.csail.mit.edu (places2.csail.mit.edu)|128.30.195.26|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 110119198 (105M) [application/x-tar]
Saving to: ‘densenet161_places365.pth.tar’


2021-04-05 16:58:13 (98.5 MB/s) - ‘densenet161_places365.pth.tar’ saved [110119198/110119198]



In [None]:
import re

# Instantiate the architecture with a 365-way classification head so the
# layer shapes match the checkpoint.
model = getattr(mdl, arch)(num_classes=365)

# NOTE(security): torch.load unpickles the file, and this checkpoint was
# downloaded over plain HTTP. Only load checkpoints from a source you trust.
checkpoint = torch.load(model_file, map_location='cpu')

# Drop every 'module.' component from the parameter names (the naming left
# behind by an nn.DataParallel wrapper) so they match the plain model.
state_dict = {key.replace('module.', ''): value
              for key, value in checkpoint['state_dict'].items()}

if arch == 'densenet161':
    # Fix to load densenet weights saved with pytorch < 0.4: old checkpoints
    # name dense-layer parameters e.g. "...denselayer1.norm.1.weight", while
    # current torchvision expects "...denselayer1.norm1.weight".
    # Raw string + escaped dots: `\d` is a regex class (not a string escape)
    # and the separators must be literal '.' characters.
    # The 'module.' prefix is already gone at this point, so no further
    # prefix slicing is done here (slicing again would corrupt the keys).
    legacy_key = re.compile(
        r'^(.*denselayer\d+\.(?:norm|relu|conv))\.((?:[12])\.(?:weight|bias|running_mean|running_var))$'
    )
    # Keys that do not match the legacy layout are returned unchanged by sub().
    state_dict = {legacy_key.sub(r'\1\2', key): value
                  for key, value in state_dict.items()}


model.load_state_dict(state_dict)
model.to(device)
# Inference mode (fixes batch-norm statistics, disables dropout).
model.eval()

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 96, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(96, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(192, 48, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (rel

In [None]:
# Run the scene classifier over every clip and collect the per-class
# probabilities, one (batch, num_classes) tensor per batch.
all_predictions = []
with torch.no_grad():
    for images, _ in tqdm(data_loader):
        logits = model(images.to(device))
        # No squeeze() here: it would collapse the batch axis whenever the
        # final batch contains a single clip, and the later concatenation
        # along dim 0 would then fail.
        probs = F.softmax(logits, dim=1)
        # Keep the accumulated results in host memory, not on the GPU.
        all_predictions.append(probs.cpu())

HBox(children=(FloatProgress(value=0.0, max=1961.0), HTML(value='')))

  "The pts_unit 'pts' gives wrong results and will be removed in a "





In [None]:
# Join the per-batch results along the batch axis (dim 0), one row per clip.
# The guard keeps the cell re-runnable: after the first run `all_predictions`
# is already a tensor, and passing a tensor to torch.cat would raise.
if isinstance(all_predictions, (list, tuple)):
    all_predictions = torch.cat(all_predictions, dim = 0)

In [None]:
# For every clip index, record the index of the video it was cut from
# (first element of the (video_idx, clip_idx) pair).
original_pos = np.array(
    [ucf_data.video_clips.get_clip_location(clip_idx)[0] for clip_idx in range(len(ucf_data))]
)

In [None]:
# Root folder for the per-video scene scores.
out = Path("data/ucf-101/scenescores")
out.mkdir(parents = True, exist_ok = True)

# NOTE(review): this assumes the order of ucf_data.metadata["video_paths"]
# matches the video indices stored in `original_pos` — confirm if the
# annotation file ever stops listing every video.
for i, vid_path in enumerate(tqdm(ucf_data.metadata["video_paths"])):
    # One folder per video, named after the file without its extension.
    # Path.stem does not depend on directory depth or extension length.
    vid_dir = out / Path(vid_path).stem
    vid_dir.mkdir(exist_ok=True)
    # Rows of `all_predictions` that belong to video i.
    clip_rows = np.where(original_pos == i)[0]
    # Average the clip-level scores into one vector for the video.
    avg_feats = torch.mean(all_predictions[clip_rows], dim=0).cpu().numpy()
    np.save(vid_dir / "avg-features.npy", avg_feats)

HBox(children=(FloatProgress(value=0.0, max=13320.0), HTML(value='')))




In [None]:
!zip -r /content/"$arch"_scenescores.zip data/ucf-101/scenescores

files.download(f"{arch}_scenescores.zip") 

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  adding: data/ucf-101/scenescores/v_HeadMassage_g21_c01/ (stored 0%)
  adding: data/ucf-101/scenescores/v_HeadMassage_g21_c01/avg-features.npy (deflated 7%)
  adding: data/ucf-101/scenescores/v_Diving_g18_c03/ (stored 0%)
  adding: data/ucf-101/scenescores/v_Diving_g18_c03/avg-features.npy (deflated 8%)
  adding: data/ucf-101/scenescores/v_ApplyEyeMakeup_g06_c02/ (stored 0%)
  adding: data/ucf-101/scenescores/v_ApplyEyeMakeup_g06_c02/avg-features.npy (deflated 7%)
  adding: data/ucf-101/scenescores/v_BenchPress_g09_c02/ (stored 0%)
  adding: data/ucf-101/scenescores/v_BenchPress_g09_c02/avg-features.npy (deflated 6%)
  adding: data/ucf-101/scenescores/v_Rafting_g24_c02/ (stored 0%)
  adding: data/ucf-101/scenescores/v_Rafting_g24_c02/avg-features.npy (deflated 6%)
  adding: data/ucf-101/scenescores/v_Surfing_g10_c04/ (stored 0%)
  adding: data/ucf-101/scenescores/v_Surfing_g10_c04/avg-features.npy (deflated 7%)
  adding:

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>