# Extract MobileNet features

In [None]:
from torchvision.models import mobilenet_v3_large
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision import transforms

# Load the pretrained MobileNetV3-Large network and put it in evaluation mode
# (nn.Module.eval() returns the module itself, so the calls can be chained).
mobilenet = mobilenet_v3_large(pretrained=True).eval()

# Wrap the network so that a forward pass returns a dict containing only the
# output of its 'avgpool' node.
feature_extractor = create_feature_extractor(mobilenet, return_nodes=['avgpool'])

def extract_mobilenet_features(video):
    """Return the MobileNet 'avgpool' features for the frames of one video.

    The frames are centre-cropped to 224x224, normalised per channel and
    passed through the module-level ``feature_extractor`` with gradient
    tracking disabled.

    Args:
        video: Tensor of frames accepted by the torchvision transforms below
            (presumably float, shaped (frames, 3, H, W) -- TODO confirm
            against ``UCF101.__getitem__``).

    Returns:
        The 'avgpool' output with its two trailing dimensions squeezed away
        (each is only removed if it has size 1).
    """
    # Imported locally: this cell uses torch but never imports it, so the
    # function previously worked only if a later cell had been run first.
    import torch

    preprocess = transforms.Compose([
        transforms.CenterCrop(224),
        # Channel statistics commonly used with torchvision's pretrained
        # ImageNet models.
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    with torch.no_grad():
        pooled = feature_extractor(preprocess(video))['avgpool']

    return pooled.squeeze(-1).squeeze(-1)

In [None]:
import os
import joblib
from data import UCF101
import torch
from tqdm import tqdm

# Make sure the output directory exists (no error if it is already there).
os.makedirs('features', exist_ok=True)

# NOTE(review): the arguments are presumably (video root, split-list
# directory, split index, train flag) -- confirm against data.UCF101.
train_dataset = UCF101('UCF-101', 'ucfTrainTestlist', 1, True)
test_dataset = UCF101('UCF-101', 'ucfTrainTestlist', 1, False)

train_features, test_features = [], []

# Iterate by index over the dataset objects themselves; the original loops
# referenced the undefined names `train_data` / `test_data` (NameError).
for i in tqdm(range(len(train_dataset))):
    features = extract_mobilenet_features(train_dataset[i])
    train_features.append(features)

for i in tqdm(range(len(test_dataset))):
    features = extract_mobilenet_features(test_dataset[i])
    test_features.append(features)

# save features
joblib.dump(train_features, 'features/mobilenet_train_features_01.pkl')
joblib.dump(test_features, 'features/mobilenet_test_features_01.pkl')

In [None]:
from tqdm.notebook import tqdm
from keyframes import *
import cupy as cp

import os

# NOTE(review): presumably selects 32 key frames per video using
# agglomerative clustering; the meaning of the trailing False is not visible
# from this notebook -- confirm against keyframes.KeyFrameExtractor.
extractor = KeyFrameExtractor(32, 'agglomerative', False)

train_kfs = extractor.predict(train_features)
test_kfs = extractor.predict(test_features)

# save keyframes
# Create the output directory first: joblib.dump does not create missing
# parent directories, and only 'features/' is created elsewhere.
os.makedirs('keyframes', exist_ok=True)
joblib.dump(test_kfs, 'keyframes/test_01_32frames_agglomerative.pkl')
joblib.dump(train_kfs, 'keyframes/train_01_32frames_agglomerative.pkl')

# Extract Swin features

In [None]:
from transformers import AutoFeatureExtractor, SwinModel
import torch
from data import UCF101
import joblib

from tqdm.notebook import tqdm

# The image preprocessor and the model weights come from the same checkpoint.
_SWIN_CHECKPOINT = "microsoft/swin-tiny-patch4-window7-224"

# NOTE: this rebinds the module-level name `feature_extractor`, which the
# MobileNet section assigned earlier; re-run that section's setup cell before
# calling extract_mobilenet_features again.
feature_extractor = AutoFeatureExtractor.from_pretrained(_SWIN_CHECKPOINT)

# Load the Swin model and move it to the GPU.
swin = SwinModel.from_pretrained(_SWIN_CHECKPOINT)
swin = swin.cuda()

def extract_swin_features(video):
    """Return the Swin pooled features for the frames of one video.

    Args:
        video: Iterable of frames; it is converted to a list and handed to
            the module-level Hugging Face ``feature_extractor`` for
            preprocessing.

    Returns:
        numpy.ndarray: The model's ``pooler_output`` copied to host memory
        (presumably one row per frame -- TODO confirm).
    """
    inputs = feature_extractor(list(video), return_tensors="pt")
    pixel_values = inputs['pixel_values'].cuda()

    with torch.no_grad():
        outputs = swin(pixel_values=pixel_values)

    # The model runs on the GPU and Tensor.numpy() only works on CPU tensors,
    # so the result must be moved to the host before conversion.
    return outputs.pooler_output.cpu().numpy()

In [None]:
import os

# NOTE(review): the arguments are presumably (video root, split-list
# directory, split index, train flag) -- confirm against data.UCF101.
train_dataset = UCF101('UCF-101', 'ucfTrainTestlist', 1, True)
test_dataset = UCF101('UCF-101', 'ucfTrainTestlist', 1, False)

train_features, test_features = [], []

# Iterate by index over the dataset objects themselves; the original loops
# referenced the undefined names `train_data` / `test_data` (NameError).
for i in tqdm(range(len(train_dataset))):
    features = extract_swin_features(train_dataset[i])
    train_features.append(features)

for i in tqdm(range(len(test_dataset))):
    features = extract_swin_features(test_dataset[i])
    test_features.append(features)

# save features
# Ensure the output directory exists so this cell does not depend on the
# MobileNet section having been run first.
os.makedirs('features', exist_ok=True)
joblib.dump(train_features, 'features/swin_train_features_01.pkl')
joblib.dump(test_features, 'features/swin_test_features_01.pkl')

In [None]:
from tqdm.notebook import tqdm
from keyframes import *
import cupy as cp

import os

# NOTE(review): presumably selects 32 key frames per video using
# agglomerative clustering; the meaning of the trailing False is not visible
# from this notebook -- confirm against keyframes.KeyFrameExtractor.
extractor = KeyFrameExtractor(32, 'agglomerative', False)

train_kfs = extractor.predict(train_features)
test_kfs = extractor.predict(test_features)

# save keyframes
# Create the output directory first: joblib.dump does not create missing
# parent directories, and only 'features/' is created elsewhere.
os.makedirs('keyframes', exist_ok=True)
joblib.dump(train_kfs, 'keyframes/train_01_32frames_agglomerative_swin.pkl')
joblib.dump(test_kfs, 'keyframes/test_01_32frames_agglomerative_swin.pkl')

# Get number of parameters per model

In [None]:
def get_n_params(model):
    """Return the total number of scalar parameters in ``model``.

    Every parameter yielded by ``model.parameters()`` is counted, whether or
    not it requires gradients.

    Args:
        model: Object exposing ``parameters()`` that yields tensors, e.g. a
            ``torch.nn.Module``.

    Returns:
        int: Sum of the element counts of all parameters; 0 if there are none.
    """
    # Tensor.numel() is the product of the tensor's dimensions, which replaces
    # the hand-rolled nested loop (whose local `nn` also shadowed the usual
    # torch.nn alias).
    return sum(p.numel() for p in model.parameters())

# Report each model's parameter count in millions.
mobilenet_millions = get_n_params(mobilenet) / 1_000_000
print("MobileNet # params (millions):", mobilenet_millions)

swin_millions = get_n_params(swin) / 1_000_000
print("Swin # params (millions):", swin_millions)