In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append("/home/pbagad/projects/")
sys.path.append("/home/pbagad/projects/ssl_benchmark/")

In [3]:
execfile("/home/pbagad/projects/ViDA-SSL/utils/viz.py")

In [4]:
from tqdm import tqdm
import numpy as np
import pandas as pd

import torch
from torch.utils.data import DataLoader
import torch.nn as nn
import torchvision.models as models

In [5]:
from ssl_benchmark.datasets.ucf import UCF
from ssl_benchmark.datasets.ntu60 import NTU

from ssl_benchmark.utils.videotransforms import video_transforms, tensor_transforms, volume_transforms

### Load dataset

In [6]:
crop = (112, 112)

transforms = [
    video_transforms.Resize(int(crop[0]/0.875)),
    video_transforms.CenterCrop(crop),
    volume_transforms.ClipToTensor(),
    tensor_transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
]

In [7]:
dataset = UCF(
    subset="trainlist01",
    clips_per_video=1,
    video_fps=16.0,
    video_clip_duration=(1.0/16.0),
    video_transform=transforms,
    return_labels=True,
)

In [8]:
dataloader = DataLoader(dataset, batch_size=64, shuffle=False, num_workers=8, pin_memory=True, drop_last=False)

In [9]:
len(dataloader)

150

In [10]:
instance = dataset[0]
x = instance["frames"].squeeze(1)

instance["frames"].shape, instance["label"]

(torch.Size([3, 1, 112, 112]), 0)

### Load model

In [11]:
model = models.resnet18(pretrained=True)

In [12]:
model.fc = nn.Identity()

### Forward pass (test)

In [13]:
y = model(x.unsqueeze(0))

In [14]:
x.shape

torch.Size([3, 112, 112])

In [15]:
y.shape

torch.Size([1, 512])

### Forward pass with batching

In [16]:
model = model.eval()

In [17]:
results = {
    "features": [],
    "labels": [],
    "latent": [],
}

iterator = tqdm(
    dataloader,
    desc="Computing features",
    bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}',
)
with torch.no_grad():
    for batch in iterator:
        frames = batch["frames"]
        labels = batch["label"]

        # remove time dimension since number of frames 1
        frames = frames.squeeze(2)

        # forward pass
        features = model(frames)

        results["features"].append(features.cpu())
        results["labels"].append(labels.cpu())

Computing features: 100%|██████████| 150/150 [00:27<00:00,  5.44it/s]                                                                                       


In [18]:
len(results["features"]), len(results["labels"])

(150, 150)

In [19]:
results["features"] = torch.cat(results["features"], dim=0)
results["labels"] = torch.cat(results["labels"], dim=0)

### Apply dimensionality reduction

In [20]:
# from tsne_torch import TorchTSNE as TSNE
from sklearn.manifold import TSNE
from umap import UMAP

In [21]:
method = "UMAP"

In [22]:
X = results["features"]

K = 2

if method == "PCA":
    # PCA
    (U, S, V) = torch.pca_lowrank(X)
    Z = torch.matmul(X, V[:, :K])

elif method == "TSNE":
    # TSNE
    Z = TSNE(n_components=2).fit_transform(X.numpy())
    # Z = TSNE(n_components=2).fit_transform(X)
    Z = torch.from_numpy(Z)

elif method == "UMAP":
    Z = UMAP().fit_transform(X)
    Z = torch.from_numpy(Z)
else:
    raise NotImplementedError

failed. This is likely due to too small an eigengap. Consider
adding some noise or jitter to your data.

Falling back to random initialisation!


In [23]:
Z.shape

torch.Size([9537, 2])

In [24]:
results["latent"] = Z

### Visualize

In [25]:
df = pd.DataFrame(None)

df["label"] = results["labels"].numpy()
df["Z1"] = results["latent"][:, 0].numpy()
df["Z2"] = results["latent"][:, 1].numpy()

In [26]:
df

Unnamed: 0,label,Z1,Z2
0,0,8.756439,6.495585
1,0,8.714692,6.520347
2,0,8.739508,6.534493
3,0,9.011086,6.592406
4,0,9.012991,6.542683
...,...,...,...
9532,100,4.599086,5.855163
9533,100,4.615057,5.800848
9534,100,4.619016,5.799709
9535,100,4.553802,5.881421


In [27]:
bokeh_2d_scatter_new(
    df=df, x="Z1", y="Z2", hue="label", label="label", use_nb=True, 
    title="ResNet18 feature embeddings for single-frame clips in UCF.", legend_loc="top_right"
)

### Find number of clusters K via KMeans

In [1]:
from sklearn.cluster import KMeans

dimred_model = KMeans()

In [2]:
from yellowbrick.cluster import KElbowVisualizer
from sklearn.datasets import make_blobs

In [3]:
import numpy as np

In [4]:
# k is range of number of clusters.
visualizer = KElbowVisualizer(dimred_model, k=(2,30), timings= True)

In [5]:
visualizer.fit(X)
visualizer.show()

NameError: name 'X' is not defined

In [None]:
visualizer.fit(cluster_df)        # Fit data to visualizer
visualizer.show()        # Finalize and render figure