<a href="https://colab.research.google.com/github/ambekarsameer96/algonuts/blob/main/ResNet_for_AlgoNuts.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Show the GPU attached to this runtime (model, driver/CUDA version, memory usage).
!nvidia-smi
# NOTE(review): the following cell's captured output shows `prime-select` is not
# installed on this runtime ("command not found").
!prime-select query

Fri Aug 13 13:44:36 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.42.01    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P8     9W /  70W |      0MiB / 15109MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
# NOTE(review): repeats the `prime-select` query from the previous cell; the captured
# output below shows the command is not available on this runtime.
!prime-select query

/bin/bash: prime-select: command not found


# **Setup**

In [None]:
# install libraries
%%capture
!pip install decord
!pip install hickle

# **Import Packages**

In [None]:
# Import libraries
import os
import time
import tqdm
import torch
import IPython
import torchvision

import numpy as np
import matplotlib.pyplot as plt

import torch.nn as nn
from torch import Tensor
import torch.nn.functional as F
import torchvision.models as models

from torchvision import transforms
from torchvision.models import ResNet
from torchvision.utils import make_grid
from torchvision.datasets import ImageFolder
from typing import Type, Any, Callable, Union, List, Optional

from PIL import Image
from io import BytesIO

In [None]:
# image & video libraries
from decord import VideoReader  
from PIL import Image    

# others
import glob
import os
import pickle

In [None]:
from torch.autograd import Variable as V
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.multioutput import MultiOutputRegressor


In [None]:
# random number generators for reproducibility
# A single seed value is shared by both libraries seeded below.
seed = 24

torch.manual_seed(seed)  # PyTorch random number generator
np.random.seed(seed)     # NumPy global random number generator

# **Import and Define ResNet**

In [None]:
# ImageNet-pretrained ResNet-18 from torchvision.
# `.eval()` returns the module itself and switches BatchNorm to its stored running
# statistics; without it, single-image inference would normalise with the statistics
# of that one image.
model = models.resnet18(pretrained=True).eval()
# torch.utils.model_zoo.load_url()
#model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
# or any of these variants

# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet34', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet50', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet101', pretrained=True)
# model = torch.hub.load('pytorch/vision:v0.10.0', 'resnet152', pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


HBox(children=(FloatProgress(value=0.0, max=46830571.0), HTML(value='')))




In [None]:
# Names this cell advertises as its public API.
# NOTE(review): no `resnet18` factory function is defined in the visible part of this
# notebook, only the `ResNet` class — confirm whether one is meant to exist.
__all__ = ['ResNet', 'resnet18']
# Checkpoint URL for each supported architecture, keyed by architecture name.
model_url = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-f37072fd.pth'}

def conv3x3(in_planes: int, out_planes: int, stride: int = 1, groups: int = 1, dilation: int = 1) -> nn.Conv2d:
    """Build a bias-free 3x3 convolution whose padding equals its dilation."""
    conv_options = dict(
        kernel_size=3,
        stride=stride,
        padding=dilation,
        groups=groups,
        bias=False,
        dilation=dilation,
    )
    return nn.Conv2d(in_planes, out_planes, **conv_options)


def conv1x1(in_planes: int, out_planes: int, stride: int = 1) -> nn.Conv2d:
    """Build a bias-free 1x1 (pointwise) convolution."""
    pointwise = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut connection.

    The block only supports ``groups=1``, ``base_width=64`` and ``dilation=1``;
    other values are rejected in the constructor.
    """

    # output channels = planes * expansion
    expansion: int = 1

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super().__init__()

        # reject configurations this block cannot express
        if groups != 1 or base_width != 64:
            raise ValueError('BasicBlock only supports groups=1 and base_width=64')
        if dilation > 1:
            raise NotImplementedError("Dilation > 1 not supported in BasicBlock")

        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer

        # When stride != 1 the first convolution and the `downsample` module both
        # reduce the spatial resolution. Sub-modules are registered in the same
        # order as before so the state_dict key order is unchanged.
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = make_norm(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = make_norm(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Return ``relu(residual_branch(x) + shortcut(x))``."""
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        # identity shortcut unless a projection module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)


class Bottleneck(nn.Module):
    """Residual block built as 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut."""

    # Bottleneck in torchvision places the stride for downsampling at 3x3 convolution(self.conv2)
    # while original implementation places the stride at the first 1x1 convolution(self.conv1)
    # according to "Deep residual learning for image recognition"https://arxiv.org/abs/1512.03385.
    # This variant is also known as ResNet V1.5 and improves accuracy according to
    # https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch.

    # output channels = planes * expansion
    expansion: int = 4

    def __init__(
        self,
        inplanes: int,
        planes: int,
        stride: int = 1,
        downsample: Optional[nn.Module] = None,
        groups: int = 1,
        base_width: int = 64,
        dilation: int = 1,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super().__init__()
        make_norm = nn.BatchNorm2d if norm_layer is None else norm_layer

        # channel count of the inner 3x3 convolution
        width = int(planes * (base_width / 64.)) * groups
        out_planes = planes * self.expansion

        # When stride != 1 the 3x3 convolution and the `downsample` module both
        # reduce the spatial resolution. Sub-modules are registered in the same
        # order as before so the state_dict key order is unchanged.
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = make_norm(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = make_norm(width)
        self.conv3 = conv1x1(width, out_planes)
        self.bn3 = make_norm(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x: Tensor) -> Tensor:
        """Return ``relu(residual_branch(x) + shortcut(x))``."""
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))

        # identity shortcut unless a projection module was supplied
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)


class ResNet(nn.Module):
    """ResNet backbone whose forward pass returns the activations of every stage.

    Unlike a classifier that returns only the logits, ``forward`` returns an
    11-tuple so intermediate features can be read out by index:

        0: conv1        1: bn1          2: relu         3: maxpool
        4: layer1       5: layer2       6: layer3       7: layer4
        8: avgpool      9: flattened avgpool            10: fc (class scores)

    NOTE: defining this class rebinds the name ``ResNet`` that the import cell
    brought in from ``torchvision.models``.

    Args:
        block: residual block class (``BasicBlock`` or ``Bottleneck``); must expose
            an ``expansion`` class attribute.
        layers: number of blocks in each of the four stages.
        num_classes: output size of the final fully connected layer.
        zero_init_residual: zero the last norm layer's weight in every residual branch.
        groups: convolution groups forwarded to the blocks.
        width_per_group: base width forwarded to the blocks.
        replace_stride_with_dilation: three booleans, one per stage 2-4; ``True``
            replaces that stage's stride with dilation.
        norm_layer: factory for normalisation layers (defaults to ``nn.BatchNorm2d``).

    Raises:
        ValueError: if ``replace_stride_with_dilation`` does not have 3 elements.
    """

    def __init__(
        self,
        # quoted so that defining the class does not evaluate the block names
        block: "Type[Union[BasicBlock, Bottleneck]]",
        layers: List[int],
        num_classes: int = 1000,
        zero_init_residual: bool = False,
        groups: int = 1,
        width_per_group: int = 64,
        replace_stride_with_dilation: Optional[List[bool]] = None,
        norm_layer: Optional[Callable[..., nn.Module]] = None
    ) -> None:
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            # each element in the tuple indicates if we should replace
            # the 2x2 stride with a dilated convolution instead
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.groups = groups
        self.base_width = width_per_group
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        # Zero-initialize the last BN in each residual branch,
        # so that the residual branch starts with zeros, and each residual block behaves like an identity.
        # This improves the model by 0.2~0.3% according to https://arxiv.org/abs/1706.02677
        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)  # type: ignore[arg-type]
                elif isinstance(m, BasicBlock):
                    nn.init.constant_(m.bn2.weight, 0)  # type: ignore[arg-type]

    def _make_layer(self, block: "Type[Union[BasicBlock, Bottleneck]]", planes: int, blocks: int,
                    stride: int = 1, dilate: bool = False) -> nn.Sequential:
        """Build one stage of ``blocks`` residual blocks and update ``self.inplanes``."""
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            # trade the stride for dilation: resolution is kept, receptive field grows
            self.dilation *= stride
            stride = 1
        if stride != 1 or self.inplanes != planes * block.expansion:
            # the shortcut must be projected whenever resolution or channel count changes
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        # only the first block of a stage strides / projects
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def _forward_impl(self, x: Tensor) -> tuple:
        """Run the network stage by stage and return every intermediate activation.

        Each stage consumes the output of the previous one.

        Returns:
            tuple of 11 tensors, ordered as listed in the class docstring.
        """
        out1 = self.conv1(x)
        out2 = self.bn1(out1)
        # `self.relu` works in place and would overwrite `out2`, which is returned
        # as its own activation; use the out-of-place functional form here.
        out3 = torch.relu(out2)
        out4 = self.maxpool(out3)

        out5 = self.layer1(out4)
        out6 = self.layer2(out5)
        out7 = self.layer3(out6)
        out8 = self.layer4(out7)

        out_avg = self.avgpool(out8)
        flat = torch.flatten(out_avg, 1)
        fin = self.fc(flat)

        return out1, out2, out3, out4, out5, out6, out7, out8, out_avg, flat, fin

    def forward(self, x: Tensor) -> tuple:
        """Return the 11-tuple of activations produced by ``_forward_impl``."""
        return self._forward_impl(x)




# **Load The Weights**


In [None]:
def load_model(model_url):
    """Build the notebook's ResNet-18 and load pretrained weights into it.

    Args:
        model_url: str
            URL of a pretrained ResNet-18 state dict. For convenience the
            notebook-level ``model_url`` dict is accepted as well, in which case
            its ``'resnet18'`` entry is used.

    Returns:
        model: ResNet
            network with the pretrained weights loaded, in eval (inference) mode.

    Raises:
        ValueError: if the checkpoint cannot be matched to the model's parameters
            either by name or by position.
    """
    # the notebook also binds the name `model_url` to a {architecture: url} dict
    if isinstance(model_url, dict):
        model_url = model_url['resnet18']

    # instantiate model architecture (ResNet-18: BasicBlock with 2 blocks per stage)
    model = ResNet(BasicBlock, [2, 2, 2, 2])

    # load parameters of pretrained model
    state_dict = torch.hub.load_state_dict_from_url(model_url)

    # list of parameter names of the model
    param_names = list(model.state_dict())
    known_names = set(param_names)

    if all(name in known_names for name in state_dict):
        # Same naming scheme: load by name, which does not depend on entry order.
        model.load_state_dict(state_dict)
    elif len(state_dict) == len(param_names):
        # Different names but the same number of entries: fall back to matching the
        # checkpoint tensors to the model's parameters by position.
        model.load_state_dict(dict(zip(param_names, state_dict.values())))
    else:
        raise ValueError(
            "checkpoint has {} entries that match the model neither by name nor by "
            "position (model has {} entries)".format(len(state_dict), len(param_names)))

    # set inference mode
    model.eval()

    return model

In [None]:
def print_ResNet_Predictions(output):
    """Print the (up to) five most likely ImageNet classes for one prediction.

    Class names are read from ``class_names_ImageNet.txt`` in the working
    directory, one name per line, in class-index order.

    Args:
        output: tensor of raw class scores; the code applies softmax over ``dim=1``
            and reads row 0, so a ``(1, num_classes)`` batch is expected.
    """
    with open('class_names_ImageNet.txt') as labels:
        classes = [line.strip() for line in labels]

    # class indices ordered from highest to lowest score
    # (the sorted values themselves are not needed)
    _, indices = torch.sort(output, descending=True)
    percentage = F.softmax(output, dim=1)[0] * 100.0

    # report the 5 best classes, or all of them when the model has fewer than 5
    for idx in indices[0][:5]:
        print('{}: {:.4f}%'.format(classes[idx], percentage[idx].item()))

In [None]:
# Download an example image from the pytorch website
# `urllib.request` is imported explicitly: a bare `import urllib` does not guarantee
# that the `request` submodule is loaded.
import urllib.request
url, filename = ("https://github.com/pytorch/hub/raw/master/images/dog.jpg", "dog.jpg")
# skip the download when the image is already present, so re-running the cell is cheap;
# a failed download raises instead of being silently swallowed
if not os.path.exists(filename):
    urllib.request.urlretrieve(url, filename)

In [None]:
# sample execution (requires torchvision)
from PIL import Image
from torchvision import transforms
input_image = Image.open(filename)
# standard ImageNet preprocessing: resize, centre crop, convert to tensor, normalise
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
input_tensor = preprocess(input_image)
input_batch = input_tensor.unsqueeze(0) # create a mini-batch as expected by the model

# inference mode: BatchNorm must use its stored running statistics rather than the
# statistics of this single-image batch
model.eval()

# move the input and model to GPU for speed if available
if torch.cuda.is_available():
    input_batch = input_batch.to('cuda')
    model.to('cuda')

with torch.no_grad():
    output = model(input_batch)
# Tensor of shape 1000, with confidence scores over Imagenet's 1000 classes
print(output[0])
# The output has unnormalized scores. To get probabilities, you can run a softmax on it.
probabilities = torch.nn.functional.softmax(output[0], dim=0)
print(probabilities)

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


tensor([ 1.5917e-02, -1.5497e+00,  3.2031e-01, -2.0585e+00, -8.5747e-01,
         1.7843e+00,  1.4699e+00,  2.1626e+00,  4.4888e+00,  8.2885e-01,
        -5.7819e+00, -3.4969e+00, -4.0621e+00, -4.7517e+00, -3.8072e+00,
        -4.7243e+00, -1.2590e+00,  2.9813e-01, -2.0459e+00, -5.2885e-01,
        -3.5982e+00, -8.1425e-01, -2.7651e+00, -1.2770e+00, -3.4182e+00,
        -1.9048e+00, -3.0018e+00, -1.3471e+00, -1.8391e+00,  1.3937e+00,
        -2.0114e+00, -1.4137e+00, -2.3287e+00, -1.8198e+00, -1.1914e-01,
        -3.4102e+00, -1.6544e+00, -3.4496e+00, -2.6479e+00, -2.7407e+00,
        -2.2193e+00, -3.6509e+00, -4.1255e+00, -5.5946e+00, -1.7519e+00,
        -1.6900e+00, -9.8164e-01, -2.1251e+00, -3.5137e+00, -1.3320e+00,
        -1.1335e+00, -1.1564e+00, -2.2711e-02, -8.5797e-01, -1.2919e+00,
        -2.8682e+00,  6.6078e-01, -1.7178e+00, -1.2443e+00, -2.3362e+00,
        -5.7817e-02, -1.9204e+00, -2.5964e+00, -1.8020e+00, -1.5125e+00,
        -1.0843e+00, -4.0987e-01, -1.3090e+00, -9.4

In [None]:
# Download ImageNet labels
!wget https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt

--2021-08-13 13:47:27--  https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.110.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 10472 (10K) [text/plain]
Saving to: ‘imagenet_classes.txt’


2021-08-13 13:47:27 (95.0 MB/s) - ‘imagenet_classes.txt’ saved [10472/10472]



In [None]:
# Load the class names, one per line, in class-index order
with open("imagenet_classes.txt", "r") as class_file:
    categories = [line.strip() for line in class_file]
# Report the five most probable categories together with their probabilities
top5_prob, top5_catid = torch.topk(probabilities, 5)
for prob, catid in zip(top5_prob, top5_catid):
    print(categories[catid], prob.item())

Samoyed 0.8846219182014465
Arctic fox 0.0458051860332489
white wolf 0.04427620768547058
Pomeranian 0.005621347576379776
Great Pyrenees 0.00465201074257493


In [None]:
# image preprocessing: resize to 224x224, convert to tensor, normalise with the
# ImageNet channel statistics
resize_normalize = transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

# use the model created in an earlier cell, in inference mode
model.eval()

# an earlier cell may have moved the model to the GPU; inputs must live on the same
# device as the weights
model_device = next(model.parameters()).device

# predict example frames
# NOTE(review): `example_frames` is created in the "Video Preprocessing" section
# further down, so on a fresh kernel this cell only works after that section has run.
with torch.no_grad():
    for img in example_frames:
        input_img = resize_normalize(img).unsqueeze(0).to(model_device)
        activations = model(input_img)

# Show the prediction for the last frame. The ResNet class defined in this notebook
# returns a tuple whose last entry holds the class scores; a model that returns a
# single tensor already is the class scores.
class_scores = activations[-1] if isinstance(activations, (tuple, list)) else activations
print_ResNet_Predictions(class_scores)


NameError: ignored

In [None]:
# move the model's weights onto the GPU whenever one is present
if torch.cuda.is_available():
    model.to('cuda')

# **Data Loading**

In [None]:
# download dataset from dropbox
dropbox_link = 'https://www.dropbox.com/s/agxyxntrbwko7t1/participants_data.zip?dl=0'
# expose the link as an environment variable so the shell lines below can read it
os.environ['download_link'] = dropbox_link 
!echo $download_link
# -O fixes the local file name; -c resumes a partially downloaded file
!wget -O participants_data.zip -c $download_link
# class-name list opened later by print_ResNet_Predictions
!wget -c https://raw.githubusercontent.com/Neural-Dynamics-of-Visual-Cognition-FUB/Algonauts2021_devkit/main/class_names_ImageNet.txt

https://www.dropbox.com/s/agxyxntrbwko7t1/participants_data.zip?dl=0
--2021-08-13 13:47:52--  https://www.dropbox.com/s/agxyxntrbwko7t1/participants_data.zip?dl=0
Resolving www.dropbox.com (www.dropbox.com)... 162.125.2.18, 2620:100:6017:18::a27d:212
Connecting to www.dropbox.com (www.dropbox.com)|162.125.2.18|:443... connected.
HTTP request sent, awaiting response... 301 Moved Permanently
Location: /s/raw/agxyxntrbwko7t1/participants_data.zip [following]
--2021-08-13 13:47:52--  https://www.dropbox.com/s/raw/agxyxntrbwko7t1/participants_data.zip
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc3190fa121ea7655a1ae610c23a.dl.dropboxusercontent.com/cd/0/inline/BUJ-T-WZs1Ghn8VuGMRzK0UXrMY-kMiOjqG4zNUEG5ugQZ_3CCagk3FXqd0JQnV40af6YYDkn6VgjixgKdEvIaByZAzYhFdnvFZrKEan6lrLX5ZbJftQufeHTJh_IANAZr0i95nBIyUVtEngzZ5L2_28/file# [following]
--2021-08-13 13:47:52--  https://uc3190fa121ea7655a1ae610c23a.dl.dropboxusercontent.com/c

In [None]:
# unzip data file (%%capture suppresses the output)
%%capture
!unzip -o participants_data.zip

# **Video Preprocessing**

In [None]:
# Folder (on the Colab VM) that holds the unzipped videos
video_dir = '/content/AlgonautsVideos268_All_30fpsmax'

# Paths of all mp4 files in that folder, in ascending order
video_list = sorted(glob.glob(video_dir + '/*.mp4'))

print('Total number of videos: ', len(video_list))

Total number of videos:  1102


In [None]:
def sample_video_from_mp4(file, num_frames = 16):
    """This function takes a mp4 video file as input and returns
    a list of uniformly sampled frames.
    
    Args
    ----------
    file : str
        path to mp4 video file
    num_frames : int
        number of frames to select with uniform frame sampling
    
    Returns
    -------
    frames: list of frames as PIL images
    num_frames: number of sampled frames

    Raises
    ------
    ValueError
        if the video contains no frames

    """
    
    # read video file
    video = VideoReader(file)

    # get total number of video frames
    total_frames = len(video)
    if total_frames == 0:
        raise ValueError("video contains no frames: {}".format(file))

    # Evenly spaced frame indices from the first to the last frame. When the video
    # has fewer frames than requested, some indices repeat.
    # (`np.int` was removed from NumPy; the builtin `int` is the equivalent dtype.)
    frame_indices = np.linspace(0, total_frames-1, num_frames, dtype = int)

    # decode the selected frames into PIL images
    video_frames = [Image.fromarray(video[int(i)].asnumpy()) for i in frame_indices]

    return video_frames, num_frames

In [None]:
# example of using the previous function
# Sample frames from one video of the list; `example_frames` is also read by the
# example-frame prediction cell in the "Load The Weights" section.
example_frames, num_frames = sample_video_from_mp4(video_list[8])
# number of frames that were sampled (shown as the cell output)
len(example_frames)

16

In [None]:
# Device used for computation: the first GPU when CUDA is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Tensors are moved with `.to(device)` at the point where they are created; this
# notebook has no global `inputs` / `labels` batch to move here.

NameError: ignored

# **Extract Activations**


In [None]:
def get_activations_and_save(model, video_list, save_dir, layer):
    """This function extracts the activations (features) of a specific layer of
    a model to a set of videos and saves them in a specified directory. One
    ``<video name>_layer_<layer>.npy`` file is written per video; it holds an
    array of shape ``(1, n_features)`` with the flattened activations of that
    layer averaged over the sampled frames of the video.

    The model is used as-is: switch it to eval mode before calling if it
    contains layers that behave differently during training.

    Parameters
    ----------
    model :
        pytorch model. Its forward pass returns either a tuple/list of tensors
        (one per layer) or a single tensor, which is treated as a 1-tuple.
    video_list : list
        list containing path to all videos.
    save_dir : str
        save path for extracted activations (created if missing).
    layer : int
        integer specifying layer number, i.e. the index into the model outputs.

    Raises
    ------
    IndexError
        if ``layer`` is not a valid index into the model outputs.
    ValueError
        if a video yields no frames.
    """
    
    # define preprocessing function
    preprocess = transforms.Compose([
            transforms.Resize((224,224)),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # inputs must live on the same device as the model weights; a model without
    # parameters is run on the CPU
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    os.makedirs(save_dir, exist_ok=True)

    for video in video_list:

        # name video file
        video_file_name = os.path.split(video)[-1].split(".")[0]

        # load video frames
        video_frames, num_frames = sample_video_from_mp4(video)
        if not video_frames:
            raise ValueError("no frames could be sampled from {}".format(video))

        # running sum of the flattened layer activations over all frames
        summed_activations = None
        with torch.no_grad():  # inference only: no autograd graph is needed
            for image in video_frames:

                # preprocess video frame and add the batch dimension
                input_image = preprocess(image).unsqueeze(0).to(device)

                # feed image through the model
                layer_outputs = model(input_image)
                if isinstance(layer_outputs, torch.Tensor):
                    layer_outputs = (layer_outputs,)
                if not -len(layer_outputs) <= layer < len(layer_outputs):
                    raise IndexError(
                        "layer {} requested but the model returns {} output(s)".format(
                            layer, len(layer_outputs)))

                # flatten layer activations into a numpy vector
                frame_activations = layer_outputs[layer].detach().cpu().numpy().ravel()

                # add activations over frames
                if summed_activations is None:
                    summed_activations = frame_activations
                else:
                    summed_activations = summed_activations + frame_activations

        # average layer activations across frames (once per video)
        avg_layer_activations = np.array([summed_activations])/float(num_frames)

        # define saving path
        save_path = os.path.join(save_dir, video_file_name + "_" +
                                 "layer" + "_" + str(layer) + ".npy")

        # save activations for a particular video
        np.save(save_path, avg_layer_activations)

In [None]:
############################### change dir ###########################
# create saving directory for activations (no-op when it already exists)
activations_dir = "/content/activations_ResNet"
os.makedirs(activations_dir, exist_ok=True)

# get activations
# NOTE(review): `layer` indexes the tuple returned by the model's forward pass, so
# `model` needs to be one that returns one tensor per layer (such as the ResNet class
# defined in this notebook) — confirm which model is bound to `model` at this point.
get_activations_and_save(model, video_list[:30], activations_dir, layer = 1)   ##### SPECIFY LAYER NUMBER #####

RuntimeError: ignored