<a href="https://colab.research.google.com/github/crazyCoderLi/benchmark_problem/blob/main/benchmark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 1. Preparations
Most of these preparations are directly from the official turorials with only a few changes, which mainly include:
*   import packages
*   download the sample video









In [7]:
try:
    import torch
except ModuleNotFoundError:
    !pip install torch torchvision
    import os
    import sys
    import torch
    
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
    !pip install pytorchvideo
else:
    need_pytorchvideo=False
    try:
        # Running notebook locally
        import pytorchvideo
    except ModuleNotFoundError:
        need_pytorchvideo=True
    if need_pytorchvideo:
        # Install from GitHub
        !pip install "git+https://github.com/facebookresearch/pytorchvideo.git"

In [8]:
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample,
    UniformCropVideo
) 
from typing import Dict
import time
import copy
import torch
import numpy as np

In [9]:
# Download the example video file
!wget https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4 

--2021-10-12 07:34:58--  https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 104.22.75.142, 172.67.9.4, 104.22.74.142, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|104.22.75.142|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 549197 (536K) [video/mp4]
Saving to: ‘archery.mp4.1’


2021-10-12 07:35:00 (613 KB/s) - ‘archery.mp4.1’ saved [549197/549197]



# 2. Video Preprocessing Class
Function: transform the video into a list of tensors that the model can handle.

By changing the initialized parameters, we can easily transform the video into different tensors for different models.

But in fact, I didn't fully understand the meaning of each parameter, so I just encapsulated them into the simple class and assigned the params default value as in the official sample code.

In [10]:
class PackPathway(torch.nn.Module):
    '''
    Directly copied from the official tutorial,
    it is a class to transform the video.
    '''
    def __init__(self, alpha = 4):
        super().__init__()
        self.alpha = alpha

    def forward(self, frames: torch.Tensor):
        fast_pathway = frames
        # Perform temporal sampling from the fast pathway.
        slow_pathway = torch.index_select(
            frames,
            1,
            torch.linspace(
                0, frames.shape[1] - 1, frames.shape[1] // self.alpha
            ).long(),
        )
        frame_list = [slow_pathway, fast_pathway]
        return frame_list


class Preprocess():
    ####################
    # SlowFast transform
    ####################
    '''
    A class for video transform, I have encapsulated the responsibility
    of data preprocess into this class.
    You can meet the requirements of each model by changing the parameters.
    In fact, I didn't fully understand the meaning of each parameter, through I have tried.
    so I just give them default value as in the official sample code.
    '''
    def __init__(
            self,
            video_path,
            device = "cpu",
            side_size = 256,
            mean=[0.45, 0.45, 0.45],
            std = [0.225, 0.225, 0.225],
            crop_size = 256,
            num_frames = 32,
            sampling_rate = 2,
            frames_per_second = 30,

    ):
        self.video_path = video_path
        self.device = device
        self.side_size = side_size
        self.mean = mean
        self.std = std
        self.crop_size = crop_size
        self.num_frames = num_frames
        self.sampling_rate = sampling_rate
        self.frames_per_second = frames_per_second

        self.clip_duration = self.get_clip_duration()


    def transform(self):
        '''
        Define a video transformation pipeline, 
        I have no experience in the Video Process field before,
        so I don't fully understand the transformation details.
        :return:
        '''
        trans = ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(self.num_frames),
                    Lambda(lambda x: x / 255.0),
                    NormalizeVideo(self.mean, self.std),
                    ShortSideScale(
                        size=self.side_size
                    ),
                    CenterCropVideo(self.crop_size),
                    PackPathway()
                ]
            ),
        )
        return trans

    def get_clip_duration(self):
        '''
        According to the official tutorial, this part may change with
        the specific model, so I made it a separate method.
        :return: the clip duration.
        '''
        return (self.num_frames * self.sampling_rate) / self.frames_per_second

    def get_processed_data(self):
        '''
        Process the video into a list of tensors.
        :return: the tensors.
        '''
        start_sec = 0
        end_sec = start_sec + self.clip_duration

        # Initialize an EncodedVideo helper class
        video = EncodedVideo.from_path(self.video_path)

        # Load the desired clip
        video_data = video.get_clip(start_sec=start_sec, end_sec=end_sec)

        # Apply a transform to normalize the video input
        video_data = self.transform()(video_data)

        # Move the inputs to the desired device
        inputs = video_data["video"]
        inputs = [i.to(self.device)[None, ...] for i in inputs]

        return inputs


# 3. Benchmarking Script Class
Function: load a model, run and evaluate its inference performance

By running the same sample data repeatly, we can record its single and total running time, which can help us get statistical results of latencies and throughput.

We can easily load different pre-trained model form torchhub by assigning different model names when initializing the class.


In [11]:
class BenchmarkingScript():
    '''
    A class for running the model inference with metrics testing. User can
    call the the method to run and test the model and print the tested
    latency and throughput.
    '''
    def __init__(
            self,
            # video_path,
            preprocess_class,
            running_times,
            device,
            model_source,
            model_name,
            pretrained: bool = True
    ):
        '''
        # :param video_path: the path of the video which you want to infer repeatly.
        # :param preprocess_class: the class name that you want to preprocess the data.
        :param running_times: times you want to run.
        :param device: device to run the model.
        :param model_source: the models are loaded from torchhub, you should give the source and name of the model.
        :param model_name: the name of the model you want to load.
        :param pretrained: default is True.
        '''
        # self.video_path = video_path
        self.preprocess_class = preprocess_class
        self.running_times = running_times
        self.device = device

        self.model_source = model_source
        self.model_name = model_name
        self.pretrained = pretrained

        self.throughput = 0
        self.latency_list = []

        self.processed_data = self.preprocess()
        self.model = self.load_model()

    def preprocess(self):
        '''
        For different models, we need different preprocessing classes,
        we can just pass different
        :param preprocess_class: you can pass different preprocess class for different goals.
        :return: the processed data
        '''
        return self.preprocess_class.get_processed_data()


    def load_model(self):
        '''
        The model will load from the torchhub, you can just pass the source and name of the model,
        and it will be loaded automaticly.
        :return: the model loaded from torchhub
        '''
        tempmodel = torch.hub.load(self.model_source, model=self.model_name, pretrained=self.pretrained)
        return tempmodel

    def run_script(self):
        '''
        The main function for running the model with metrics testing.
        :return: Nothing
        '''

        # move the model to desired device and turn to eval mode.
        self.model = self.model.to(self.device)
        self.model = self.model.eval()

        #warmup, discard the first few running data
        print("Start warming up!")
        for i in range(10):
            temp_data = copy.deepcopy(self.processed_data)
            self.model(temp_data)
            print(f'\tWarming up for {i+1} times')
        print("Warm up is over!")

        # start to infer!
        ful_start_time = time.time()
        for i in range(self.running_times):
            one_start_time = time.time()
            temp_data = copy.deepcopy(self.processed_data)
            self.model(temp_data)
            one_end_time = time.time()
            one_time = one_end_time - one_start_time
            self.latency_list.append(one_time)
            print(f'times:{i} latency:{one_time}')

        ful_end_time = time.time()
        ful_time = ful_end_time - ful_start_time

        self.throughput = self.running_times / ful_time

        p50_latency = np.percentile(self.latency_list, 50)
        p95_latency = np.percentile(self.latency_list, 95)
        p99_latency = np.percentile(self.latency_list, 99)

        print("==================================")
        print(f'Full running time: {ful_time}')
        print(f'throughput: {self.throughput} times/sec')
        print(f'p50_latency: {p50_latency}')
        print(f'p95_latency: {p95_latency}')
        print(f'p99_latency: {p99_latency}')
        print("==================================")


In [12]:
pre = Preprocess("archery.mp4")

sc = BenchmarkingScript(
    preprocess_class=pre,
    running_times=100,
    device="cpu",
    model_source="facebookresearch/pytorchvideo:main",
    model_name="slowfast_r50"
)
sc.run_script()

Downloading: "https://github.com/facebookresearch/pytorchvideo/archive/main.zip" to /root/.cache/torch/hub/main.zip
Downloading: "https://dl.fbaipublicfiles.com/pytorchvideo/model_zoo/kinetics/SLOWFAST_8x8_R50.pyth" to /root/.cache/torch/hub/checkpoints/SLOWFAST_8x8_R50.pyth


  0%|          | 0.00/264M [00:00<?, ?B/s]

Start warming up!
	Warming up for 1 times
	Warming up for 2 times
	Warming up for 3 times
	Warming up for 4 times
	Warming up for 5 times
	Warming up for 6 times
	Warming up for 7 times
	Warming up for 8 times
	Warming up for 9 times
	Warming up for 10 times
Warm up is over!
times:0 latency:5.71503472328186
times:1 latency:5.731809377670288
times:2 latency:5.726421594619751
times:3 latency:5.702890396118164
times:4 latency:5.767206192016602
times:5 latency:5.7390525341033936
times:6 latency:5.765338897705078
times:7 latency:5.677489757537842
times:8 latency:5.661489248275757
times:9 latency:5.705151319503784
times:10 latency:5.700153112411499
times:11 latency:5.640721321105957
times:12 latency:5.7169880867004395
times:13 latency:5.722662448883057
times:14 latency:5.691630125045776
times:15 latency:5.6977620124816895
times:16 latency:5.656634569168091
times:17 latency:5.688710451126099
times:18 latency:5.7059266567230225
times:19 latency:5.7385094165802
times:20 latency:5.73963713645935