<a href="https://colab.research.google.com/github/crazyCoderLi/benchmark_problem/blob/main/benchmark.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

准备工作：
导入各种包

In [1]:
try:
    import torch
except ModuleNotFoundError:
    !pip install torch torchvision
    import os
    import sys
    import torch
    
if torch.__version__=='1.6.0+cu101' and sys.platform.startswith('linux'):
    !pip install pytorchvideo
else:
    need_pytorchvideo=False
    try:
        # Running notebook locally
        import pytorchvideo
    except ModuleNotFoundError:
        need_pytorchvideo=True
    if need_pytorchvideo:
        # Install from GitHub
        !pip install "git+https://github.com/facebookresearch/pytorchvideo.git"

Collecting git+https://github.com/facebookresearch/pytorchvideo.git
  Cloning https://github.com/facebookresearch/pytorchvideo.git to /tmp/pip-req-build-sd31wxk0
  Running command git clone -q https://github.com/facebookresearch/pytorchvideo.git /tmp/pip-req-build-sd31wxk0
Collecting fvcore
  Downloading fvcore-0.1.5.post20210924.tar.gz (49 kB)
[K     |████████████████████████████████| 49 kB 2.7 MB/s 
[?25hCollecting av
  Downloading av-8.0.3-cp37-cp37m-manylinux2010_x86_64.whl (37.2 MB)
[K     |████████████████████████████████| 37.2 MB 32 kB/s 
[?25hCollecting parameterized
  Downloading parameterized-0.8.1-py2.py3-none-any.whl (26 kB)
Collecting iopath
  Downloading iopath-0.1.9-py3-none-any.whl (27 kB)
Collecting yacs>=0.1.6
  Downloading yacs-0.1.8-py3-none-any.whl (14 kB)
Collecting pyyaml>=5.1
  Downloading PyYAML-5.4.1-cp37-cp37m-manylinux1_x86_64.whl (636 kB)
[K     |████████████████████████████████| 636 kB 34.4 MB/s 
Collecting portalocker
  Downloading portalocker-2.3.2-

In [2]:
import json 
from torchvision.transforms import Compose, Lambda
from torchvision.transforms._transforms_video import (
    CenterCropVideo,
    NormalizeVideo,
)
from pytorchvideo.data.encoded_video import EncodedVideo
from pytorchvideo.transforms import (
    ApplyTransformToKey,
    ShortSideScale,
    UniformTemporalSubsample,
    UniformCropVideo
) 
from typing import Dict

  "The _functional_video module is deprecated. Please use the functional module instead."
  "The _transforms_video module is deprecated. Please use the transforms module instead."


下载示例视频

In [3]:
# Download the example video file
!wget https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4 

--2021-10-11 09:23:14--  https://dl.fbaipublicfiles.com/pytorchvideo/projects/archery.mp4
Resolving dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)... 172.67.9.4, 104.22.75.142, 104.22.74.142, ...
Connecting to dl.fbaipublicfiles.com (dl.fbaipublicfiles.com)|172.67.9.4|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 549197 (536K) [video/mp4]
Saving to: ‘archery.mp4’


2021-10-11 09:23:15 (1.86 MB/s) - ‘archery.mp4’ saved [549197/549197]



视频预处理类

In [4]:
class PackPathway(torch.nn.Module):
    def __init__(self, alpha = 4):
        super().__init__()
        self.alpha = alpha

    def forward(self, frames: torch.Tensor):
        fast_pathway = frames
        # Perform temporal sampling from the fast pathway.
        slow_pathway = torch.index_select(
            frames,
            1,
            torch.linspace(
                0, frames.shape[1] - 1, frames.shape[1] // self.alpha
            ).long(),
        )
        frame_list = [slow_pathway, fast_pathway]
        return frame_list


class Preprocess():
    ####################
    # SlowFast transform
    ####################
    def __init__(
            self,
            video_path,
            device = "cuda",
            side_size = 256,
            mean=[0.45, 0.45, 0.45],
            std = [0.225, 0.225, 0.225],
            crop_size = 256,
            num_frames = 32,
            sampling_rate = 2,
            frames_per_second = 30,

    ):
        self.video_path = video_path
        self.device = device
        self.side_size = side_size
        self.mean = mean
        self.std = std
        self.crop_size = crop_size
        self.num_frames = num_frames
        self.sampling_rate = sampling_rate
        self.frames_per_second = frames_per_second

        self.clip_duration = self.get_clip_duration()


    def transform(self):
        trans = ApplyTransformToKey(
            key="video",
            transform=Compose(
                [
                    UniformTemporalSubsample(self.num_frames),
                    Lambda(lambda x: x / 255.0),
                    NormalizeVideo(self.mean, self.std),
                    ShortSideScale(
                        size=self.side_size
                    ),
                    CenterCropVideo(self.crop_size),
                    PackPathway()
                ]
            ),
        )
        return trans

    def get_clip_duration(self):
        return (self.num_frames * self.sampling_rate) / self.frames_per_second

    def get_processed_data(self):
        start_sec = 0
        end_sec = start_sec + self.clip_duration

        # Initialize an EncodedVideo helper class
        video = EncodedVideo.from_path(self.video_path)

        # Load the desired clip
        video_data = video.get_clip(start_sec=start_sec, end_sec=end_sec)

        # Apply a transform to normalize the video input
        video_data = self.transform()(video_data)

        # Move the inputs to the desired device
        inputs = video_data["video"]
        inputs = [i.to(self.device)[None, ...] for i in inputs]

        return inputs

benchmark脚本类

In [5]:
import time
import copy
import torch
import numpy as np



class BenchmarkingScript():
    def __init__(
            self,
            unit_data,
            running_times,
            device,
            model_source,
            model_name,
            pretrained: bool = True
    ):
        self.unit_data = unit_data
        self.running_times = running_times
        self.device = device

        self.model_source = model_source
        self.model_name = model_name
        self.pretrained = pretrained

        self.throughput = 0
        self.latency_list = []

        self.processed_data = self.preprocess(self.unit_data)
        self.model = self.load_model()

    def preprocess(self, raw_data):
        return raw_data


    def load_model(self):
        tempmodel = torch.hub.load(self.model_source, model=self.model_name, pretrained=self.pretrained)
        return tempmodel

    def run_script(self):
        self.model = self.model.to(self.device)
        self.model = self.model.eval()

        #warmup
        print("warming up!")
        for i in range(10):
            temp_data = copy.deepcopy(self.processed_data)
            self.model(temp_data)
            print(f'warming up down {i+1} times')
        print("warming up over!")

        ful_start_time = time.time()
        for i in range(self.running_times):
            one_start_time = time.time()
            temp_data = copy.deepcopy(self.processed_data)
            self.model(temp_data)
            one_end_time = time.time()
            one_time = one_end_time - one_start_time
            self.latency_list.append(one_time)
            print(f'times:{i} latency:{one_time}')

        ful_end_time = time.time()
        ful_time = ful_end_time - ful_start_time

        self.throughput = self.running_times / ful_time

        p50_latency = np.percentile(self.latency_list, 50)
        p95_latency = np.percentile(self.latency_list, 95)
        p99_latency = np.percentile(self.latency_list, 99)

        print(f'full running time: {ful_time: .4f}')
        print(f'throughput: {self.throughput: .4f}')
        print(f'p50_latency: {p50_latency}')
        print(f'p95_latency: {p95_latency}')
        print(f'p99_latency: {p99_latency}')

In [7]:
pre = Preprocess("archery.mp4")
inputs = pre.get_processed_data()
sc = BenchmarkingScript(unit_data=inputs, running_times=100, device="cuda",model_source="facebookresearch/pytorchvideo:main",model_name="slowfast_r50" )
sc.run_script()

Using cache found in /root/.cache/torch/hub/facebookresearch_pytorchvideo_main


warming up!
warming up down 1 times
warming up down 2 times
warming up down 3 times
warming up down 4 times
warming up down 5 times
warming up down 6 times
warming up down 7 times
warming up down 8 times
warming up down 9 times
warming up down 10 times
warming up over!
times:0 latency:0.18125486373901367
times:1 latency:0.1804673671722412
times:2 latency:0.18056440353393555
times:3 latency:0.17926931381225586
times:4 latency:0.17872214317321777
times:5 latency:0.17902660369873047
times:6 latency:0.1787407398223877
times:7 latency:0.1777198314666748
times:8 latency:0.1782841682434082
times:9 latency:0.17696189880371094
times:10 latency:0.17627429962158203
times:11 latency:0.17526555061340332
times:12 latency:0.17697620391845703
times:13 latency:0.17696833610534668
times:14 latency:0.17584538459777832
times:15 latency:0.17531752586364746
times:16 latency:0.17717957496643066
times:17 latency:0.17711544036865234
times:18 latency:0.1747446060180664
times:19 latency:0.17725706100463867
times