In [38]:
import torch, torchvision, av
# Relative path of the sample clip that the cells in this notebook decode.
video_path = "video_reader_benchmark/videos/R6llTwEh07w.mp4"
# Open a reader on the clip; the second argument presumably selects the video stream.
# NOTE(review): the meaning of the trailing `True` flag is not visible here - confirm
# against the Video class definition.
# NOTE(review): in the cells visible here, `video` is rebound before this instance
# is ever read, so this call appears to act only as a check that the file opens.
video = torch.classes.torchvision.Video(video_path, "video", True)

## 1. `next_tensor` vs `next_list`
testing that we get the same frame using both functions 

In [39]:
# A new reader is created before each call, presumably so that both calls
# return the same (first) frame of the clip.
video = torch.classes.torchvision.Video(video_path, "video", True)
# next_list returns a pair; only its first element (the frame tensor) is kept.
t1, _ = video.next_list("")
video = torch.classes.torchvision.Video(video_path, "video", True)
# next_tensor returns the frame tensor directly.
t2 = video.next_tensor("")
# first we assert that these two are the same tensors
# (torch.equal requires identical size and identical element values)
assert torch.equal(t1, t2)

#### timing
comparing the timeits of both (unscientific; for benchmarking check [here](https://github.com/bjuncek/video_reader_benchmark/blob/bkorbar/newAPI/timeitcomp/Graph%20Results.ipynb))

In [40]:
%%timeit
# Timed: build a reader and fetch one result through next_list.
# Reader construction is part of the measured body.
video = torch.classes.torchvision.Video(video_path, "video", True)
_, _ = video.next_list("")

8.97 ms ± 4.06 ms per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [41]:
%%timeit
# Timed: build a reader and fetch one result through next_tensor.
# Reader construction is part of the measured body.
video = torch.classes.torchvision.Video(video_path, "video", True)
_ = video.next_tensor("")

7.02 ms ± 80.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## 2. returning the dummy tensor alone vs returning it in a list
the two returned tensors should be equal,
furthermore, they should be the same in size as the benchmark tensor from point 1

In [42]:
# A new reader is created before each call so both start from a freshly opened clip.
video = torch.classes.torchvision.Video(video_path, "video", True)
# List variant: returns a pair, of which only the first element (the tensor) is kept.
t3, _ = video.next_list_dummy_tensor("")
video = torch.classes.torchvision.Video(video_path, "video", True)
# Tensor variant: returns the tensor directly.
t4 = video.next_tensor_dummy_tensor("")

# The dummy tensor must be identical whether it is returned bare or inside a list.
assert torch.equal(t3, t4)
# The dummy tensor must have the same size as the real frame.
# NOTE: `t1` comes from the earlier next_list / next_tensor comparison cell,
# so that cell has to be run first.
assert t1.size() == t3.size()

#### timing
comparing the timeits of both (unscientific; for benchmarking check [here](https://github.com/bjuncek/video_reader_benchmark/blob/bkorbar/newAPI/timeitcomp/Graph%20Results.ipynb))

In [43]:
%%timeit
# Timed: build a reader and fetch one result through next_list_dummy_tensor.
# Reader construction is part of the measured body.
video = torch.classes.torchvision.Video(video_path, "video", True)
_, _ = video.next_list_dummy_tensor("")

7.02 ms ± 138 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [44]:
%%timeit
# Timed: build a reader and fetch one result through next_tensor_dummy_tensor.
# Reader construction is part of the measured body.
video = torch.classes.torchvision.Video(video_path, "video", True)
_ = video.next_tensor_dummy_tensor("")

7.1 ms ± 164 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## 3. Looped decoding: tensor vs list vs num_frames
we're checking for 2 assumptions:
1. two tensor lists will be identical
2. they will have the same number of frames compared to simply returning the decoded part

In [45]:
# Walk through the clip with next_list and collect every non-empty tensor in f_1.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_1 = []
while True:
    # next_list returns a pair; the second element is not used here
    t, _ = reader.next_list("")
    if t.numel() == 0:
        # an empty tensor ends the loop
        break
    f_1.append(t)

In [46]:
# Walk through the clip with next_tensor and collect every non-empty tensor in f_2.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_2 = []
while True:
    t = reader.next_tensor("")
    if t.numel() == 0:
        # an empty tensor ends the loop
        break
    f_2.append(t)

In [47]:
# Walk through the clip with next_int_numframes, recording the value returned
# for each call for as long as that value is 1.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_3 = []
i = reader.next_int_numframes("")
while i == 1:
    # Record the value that was just checked *before* fetching the next one.
    # Appending after the fetch (as before) dropped the first value and stored
    # the terminating value instead; the list length is the same either way.
    f_3.append(i)
    i = reader.next_int_numframes("")

In [48]:
# assumption 2: all three loops produced the same number of entries
# (checked first, since comparing stacked tensors needs equal lengths)
assert len(f_1) == len(f_2)
assert len(f_1) == len(f_3)

# assumption 1: the frames collected via next_list and next_tensor are identical
# NOTE: torch.stack raises on an empty list, so this also requires at least one frame.
assert torch.equal(torch.stack(f_1, 0), torch.stack(f_2,0))

#### timing
comparing the timeits of both (unscientific; for benchmarking check [here](https://github.com/bjuncek/video_reader_benchmark/blob/bkorbar/newAPI/timeitcomp/Graph%20Results.ipynb))
Note that all functions are decoding, the only difference should be the return value

In [49]:
%%timeit
# Timed: full pass over the clip using next_list, collecting each non-empty tensor.
# Reader construction is part of the measured body.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_1 = []
t, _ = reader.next_list("")
# an empty tensor ends the loop
while t.numel() > 0:
    f_1.append(t)
    t, _ = reader.next_list("")

305 ms ± 9.63 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [50]:
%%timeit
# Timed: full pass over the clip using next_tensor, collecting each non-empty tensor.
# Reader construction is part of the measured body.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_2 = []
t = reader.next_tensor("")
# an empty tensor ends the loop
while t.numel() > 0:
    f_2.append(t)
    t = reader.next_tensor("")

311 ms ± 9.14 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [51]:
%%timeit
# Timed: full pass over the clip using next_int_numframes, recording each
# returned value for as long as it is 1.
# Reader construction is part of the measured body.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_3 = []
i = reader.next_int_numframes("")
while i == 1:
    # Append the value that was just checked, then fetch the next one. Same
    # number of calls and appends as before, but the list no longer ends with
    # the terminating value.
    f_3.append(i)
    i = reader.next_int_numframes("")

309 ms ± 4.12 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


## 3.2 comparing the speed of decoding a real tensor vs returning a dummy tensor
Note that both functions are decoding, the only difference (in theory) should be that `next_tensor` is filling the tensor with actual values, while
`next_tensor_dummy_tensor` is filling it with ones

In [52]:
# Pass 1: collect every non-empty tensor returned by next_tensor.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_2 = []
while True:
    t = reader.next_tensor("")
    if t.numel() == 0:
        break
    f_2.append(t)

# Pass 2: same loop on a fresh reader, using next_tensor_dummy_tensor.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_3 = []
while True:
    t = reader.next_tensor_dummy_tensor("")
    if t.numel() == 0:
        break
    f_3.append(t)

# Both passes must have produced the same number of tensors.
assert len(f_2) == len(f_3)
print(len(f_2), len(f_3))

303 303


In [53]:
%%timeit
# Timed: full pass over the clip using next_tensor (real frame data).
# Reader construction is part of the measured body.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_2 = []
t = reader.next_tensor("")
# an empty tensor ends the loop
while t.numel() > 0:
    f_2.append(t)
    t = reader.next_tensor("")

319 ms ± 1.72 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [54]:
%%timeit
# Timed: full pass over the clip using next_tensor_dummy_tensor.
# Reader construction is part of the measured body.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_3 = []
t = reader.next_tensor_dummy_tensor("")
# an empty tensor ends the loop
while t.numel() > 0:
    f_3.append(t)
    t = reader.next_tensor_dummy_tensor("")

304 ms ± 7.08 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


finally, the most interesting test,
## 3.3 comparing the difference between returning a dummy tensor and a dummy list

Note that both functions are decoding; the only difference (in theory) should be that `next_list_dummy_tensor` returns the dummy tensor inside a list, while
`next_tensor_dummy_tensor` returns the dummy tensor (filled with ones) directly

In [55]:
# Pass 1: collect every non-empty tensor returned by next_tensor_dummy_tensor.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_2 = []
while True:
    t = reader.next_tensor_dummy_tensor("")
    if t.numel() == 0:
        break
    f_2.append(t)

# Pass 2: same loop on a fresh reader, using next_list_dummy_tensor
# (returns a pair; only the first element is kept).
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_3 = []
while True:
    t, _ = reader.next_list_dummy_tensor("")
    if t.numel() == 0:
        break
    f_3.append(t)

# Show both counts first, then require them to match.
print(len(f_2), len(f_3))
assert len(f_2) == len(f_3)


303 303


In [56]:
%%timeit
# Timed: full pass over the clip using next_tensor_dummy_tensor.
# Reader construction is part of the measured body.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_2 = []
t = reader.next_tensor_dummy_tensor("")
# an empty tensor ends the loop
while t.numel() > 0:
    f_2.append(t)
    t = reader.next_tensor_dummy_tensor("")

321 ms ± 5.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [57]:
%%timeit
# Timed: full pass over the clip using next_list_dummy_tensor
# (returns a pair; only the first element is kept).
# Reader construction is part of the measured body.
reader = torch.classes.torchvision.Video(video_path, "video", True)
f_2 = []
t, _ = reader.next_list_dummy_tensor("")
# an empty tensor ends the loop
while t.numel() > 0:
    f_2.append(t)
    t, _ = reader.next_list_dummy_tensor("")

324 ms ± 2.09 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
