##### Import classes, libraries

In [53]:
%load_ext autoreload

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [54]:
%autoreload 2
import sys
sys.path.insert(0,'..')
from src.server.db_api import connect
from src.storage.petastorm_storage_engine import PetastormStorageEngine
from src.readers.opencv_reader import OpenCVReader
from src.catalog.models.df_metadata import DataFrameMetadata
from src.storage.abstract_storage_engine import AbstractStorageEngine
from src.storage.ffmpeg_storage_engine import FFmpegStorageEngine
from src.catalog.models.df_column import DataFrameColumn
from src.models.storage.batch import Batch
from src.catalog.column_type import ColumnType, NdArrayType
from src.readers.abstract_reader import AbstractReader
from src.readers.petastorm_reader import PetastormReader
from typing import Iterator
import os
import ffmpeg
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

In [3]:
one_min_video = "/home/akshay/Downloads/pallof.mp4"
thirty_sec_video = "/home/akshay/Downloads/30secseinfeld.mkv"

In [8]:
%%time
def writevideo(videoname, tablename, video_location, resolution):
    start = time.time()
    opencv_reader = OpenCVReader(video_location, 30000000)
    ffengine = FFmpegStorageEngine()
    table = DataFrameMetadata(tablename, videoname)
    table.resolution = resolution
    ffengine.create(table)
    ffengine.open_write(table)
    for batch in opencv_reader.read():
        ffengine.write(table, batch)
    ffengine.close_write(tablename)
    end = time.time()
    
    return end - start

CPU times: user 13 µs, sys: 4 µs, total: 17 µs
Wall time: 32.4 µs


In [15]:
video_intervals = [1, 2, 3, 4, 6, 8, 10, 12, 14, 16]
video_locations = ["/home/akshay/Downloads/evavideos/" + str(x) + ".mp4" for x in video_intervals]
video_names = [str(x) for x in video_intervals]
table_names = ["table_" + str(x) for x in video_intervals]

In [16]:
writevideo("2.mp4", "table_2", video_locations[1], (1920, 1080))

9.682677984237671

In [17]:
write_times = []
for interval, location, name, table_name in list(zip(video_intervals, video_locations, video_names, table_names)):
    print(interval, location, name, table_name)
    write_times.append(writevideo(name, table_name, location, (1920, 1080)))

1 /home/akshay/Downloads/evavideos/1.mp4 1 table_1
2 /home/akshay/Downloads/evavideos/2.mp4 2 table_2
3 /home/akshay/Downloads/evavideos/3.mp4 3 table_3
4 /home/akshay/Downloads/evavideos/4.mp4 4 table_4
6 /home/akshay/Downloads/evavideos/6.mp4 6 table_6
8 /home/akshay/Downloads/evavideos/8.mp4 8 table_8
10 /home/akshay/Downloads/evavideos/10.mp4 10 table_10
12 /home/akshay/Downloads/evavideos/12.mp4 12 table_12
14 /home/akshay/Downloads/evavideos/14.mp4 14 table_14
16 /home/akshay/Downloads/evavideos/16.mp4 16 table_16


In [20]:
for time, write_time in zip(video_intervals, write_times):
    print(f"{time} {write_time}")

1 5.300134897232056
2 7.122283220291138
3 9.79416823387146
4 15.646122932434082
6 21.651527404785156
8 27.97468852996826
10 31.666693449020386
12 41.123889446258545
14 48.69524073600769
16 64.28311681747437


In [90]:
ffengine = FFmpegStorageEngine()

## Stress Test, Consecutive Sequence

In [30]:
def sequence(start, end, step, table):
    
    results = []
    
    for i in range(start, end, step):
        start_time = time.time()
        
        ffengine.open_read(table, ("between", (0, i)))
        for images in ffengine.read(table):
            image_arrays = images.frames['data'].values
        ffengine.close_read(table)
        
        end_time = time.time()
        
        results.append(end_time - start_time)
    
    return results

In [31]:
start, end, step = 0, 1000, 50

def do_test(start, end, step, testmethod, warmup_rounds, experiment_rounds, table):
    n = len(range(start, end, step))

    result_sum = [0] * n

    ffengine.process = None

    for i in range(warmup_rounds):
        testmethod(start, end, step, table)

    for i in range(experiment_rounds):
        test_i_results = testmethod(start, end, step, table)
        result_sum = [result_sum[i] + test_i_results[i] for i in range(n)]

    average_result = [x / experiment_rounds for x in result_sum]

    return average_result

In [34]:
table = DataFrameMetadata("table_16", "16")
table.resolution = (1920, 1080)
results = do_test(0, 500, 50, sequence, 1, 2, table)
print(results)

['ffmpeg', '-i', 'ffmpeg_data/16.mp4', '-filter_complex', '[0]select=between(n\\,0\\,0)[s0]', '-map', '[s0]', '-f', 'rawvideo', '-pix_fmt', 'rgb24', '-s', '1920x1080', '-vsync', '0', 'pipe:']
['ffmpeg', '-i', 'ffmpeg_data/16.mp4', '-filter_complex', '[0]select=between(n\\,0\\,50)[s0]', '-map', '[s0]', '-f', 'rawvideo', '-pix_fmt', 'rgb24', '-s', '1920x1080', '-vsync', '0', 'pipe:']
['ffmpeg', '-i', 'ffmpeg_data/16.mp4', '-filter_complex', '[0]select=between(n\\,0\\,100)[s0]', '-map', '[s0]', '-f', 'rawvideo', '-pix_fmt', 'rgb24', '-s', '1920x1080', '-vsync', '0', 'pipe:']
['ffmpeg', '-i', 'ffmpeg_data/16.mp4', '-filter_complex', '[0]select=between(n\\,0\\,150)[s0]', '-map', '[s0]', '-f', 'rawvideo', '-pix_fmt', 'rgb24', '-s', '1920x1080', '-vsync', '0', 'pipe:']
['ffmpeg', '-i', 'ffmpeg_data/16.mp4', '-filter_complex', '[0]select=between(n\\,0\\,200)[s0]', '-map', '[s0]', '-f', 'rawvideo', '-pix_fmt', 'rgb24', '-s', '1920x1080', '-vsync', '0', 'pipe:']
['ffmpeg', '-i', 'ffmpeg_data/16.

In [35]:
for frames, time in zip(range(0, 500, 50), results):
    print(f"{frames} {time}")

0 4.481938481330872
50 5.480599880218506
100 6.383861780166626
150 7.387832999229431
200 8.422945618629456
250 9.34721291065216
300 11.187633275985718
350 11.349470496177673
400 12.35720431804657
450 13.424412488937378


In [12]:
n_frames = int(ffmpeg.probe(ffengine._file_url(table))['streams'][0]['nb_frames'])

## Stress Test, Random N Frames, with replacement

In [56]:
import random
import time


In [52]:
random_results = do_test(50, 500, 50, random_n, 1, 2, table)
for frames, rtime in zip(range(0, 500, 50), random_results):
    print(f"{frames} {rtime}")

50 50
100 100
150 150
200 200
250 250
300 300
350 350
400 400
450 450
50 50
100 100
150 150
200 200
250 250
300 300
350 350
400 400
450 450
50 50
100 100
150 150
200 200
250 250
300 300
350 350
400 400
450 450
0 7.712672829627991
50 6.65441370010376
100 7.360141396522522
150 8.54240095615387
200 9.016069054603577
250 9.82991111278534
300 11.331424593925476
350 14.278607368469238
400 13.918431639671326


In [83]:
video_intervals = [1, 2, 3, 4, 6, 8, 10, 12, 14, 16]
video_locations = ["/home/akshay/Downloads/evavideos/" + str(x) + ".mp4" for x in video_intervals]
video_names = [str(x) for x in video_intervals]
table_names = ["table_" + str(x) for x in video_intervals]

In [96]:
def video_test_point_query():
    results = []
    number_of_frames = []
    for video, table_name in zip(video_names, table_names):
        table = DataFrameMetadata(table_name, video)
        #print(ffengine._file_url(table))
        n_frames = int(ffmpeg.probe(ffengine._file_url(table) + ".mp4")['streams'][0]['nb_frames'])
        print(n_frames)
        frame = random.randrange(n_frames)
        table.resolution = (1920, 1280)
        start_time = time.time()
        ffengine.open_read(table, ("random", [frame]))
        for images in ffengine.read(table):
            image_arrays = images.frames['data'].values
        ffengine.close_read(table)
        end_time = time.time()    
        number_of_frames.append(n_frames)
        results.append(end_time - start_time)
    return number_of_frames, results

In [97]:
sum_results = [0] * len(video_intervals)
print(sum_results)
frames, results_across_videos = video_test()
for i in range(3):
    _, results_i = video_test()
    sum_results = [sum_results[i] + results_i[i] for i in range(len(video_intervals))]
    
avg_results = [x / 3 for x in sum_results]
print(avg_results)

[0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
27
60
89
142
202
266
308
371
423
500
27
60
89
142
202
266
308
371
423
500
27
60
89
142
202
266
308
371
423
500
27
60
89
142
202
266
308
371
423
500
[0.4550052483876546, 0.7183191776275635, 0.941396951675415, 1.5469048817952473, 2.0835627714792886, 2.599161942799886, 2.9492828051249185, 3.4326114654541016, 3.898787260055542, 4.691850105921428]


In [99]:
print(results_across_results)
frame_counts = [x[0] for x in results_across_results]

[(27, 0.6908631324768066), (60, 0.7625288963317871), (89, 0.9750816822052002), (142, 1.5520527362823486), (202, 2.125415325164795), (266, 2.568614959716797), (308, 2.91607403755188), (371, 3.6731321811676025), (423, 4.455996751785278), (500, 4.980330228805542)]


In [100]:
for frames, avg_time in zip(frame_counts, avg_results):
    print(f"{frames},{avg_time}")

27,0.4550052483876546
60,0.7183191776275635
89,0.941396951675415
142,1.5469048817952473
202,2.0835627714792886
266,2.599161942799886
308,2.9492828051249185
371,3.4326114654541016
423,3.898787260055542
500,4.691850105921428


In [103]:
def video_test_range_query():
    results = []
    number_of_frames = []
    for video, table_name in zip(video_names, table_names):
        table = DataFrameMetadata(table_name, video)
        #print(ffengine._file_url(table))
        n_frames = int(ffmpeg.probe(ffengine._file_url(table) + ".mp4")['streams'][0]['nb_frames'])
        print(n_frames)
        frame = random.randrange(n_frames)
        table.resolution = (1920, 1280)
        start_time = time.time()
        ffengine.open_read(table, ("between", (frame, frame + 10)))
        for images in ffengine.read(table):
            image_arrays = images.frames['data'].values
        ffengine.close_read(table)
        end_time = time.time()    
        number_of_frames.append(n_frames)
        results.append(end_time - start_time)
    return number_of_frames, results

In [104]:
sum_results_range = [0] * len(video_intervals)
for i in range(3):
    _, results_i = video_test_range_query()
    sum_results_range = [sum_results_range[i] + results_i[i] for i in range(len(video_intervals))]
    
avg_results_range = [x / 3 for x in sum_results]
print(avg_results_range)

27
60
89
142
202
266
308
371
423
500
27
60
89
142
202
266
308
371
423
500
27
60
89
142
202
266
308
371
423
500
[0.4550052483876546, 0.7183191776275635, 0.941396951675415, 1.5469048817952473, 2.0835627714792886, 2.599161942799886, 2.9492828051249185, 3.4326114654541016, 3.898787260055542, 4.691850105921428]


In [106]:
avg_results_range = [x / 3 for x in sum_results_range]

for frames, avg_time in zip(frame_counts, avg_results_range):
    print(f"{frames},{avg_time}")

27,1.040590524673462
60,1.2731739679972331
89,1.4530449708302815
142,2.000558296839396
202,2.712224324544271
266,3.692084868748983
308,3.9504953225453696
371,4.230886061986287
423,4.654622395833333
500,5.613044500350952
