In [20]:
import pandas
import seaborn as sn
import matplotlib.pyplot as plt

In [21]:
# Read the results table from CSV and print it for a quick look.
results_csv = "test_files.csv"
df = pandas.read_csv(results_csv)
print(df)

                                          video_source  reader_sleep_seconds  \
0    data/strawberry/cam_3_S_video_20210312_123203....                     5   
1    data/strawberry/cam_3_S_video_20210312_123203....                     5   
2    data/strawberry/cam_4_N_video_20210309_132604....                     5   
3    data/strawberry/cam_4_N_video_20210317_114802....                     5   
4    data/strawberry/cam_3_S_video_20210312_123203....                     5   
..                                                 ...                   ...   
155  data/strawberry/cam_1_N_video_20210315_132804....                     5   
156  data/strawberry/cam_1_S_video_20210317_123004....                     5   
157  data/strawberry/cam_2_S_video_20210308_112402....                     5   
158  data/strawberry/cam_4_N_video_20210309_132604....                     5   
159  data/strawberry/cam_8_N_video_20210310_133844....                     5   

     reader_flush_proportion  downscale

In [22]:
# Processing time divided by input duration. Despite the `_pct` suffix this is
# a plain ratio (not multiplied by 100); values above 1 mean processing took
# longer than the input's own duration.
df["processing_speed_pct"] = df["processing_duration_seconds"].div(df["input_duration_seconds"])

In [23]:
# Output file size divided by input file size. Despite the `_pct` suffix this
# is a plain ratio (not multiplied by 100).
df["output_filesize_pct"] = df["output_filesize_MB"].div(df["input_filesize_MB"])


Now let's see all the unique downscale factors.

In [24]:
# Distinct values present in the downscale_factor column, in order of appearance.
unique_downscale_factors = df["downscale_factor"].unique()
print(unique_downscale_factors)

[16  1]


We want the average file size and processing speed reduction, grouped by each of these downscale factors:

In [25]:
# Group the rows by downscale factor, then print the per-group mean followed by
# the per-group standard deviation of the two ratio columns.
downscale_groups = df.groupby("downscale_factor")
downscale_metrics = downscale_groups[["output_filesize_pct", "processing_speed_pct"]]
print(downscale_metrics.mean())
print(downscale_metrics.std())

                  output_filesize_pct  processing_speed_pct
downscale_factor                                           
1                            0.111787              1.068993
16                           0.054679              0.410763
                  output_filesize_pct  processing_speed_pct
downscale_factor                                           
1                            0.071555              0.099926
16                           0.021552              0.013474


Let's repeat this for the dilation kernel size.

In [26]:
# Group the rows by dilation kernel size, then print the per-group mean followed
# by the per-group standard deviation of the two ratio columns.
dilation_groups = df.groupby("dilate_kernel_size")
dilation_metrics = dilation_groups[["output_filesize_pct", "processing_speed_pct"]]
print(dilation_metrics.mean())
print(dilation_metrics.std())

                    output_filesize_pct  processing_speed_pct
dilate_kernel_size                                           
41                             0.081410              0.730206
63                             0.085056              0.749550
                    output_filesize_pct  processing_speed_pct
dilate_kernel_size                                           
41                             0.059812              0.328586
63                             0.060429              0.348403


Next, the movement threshold. Presumably a smaller threshold means more pixels are included in the output file, so I would expect the file size to be greater, while the processing time should stay basically the same.

In [27]:
# Group the rows by movement threshold, then print the per-group mean followed
# by the per-group standard deviation of the two ratio columns.
movement_thresh_groups = df.groupby("movement_threshold")
movement_thresh_metrics = movement_thresh_groups[["output_filesize_pct", "processing_speed_pct"]]
print(movement_thresh_metrics.mean())
print(movement_thresh_metrics.std())

                    output_filesize_pct  processing_speed_pct
movement_threshold                                           
40                             0.093074              0.753457
50                             0.073391              0.726299
                    output_filesize_pct  processing_speed_pct
movement_threshold                                           
40                             0.067743              0.348843
50                             0.049504              0.327838


Finally, let's check out the persist factor.

In [28]:
# Group the rows by persist factor, then print the per-group mean followed by
# the per-group standard deviation of the two ratio columns.
persist_groups = df.groupby("persist_factor")
persist_metrics = persist_groups[["output_filesize_pct", "processing_speed_pct"]]
print(persist_metrics.mean())
print(persist_metrics.std())

                output_filesize_pct  processing_speed_pct
persist_factor                                           
0.65                       0.074011              0.722360
0.75                       0.092454              0.757396
                output_filesize_pct  processing_speed_pct
persist_factor                                           
0.65                       0.050237              0.324555
0.75                       0.067379              0.351547
