# Video evaluation

In [11]:
from pathlib import Path
import pyprojroot
dir_proj = pyprojroot.here()
print("Project directory:", dir_proj)

import json
import pandas as pd
import numpy as np

Project directory: /home/ck37/projects/ecig-vaping


## Review validation set videos

In [3]:
#dataset_version = "complete-in-review-2519" # Aug. 4, 2022
#dataset_version = "all-data-3258-items" # Oct. 24, 2022
dataset_version = "all-data-3287-items" # Oct. 24, 2022
dataset_name = "training-data"
dataset_id = "annotatevape/" + dataset_name + ":" + dataset_version
print("Dataset id:", dataset_id)

Dataset id: annotatevape/training-data:all-data-3287-items


In [4]:
# TODO: created a symlink within the project.
dir_base = Path("/home/ck37/.darwin/datasets/annotatevape/" + dataset_name + "/releases/")
dir_labels = dir_base / dataset_version

!ls {dir_labels}

annotations  lists


In [5]:
dir_videos = Path("/home/ck37/.darwin/datasets/annotatevape/" + dataset_name + "/images/")
print("Video directory:", dir_videos)

# Extract the filenames for the mp4s in our images directory.
files_videos = [video.stem for video in dir_videos.glob('*.mp4')]
print(f"mp4s found ({len(files_videos)}):")
print(", ".join([video for video in files_videos]))

Video directory: /home/ck37/.darwin/datasets/annotatevape/training-data/images
mp4s found (50):


In [6]:
files_videos

['#12',
 '#5',
 '#28',
 '#16',
 'several_pods_2',
 'box_mod_ejuice',
 'eliquid',
 'multiple_pods',
 '#26',
 '#14',
 '#23',
 '#4',
 'pods_clouds2',
 '#6',
 '#13',
 'mod_ejuice',
 'pod_clouds',
 '#17',
 '#21',
 '#8',
 '#19',
 '#25',
 '#27',
 '#22',
 '#18',
 'box_pod_mod',
 '#3',
 '#7',
 '#10',
 '#29',
 'several_pods',
 'pod_cloud',
 '#30',
 '#20',
 'two_pods',
 'pods_clouds',
 'boxes_pod_clouds',
 '#2',
 'box_pod',
 '#24',
 '#9',
 '#15',
 'pods',
 'ejuices',
 'multiplepods_clouds',
 'ejuice_cloud_mod',
 '#1',
 'boxes',
 '#11']

In [7]:
# Identify which videos we have annotations for.
dir_annotations = dir_labels / "annotations"

# Extract all of the annotations we currently have for this dataset version.
all_annotations = [file.stem for file in dir_annotations.glob('*.json')]

# The intersection of the annotaitons and video files lists will give us the correct set of videos/annotations.
annotated_videos = list(set(files_videos) & set(all_annotations))
print(f'Found {len(annotated_videos)} annotated videos:\n', annotated_videos)

Found 50 annotated videos:


In [12]:

video_dfs = {}

# Object classes to analyze
#target_class = ('mod', 'pod', 'e-juice', 'box', 'smoke cloud')
target_class = ('mod', 'pod', 'e-juice', 'box', 'smoke cloud', 'e-cigarette brand name', 'warning label nicotine', 'e-juice flavor')
#('synthetic nicotine label', )

# Loop over each video and collect metadata
for video in annotated_videos:
    print("Analyzing", video)
    # Tally the annotation statistics for each video
    annotation_file = dir_annotations / str(video + ".json")
    
    if not annotation_file.is_file():
        print("Could not find annotation for", video)
        continue
    
    # Import the annotation json.
    ann = json.load(open(annotation_file, 'r'))
    
    filename = ann['image']['filename']
    
    frame_count = ann['image']['frame_count']
    print("Filename:", filename, "Frame count:", frame_count)
    
    # Define the columns that we want to track
    df_columns = (
        'video',
        'frame'
    ) + target_class
    
    # Create a dataframe to store the frame counts for each object.
    video_df = \
        pd.DataFrame(0,
                     index = np.arange(frame_count),
                     columns = df_columns)
    
    video_df['frame'] = np.arange(frame_count)
    video_df['video'] = video

#    print(video_df)
            
    annotation_len = len(ann["annotations"])
    print("Annotation len:", annotation_len)
    
    if 'exclude' in ann['annotations']:
        print("Skipping this excluded video")
        continue
        
    if filename == "multiple_pods.mp4":
        #print(ann['annotations'])
        print(annotation_file)
    
    # Each annotation element has a certain frame range
    for ann_i in ann['annotations']:
        
        # Skip if this is not a relevant annotation
        if not ann_i['name'] in target_class:
            continue
        
        # Unclear why the above isn't triggered this correctly.
        if ann_i['name'] == 'exclude':
            print("Skipping an exclusion")
            continue
        
        segments = ann_i['segments']
        #print(ann_i['name'], "Segments:", segments)
        
        # Loop over each annotated frame
        for frame_i, val in ann_i['frames'].items():
            if ann_i['name'] == 'exclude':
                print("Frame i:", frame_i)
                print("Ann i:\n", ann_i)
            new_val = video_df.at[int(frame_i), ann_i['name']] + 1
            #print("Frame i", frame_i, "Val:", new_val)
            # Increment the count of the object type for that frame.
            video_df.at[int(frame_i), ann_i['name']] = new_val
    
    print()
    video_dfs[video] = video_df
    #for 

# Apply the model to each video frame
# Analyze the accurate of the model predictions

Analyzing two_pods
Filename: two_pods.mp4 Frame count: 99
Annotation len: 15

Analyzing boxes
Filename: boxes.mp4 Frame count: 68
Annotation len: 7

Analyzing #9
Filename: #9.mp4 Frame count: 157
Annotation len: 16

Analyzing #16
Filename: #16.mp4 Frame count: 198
Annotation len: 25

Analyzing #15
Filename: #15.mp4 Frame count: 108
Annotation len: 3

Analyzing #11
Filename: #11.mp4 Frame count: 93
Annotation len: 26

Analyzing #2
Filename: #2.mp4 Frame count: 130
Annotation len: 36

Analyzing #23
Filename: #23.mp4 Frame count: 435
Annotation len: 29

Analyzing #13
Filename: #13.mp4 Frame count: 204
Annotation len: 29

Analyzing mod_ejuice
Filename: mod_ejuice.mp4 Frame count: 217
Annotation len: 45

Analyzing box_pod_mod
Filename: box_pod_mod.mp4 Frame count: 107
Annotation len: 18

Analyzing #25
Filename: #25.mp4 Frame count: 111
Annotation len: 21

Analyzing #22
Filename: #22.mp4 Frame count: 154
Annotation len: 8

Analyzing #28
Filename: #28.mp4 Frame count: 64
Annotation len: 11

A

In [13]:
video_dfs.keys()



In [14]:
video_dfs['several_pods']

Unnamed: 0,video,frame,mod,pod,e-juice,box,smoke cloud,e-cigarette brand name,warning label nicotine,e-juice flavor
0,several_pods,0,0,4,0,0,0,0,0,0
1,several_pods,1,0,4,0,0,0,0,0,0
2,several_pods,2,0,5,0,0,0,0,0,0
3,several_pods,3,0,5,0,0,0,0,0,0
4,several_pods,4,0,5,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
69,several_pods,69,0,2,0,0,1,0,0,0
70,several_pods,70,0,2,0,0,1,0,0,0
71,several_pods,71,0,2,0,0,1,0,0,0
72,several_pods,72,0,3,0,0,1,0,0,0


In [15]:
df = pd.concat(video_dfs)

In [16]:
df.shape

(5768, 10)

In [26]:
# This object isn't being used anymore.
"""
types = ('mod',
        'pod',
        'e-juice',
        'box',
        'smoke cloud')
"""

In [17]:
df['num_objects'] = df[[*target_class]].sum(axis = 1)
df['any_object'] = df['num_objects'] > 0

In [18]:
df.head()

Unnamed: 0,Unnamed: 1,video,frame,mod,pod,e-juice,box,smoke cloud,e-cigarette brand name,warning label nicotine,e-juice flavor,num_objects,any_object
two_pods,0,two_pods,0,0,2,0,0,0,0,0,0,2,True
two_pods,1,two_pods,1,0,2,0,0,0,0,0,0,2,True
two_pods,2,two_pods,2,0,2,0,0,0,0,0,0,2,True
two_pods,3,two_pods,3,0,2,0,0,0,0,0,0,2,True
two_pods,4,two_pods,4,0,2,0,0,0,0,0,0,2,True


In [19]:
# Only 192 (3.3%) frames had no objects
df['any_object'].value_counts()

True     5576
False     192
Name: any_object, dtype: int64

In [30]:
5576 / 5768

0.9667128987517337

### Save summary statistics

In [20]:
df.to_parquet(str(dir_proj / "data/video-eval-summary-stats.parquet"), engine = "pyarrow")
# TODO: make a separate tables/ subdir somewhere.
df.to_excel(str(dir_proj / "data/video-eval-summary-stats.xlsx"))

## Apply model to videos

Here we have the option of scoring the whole video and then extracting specific frames, or just scoring the annotated frames.

It may be slightly preferable to score the full video and then analyze specific frames, because with that version we could potentially do smoothing or other post-processing leveraging the prediction information on frames that weren't annotated.file_checkpoint = 'work_dirs/' + model_name + "-" + timestr +'/latest.pth'


In [8]:
# Copied from mmdet-train.ipynb
model_name = "vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco"
dir_models = Path("../models/")

path_model = dir_models / str(model_name + ".py")
print("Model file:", path_model)
print("Find model:", path_model.is_file())
timestr = "20220523-105448"

file_checkpoint = dir_proj / ('notebooks/work_dirs/' + model_name + "-" + timestr +'/latest.pth')
print("Checkpoint file:", file_checkpoint)
print("Find checkpoint:", file_checkpoint.is_file())

Model file: ../models/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py
Find model: True
Checkpoint file: /home/ck37/projects/ecig-vaping/notebooks/work_dirs/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco-20220523-105448/latest.pth
Find checkpoint: True


In [33]:
# Modified from mmdet-train.ipynb
import os

def predict_video(video_name, video_dir,
                  dir_output = pyprojroot.here() / "data/detections/videos",
                  box_threshold = 0.4, verbose = False):
    print("Analyzing", video_name)

    video_file = video_dir / video_name
    
    if not os.path.exists(video_file):
        print("Could not find video file")
        raise Exception
            
    path_out = dir_output / Path(video_file).name
    if verbose:
        print("Output path:", path_out)
    
    # Default threshold is 0.3
#    !python {pyprojroot.here()}/external/mmdetection/demo/video_demo.py "{video_file}" \
    !python {pyprojroot.here()}/notebooks/video_demo-ck.py "{video_file}" \
        {path_model} \
        {file_checkpoint} \
        --out "{path_out}" \
        --save_result \
        --score-thr {box_threshold}

In [None]:
for video_name in files_videos:
    # Second argument is the probability threshold for showing a bounding box.
    predict_video(video_name + ".mp4", dir_videos, box_threshold = 0.4, verbose = True)

Analyzing #12.mp4
Output path: /home/ck37/projects/ecig-vaping/data/detections/videos/#12.mp4
load checkpoint from local path: /home/ck37/projects/ecig-vaping/notebooks/work_dirs/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco-20220523-105448/latest.pth
2022-10-24 16:15:22,216 - root - INFO - ModulatedDeformConvPack backbone.layer2.0.conv2 is upgraded to version 2.
2022-10-24 16:15:22,217 - root - INFO - ModulatedDeformConvPack backbone.layer2.1.conv2 is upgraded to version 2.
2022-10-24 16:15:22,218 - root - INFO - ModulatedDeformConvPack backbone.layer2.2.conv2 is upgraded to version 2.
2022-10-24 16:15:22,219 - root - INFO - ModulatedDeformConvPack backbone.layer2.3.conv2 is upgraded to version 2.
2022-10-24 16:15:22,220 - root - INFO - ModulatedDeformConvPack backbone.layer3.0.conv2 is upgraded to version 2.
2022-10-24 16:15:22,221 - root - INFO - ModulatedDeformConvPack backbone.layer3.1.conv2 is upgraded to version 2.
2022-10-24 16:15:22,222 - root - INFO - ModulatedDeformConvP

In [None]:
# Upload predicted to Google drive:
for video_name in files_videos:
    video_file = pyprojroot.here() / "data/detections/videos" / (video_name + ".mp4")
    if video_file.is_file():
        !rclone copy --progress {video_file} gdrive:deep_learning/detections/video-evaluation-paper
    else:
        print("Could not find prediction video for:", video_name)

## Apply model to individual annotated frames

This may be more accurate than trying to identify which annotated frame corresponds to which specific frame in a full video.

Each video will have a directory based on its filename that contains each annotated frame.

### Setup model for inference

In [9]:
from mmdet.apis import init_detector, inference_detector
import mmcv

# Specify the path to model config and checkpoint file
# This are specified above.

# Specify device to use - 2nd GPU here.
model = init_detector(str(path_model), str(file_checkpoint), device='cuda:0')

load checkpoint from local path: /home/ck37/projects/ecig-vaping/notebooks/work_dirs/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco-20220523-105448/latest.pth


2023-05-02 07:44:31,845 - root - INFO - ModulatedDeformConvPack backbone.layer2.0.conv2 is upgraded to version 2.
2023-05-02 07:44:31,847 - root - INFO - ModulatedDeformConvPack backbone.layer2.1.conv2 is upgraded to version 2.
2023-05-02 07:44:31,848 - root - INFO - ModulatedDeformConvPack backbone.layer2.2.conv2 is upgraded to version 2.
2023-05-02 07:44:31,849 - root - INFO - ModulatedDeformConvPack backbone.layer2.3.conv2 is upgraded to version 2.
2023-05-02 07:44:31,851 - root - INFO - ModulatedDeformConvPack backbone.layer3.0.conv2 is upgraded to version 2.
2023-05-02 07:44:31,853 - root - INFO - ModulatedDeformConvPack backbone.layer3.1.conv2 is upgraded to version 2.
2023-05-02 07:44:31,855 - root - INFO - ModulatedDeformConvPack backbone.layer3.2.conv2 is upgraded to version 2.
2023-05-02 07:44:31,856 - root - INFO - ModulatedDeformConvPack backbone.layer3.3.conv2 is upgraded to version 2.
2023-05-02 07:44:31,858 - root - INFO - ModulatedDeformConvPack backbone.layer3.4.conv2 

### Inference on frames

In [25]:
from tqdm import tqdm
import pickle

# Run on all images
extensions = {".png", ".jpg"}

path_out = pyprojroot.here() / "data/detections/videos"

video_dfs = {}
detection_threshold = 0.4

#for video_name in files_videos[:2]:
for video_name in files_videos:
    print("Analyzing", video_name)
    # See if we can find the directory with annotated frames.
    video_frame_dir = dir_videos / video_name
    if video_frame_dir.is_dir():
        
        # Identify the frame images in the video's directory.
        files = [p for p in video_frame_dir.glob("*") if p.suffix in extensions]

        print(f"Found {len(files)} files to analyze.")

        det_results = {}
        
        # Define the columns that we want to track
        """df_columns = {
                'video',
                'frame',
                'mod',
                'pod',
                'e-juice',
                'box',
                'smoke cloud'
        }"""
        df_columns = (
                'video',
                'frame',                
            ) + target_class
        
        
        # Create a dataframe to store the frame counts for each object.
        video_df = \
            pd.DataFrame(0,
                         index = np.arange(len(files)),
                         columns = df_columns)

        video_df['frame'] = np.arange(len(files))
        video_df['video'] = video_name 
        
        # Loop over the frame files
        for frame_i in tqdm(files):
            result = inference_detector(model, frame_i)
            #det_results[file_i.stem] = result
            
            # Each element is one of the 9 classes.
            # Each subelement is a bounding box for that class, along with classification probability.
            # The specific class ordering is specified in the model config file (e.g. models/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py)
            # Current class order:
            classes = ('box', 'e-cigarette brand name', 'e-juice', 'e-juice flavor', 'mod', 'pod', 'smoke cloud', 'synthetic nicotine label', 'warning label nicotine')
            
            # These are the object detections that we want to count
            """pred_types = ('mod',
                          'pod',
                          'e-juice',
                          'box',
                          'smoke cloud')
            """
            pred_types = target_class
            
            # Defined much higher in the script.
            pred_types = target_class
            
            # Loop over each prediction type that we care about and increment the count if the probability exceeds the threshold
            for pred_type_i in pred_types:
                # Find the location of this object type in the model result object.
                pred_index = classes.index(pred_type_i)
                
                # Count the number of predictions for this object that exceed the probability threshold for detection.
                found_count = len([1 for pred_i in result[pred_index] if pred_i[4] > detection_threshold])
                video_df.at[int(frame_i.stem), pred_type_i] = found_count

        with open((path_out / video_name).with_suffix('.pkl'), 'wb') as file:
            pickle.dump(det_results, file, protocol = pickle.HIGHEST_PROTOCOL)
        
        # Save the count dataframe.
        video_df.to_parquet((path_out / video_name).with_suffix('.parquet'))
        video_dfs[video_name] = video_df
    
    else:
        print("Could not find annotated frame directory for:", video_name)
        
# Combine all predictions into a single dataframe.
pred_df = pd.concat(video_dfs)
pred_df.to_parquet(path_out / "video-eval-preds.parquet")

Analyzing #12
Could not find annotated frame directory for: #12
Analyzing #5
Could not find annotated frame directory for: #5
Analyzing #28
Could not find annotated frame directory for: #28
Analyzing #16
Could not find annotated frame directory for: #16
Analyzing several_pods_2
Found 24 files to analyze.


100%|███████████████████████████████████████████████████| 24/24 [00:05<00:00,  4.40it/s]


Analyzing box_mod_ejuice
Found 72 files to analyze.


100%|███████████████████████████████████████████████████| 72/72 [00:12<00:00,  5.57it/s]


Analyzing eliquid
Found 42 files to analyze.


100%|███████████████████████████████████████████████████| 42/42 [00:09<00:00,  4.35it/s]


Analyzing multiple_pods
Could not find annotated frame directory for: multiple_pods
Analyzing #26
Could not find annotated frame directory for: #26
Analyzing #14
Could not find annotated frame directory for: #14
Analyzing #23
Could not find annotated frame directory for: #23
Analyzing #4
Could not find annotated frame directory for: #4
Analyzing pods_clouds2
Found 80 files to analyze.


100%|███████████████████████████████████████████████████| 80/80 [00:13<00:00,  6.11it/s]


Analyzing #6
Could not find annotated frame directory for: #6
Analyzing #13
Could not find annotated frame directory for: #13
Analyzing mod_ejuice
Found 217 files to analyze.


100%|█████████████████████████████████████████████████| 217/217 [00:35<00:00,  6.15it/s]


Analyzing pod_clouds
Found 90 files to analyze.


100%|███████████████████████████████████████████████████| 90/90 [00:21<00:00,  4.28it/s]


Analyzing #17
Could not find annotated frame directory for: #17
Analyzing #21
Could not find annotated frame directory for: #21
Analyzing #8
Could not find annotated frame directory for: #8
Analyzing #19
Could not find annotated frame directory for: #19
Analyzing #25
Could not find annotated frame directory for: #25
Analyzing #27
Could not find annotated frame directory for: #27
Analyzing #22
Could not find annotated frame directory for: #22
Analyzing #18
Could not find annotated frame directory for: #18
Analyzing box_pod_mod
Found 107 files to analyze.


100%|█████████████████████████████████████████████████| 107/107 [00:20<00:00,  5.10it/s]


Analyzing #3
Could not find annotated frame directory for: #3
Analyzing #7
Could not find annotated frame directory for: #7
Analyzing #10
Could not find annotated frame directory for: #10
Found 69 files to analyze.


100%|███████████████████████████████████████████████████| 69/69 [00:16<00:00,  4.17it/s]


Analyzing #29
Could not find annotated frame directory for: #29
Analyzing several_pods
Found 74 files to analyze.


100%|███████████████████████████████████████████████████| 74/74 [00:17<00:00,  4.24it/s]


Analyzing pod_cloud
Found 72 files to analyze.


100%|███████████████████████████████████████████████████| 72/72 [00:17<00:00,  4.21it/s]


Analyzing #30
Could not find annotated frame directory for: #30
Analyzing #20
Could not find annotated frame directory for: #20
Analyzing two_pods
Found 99 files to analyze.


100%|███████████████████████████████████████████████████| 99/99 [00:23<00:00,  4.30it/s]


Analyzing pods_clouds
Found 46 files to analyze.


100%|███████████████████████████████████████████████████| 46/46 [00:10<00:00,  4.26it/s]


Analyzing boxes_pod_clouds
Found 74 files to analyze.


100%|███████████████████████████████████████████████████| 74/74 [00:14<00:00,  5.02it/s]


Analyzing #2
Could not find annotated frame directory for: #2
Analyzing box_pod
Found 72 files to analyze.


100%|███████████████████████████████████████████████████| 72/72 [00:15<00:00,  4.52it/s]


Analyzing #24
Could not find annotated frame directory for: #24
Analyzing #9
Could not find annotated frame directory for: #9
Analyzing #15
Could not find annotated frame directory for: #15
Analyzing pods
Found 67 files to analyze.


100%|███████████████████████████████████████████████████| 67/67 [00:13<00:00,  4.95it/s]


Analyzing ejuices
Found 93 files to analyze.


100%|███████████████████████████████████████████████████| 93/93 [00:18<00:00,  4.95it/s]


Analyzing multiplepods_clouds
Found 82 files to analyze.


100%|███████████████████████████████████████████████████| 82/82 [00:19<00:00,  4.16it/s]


Analyzing ejuice_cloud_mod
Found 61 files to analyze.


100%|███████████████████████████████████████████████████| 61/61 [00:14<00:00,  4.21it/s]


Analyzing #1
Could not find annotated frame directory for: #1
Analyzing boxes
Found 68 files to analyze.


100%|███████████████████████████████████████████████████| 68/68 [00:16<00:00,  4.15it/s]

Analyzing #11
Could not find annotated frame directory for: #11





In [26]:
pred_df.head()

Unnamed: 0,Unnamed: 1,video,frame,mod,pod,e-juice,box,smoke cloud,e-cigarette brand name,warning label nicotine,e-juice flavor
several_pods_2,0,several_pods_2,0,0,0,1,1,0,1,0,1
several_pods_2,1,several_pods_2,1,0,1,1,0,0,1,0,1
several_pods_2,2,several_pods_2,2,0,1,1,0,0,2,0,1
several_pods_2,3,several_pods_2,3,0,1,1,0,0,4,0,0
several_pods_2,4,several_pods_2,4,0,1,1,0,0,3,0,0


## Compare predictions to annotations

In [27]:
# Join annotated df to predicted df
df2 = df.join(pred_df, on = ("video", "frame"), rsuffix = "_pred")

In [28]:
df2.head()
df2.to_excel(path_out / "video-eval-comparison.xlsx")

In [30]:
#pred_types = ('mod', 'pod', 'e-juice', 'box', 'smoke cloud')
pred_types = target_class
# For each object, calculate the absolute error for each frame

result_df = pd.DataFrame(0, index = [0], columns = pred_types)
            
for pred_type_i in pred_types:
    # |actual - predicted|
    mae = np.mean(np.abs(df2[pred_type_i].values - df2[pred_type_i + "_pred"]))
    result_df.at[0, pred_type_i] = mae

In [31]:
result_df

Unnamed: 0,mod,pod,e-juice,box,smoke cloud,e-cigarette brand name,warning label nicotine,e-juice flavor
0,0.246521,0.316103,0.115971,0.114645,0.535454,0.455268,0.180252,0.292247


In [32]:
# Then calculate the final summary stats.
df['num_objects_pred'] = df[[*pred_types]].sum(axis = 1)
df['any_object_pred'] = df['num_objects_pred'] > 0

In [33]:
# Of frames with any object, what percentage did the model flag as having any object?
print(np.mean(df.loc[df.any_object].any_object_pred.values))
# Of frames without an object, what percentage did the model correctly flag as not having an object?
print(np.mean(~ df.loc[~df.any_object.values].any_object_pred.values))

1.0
1.0


In [34]:
# 192 frames with no objects.
print(df.loc[~ df.any_object.values].shape)
df.loc[~ df.any_object.values]

(192, 14)


Unnamed: 0,Unnamed: 1,video,frame,mod,pod,e-juice,box,smoke cloud,e-cigarette brand name,warning label nicotine,e-juice flavor,num_objects,any_object,num_objects_pred,any_object_pred
boxes,25,boxes,25,0,0,0,0,0,0,0,0,0,False,0,False
boxes,26,boxes,26,0,0,0,0,0,0,0,0,0,False,0,False
boxes,27,boxes,27,0,0,0,0,0,0,0,0,0,False,0,False
boxes,28,boxes,28,0,0,0,0,0,0,0,0,0,False,0,False
boxes,29,boxes,29,0,0,0,0,0,0,0,0,0,False,0,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
#27,30,#27,30,0,0,0,0,0,0,0,0,0,False,0,False
box_mod_ejuice,44,box_mod_ejuice,44,0,0,0,0,0,0,0,0,0,False,0,False
#24,52,#24,52,0,0,0,0,0,0,0,0,0,False,0,False
#12,103,#12,103,0,0,0,0,0,0,0,0,0,False,0,False


In [62]:
# 1462 frames with one or more objects.
print(df.loc[df.any_object.values].shape)
print(df.loc[df.any_object.values].any_object_pred.value_counts())
df.loc[df.any_object.values]

(1462, 11)
True    1462
Name: any_object_pred, dtype: int64


Unnamed: 0,Unnamed: 1,mod,video,e-juice,frame,smoke cloud,box,pod,num_objects,any_object,num_objects_pred,any_object_pred
several_pods_2,0,0,several_pods_2,0,0,1,0,2,3,True,3,True
several_pods_2,1,0,several_pods_2,0,1,1,0,2,3,True,3,True
several_pods_2,2,0,several_pods_2,0,2,1,0,2,3,True,3,True
several_pods_2,3,0,several_pods_2,0,3,1,0,2,3,True,3,True
several_pods_2,4,0,several_pods_2,0,4,1,0,2,3,True,3,True
...,...,...,...,...,...,...,...,...,...,...,...,...
ejuice_cloud_mod,55,1,ejuice_cloud_mod,1,55,1,0,0,3,True,3,True
ejuice_cloud_mod,56,1,ejuice_cloud_mod,1,56,1,0,0,3,True,3,True
ejuice_cloud_mod,57,1,ejuice_cloud_mod,1,57,1,0,0,3,True,3,True
ejuice_cloud_mod,58,1,ejuice_cloud_mod,1,58,1,0,0,3,True,3,True


## Extract object probabilities

May 2023

In [20]:
from tqdm import tqdm
import pickle

# Run on all images
extensions = {".png", ".jpg"}

path_out = pyprojroot.here() / "data/detections/videos"

video_dfs = {}

for video_name in files_videos:
    print("Analyzing", video_name)
    # See if we can find the directory with annotated frames.
    video_frame_dir = dir_videos / video_name
    if video_frame_dir.is_dir():
        
        # Identify the frame images in the video's directory.
        files = [p for p in video_frame_dir.glob("*") if p.suffix in extensions]

        print(f"Found {len(files)} files to analyze.")
     
        all_frames = []
        
        # Loop over the frame files
        for frame_i in tqdm(files):
            result = inference_detector(model, frame_i)
            
            # Create a dataframe for each video frame
                            
            #det_results[file_i.stem] = result
            
            # Each element is one of the 9 classes.
            # Each subelement is a bounding box for that class, along with classification probability.
            # The specific class ordering is specified in the model config file (e.g. models/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py)
            # Current class order:
            classes = ('box', 'e-cigarette brand name', 'e-juice', 'e-juice flavor', 'mod', 'pod', 'smoke cloud', 'synthetic nicotine label', 'warning label nicotine')
                        
            # Defined much higher in the script.
            pred_types = classes

            frame_results = []
            
            # Loop over each prediction type that we care about and increment the count if the probability exceeds the threshold
            for pred_type_i in pred_types:
                # Find the location of this object type in the model result object.
                pred_index = classes.index(pred_type_i)
                
                for pred_i in result[pred_index]:
                    frame_results.append({'frame': frame_i.stem,
                                          'class': pred_type_i,
                                          'prob': pred_i[4]})
            
            frame_df = pd.DataFrame(frame_results)
            all_frames.append(frame_df)
            
        video_df = pd.concat(all_frames)
        video_df['video'] = video_name
        
        print(video_df)

        # Save the prob dataframe.
        out_name = (path_out / str(video_name + "-probs")).with_suffix('.parquet')
        print(out_name)
        video_df.to_parquet(out_name)
        video_dfs[video_name] = video_df
    
    else:
        print("Could not find annotated frame directory for:", video_name)
        
# Combine all predictions into a single dataframe.
pred_df = pd.concat(video_dfs)
pred_df.to_parquet(path_out / "video-eval-preds-probs.parquet")

Analyzing #12
Could not find annotated frame directory for: #12
Analyzing #5
Could not find annotated frame directory for: #5
Analyzing #28
Could not find annotated frame directory for: #28
Analyzing #16
Could not find annotated frame directory for: #16
Analyzing several_pods_2
Found 24 files to analyze.


100%|█████████████████████████████████████████████| 24/24 [00:03<00:00,  6.77it/s]


      frame                   class      prob           video
0   0000010  e-cigarette brand name  0.750092  several_pods_2
1   0000010  e-cigarette brand name  0.691061  several_pods_2
2   0000010  e-cigarette brand name  0.636141  several_pods_2
3   0000010  e-cigarette brand name  0.211906  several_pods_2
4   0000010  e-cigarette brand name  0.098541  several_pods_2
..      ...                     ...       ...             ...
35  0000012             smoke cloud  0.057404  several_pods_2
36  0000012             smoke cloud  0.051671  several_pods_2

[718 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/several_pods_2-probs.parquet
Analyzing box_mod_ejuice
Found 72 files to analyze.


100%|█████████████████████████████████████████████| 72/72 [00:10<00:00,  6.57it/s]


      frame                     class      prob           video
0   0000046    e-cigarette brand name  0.109331  box_mod_ejuice
1   0000046    e-cigarette brand name  0.097620  box_mod_ejuice
2   0000046            e-juice flavor  0.115360  box_mod_ejuice
3   0000046                       mod  0.090411  box_mod_ejuice
4   0000046                       pod  0.304536  box_mod_ejuice
..      ...                       ...       ...             ...
34  0000012  synthetic nicotine label  0.092339  box_mod_ejuice

[1207 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/box_mod_ejuice-probs.parquet
Analyzing eliquid
Found 42 files to analyze.


100%|█████████████████████████████████████████████| 42/42 [00:06<00:00,  6.48it/s]


      frame                   class      prob    video
0   0000010                     box  0.077247  eliquid
1   0000010                     box  0.072433  eliquid
2   0000010                     box  0.066793  eliquid
3   0000010                     box  0.062283  eliquid
4   0000010  e-cigarette brand name  0.948444  eliquid
..      ...                     ...       ...      ...
31  0000012             smoke cloud  0.066606  eliquid
32  0000012             smoke cloud  0.054961  eliquid

[2308 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/eliquid-probs.parquet
Analyzing multiple_pods
Could not find annotated frame directory for: multiple_pods
Analyzing #26
Could not find annotated frame directory for: #26
Analyzing #14
Could not find annotated frame directory for: #14
Analyzing #23
Could not find annotated frame directory for: #23
Analyzing #4
Could not find annotated frame directory for: #4
Analyzing pods_clouds2
Found 80 files to analyze.


100%|█████████████████████████████████████████████| 80/80 [00:08<00:00,  9.21it/s]


      frame                   class      prob         video
0   0000046                     box  0.083265  pods_clouds2
1   0000046  e-cigarette brand name  0.754923  pods_clouds2
2   0000046  e-cigarette brand name  0.575860  pods_clouds2
3   0000046  e-cigarette brand name  0.066804  pods_clouds2
4   0000046  e-cigarette brand name  0.051275  pods_clouds2
..      ...                     ...       ...           ...
6   0000012                     pod  0.928048  pods_clouds2
7   0000012                     pod  0.710884  pods_clouds2
8   0000012                     pod  0.258356  pods_clouds2
9   0000012             smoke cloud  0.125992  pods_clouds2

[870 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/pods_clouds2-probs.parquet
Analyzing #6
Could not find annotated frame directory for: #6
Analyzing #13
Could not find annotated frame directory for: #13
Analyzing mod_ejuice
Found 217 files to analyze.


100%|███████████████████████████████████████████| 217/217 [00:22<00:00,  9.44it/s]


      frame           class      prob       video
0   0000084             box  0.057671  mod_ejuice
1   0000084         e-juice  0.055176  mod_ejuice
2   0000084  e-juice flavor  0.051542  mod_ejuice
3   0000084             pod  0.056248  mod_ejuice
4   0000084     smoke cloud  0.974895  mod_ejuice
..      ...             ...       ...         ...
21  0000204     smoke cloud  0.057680  mod_ejuice
22  0000204     smoke cloud  0.057613  mod_ejuice
23  0000204     smoke cloud  0.057119  mod_ejuice
24  0000204     smoke cloud  0.052634  mod_ejuice
25  0000204     smoke cloud  0.051472  mod_ejuice

[4838 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/mod_ejuice-probs.parquet
Analyzing pod_clouds
Found 90 files to analyze.


100%|█████████████████████████████████████████████| 90/90 [00:14<00:00,  6.33it/s]


      frame                   class      prob       video
0   0000084  e-cigarette brand name  0.470845  pod_clouds
1   0000084             smoke cloud  0.378580  pod_clouds
2   0000084             smoke cloud  0.373920  pod_clouds
3   0000084             smoke cloud  0.278477  pod_clouds
4   0000084             smoke cloud  0.158923  pod_clouds
..      ...                     ...       ...         ...
2   0000012                     pod  0.690014  pod_clouds
3   0000012                     pod  0.077722  pod_clouds
4   0000012             smoke cloud  0.246774  pod_clouds
5   0000012             smoke cloud  0.129949  pod_clouds
6   0000012             smoke cloud  0.065095  pod_clouds

[705 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/pod_clouds-probs.parquet
Analyzing #17
Could not find annotated frame directory for: #17
Analyzing #21
Could not find annotated frame directory for: #21
Analyzing #8
Could not find annotated frame directory for: #8
Analyzing 

100%|███████████████████████████████████████████| 107/107 [00:16<00:00,  6.40it/s]


      frame           class      prob        video
0   0000084     smoke cloud  0.242866  box_pod_mod
0   0000082  e-juice flavor  0.055200  box_pod_mod
1   0000082             mod  0.091853  box_pod_mod
2   0000082             mod  0.069526  box_pod_mod
3   0000082             mod  0.064669  box_pod_mod
..      ...             ...       ...          ...
16  0000052     smoke cloud  0.062020  box_pod_mod
17  0000052     smoke cloud  0.054647  box_pod_mod
0   0000038     smoke cloud  0.431432  box_pod_mod
1   0000038     smoke cloud  0.101710  box_pod_mod
0   0000012     smoke cloud  0.076385  box_pod_mod

[1409 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/box_pod_mod-probs.parquet
Analyzing #3
Could not find annotated frame directory for: #3
Analyzing #7
Could not find annotated frame directory for: #7
Analyzing #10
Could not find annotated frame directory for: #10
Found 69 files to analyze.


100%|█████████████████████████████████████████████| 69/69 [00:10<00:00,  6.35it/s]


      frame                   class      prob                         video
..      ...                     ...       ...                           ...

[1426 rows x 4 columns]
Analyzing #29
Could not find annotated frame directory for: #29
Analyzing several_pods
Found 74 files to analyze.


100%|█████████████████████████████████████████████| 74/74 [00:11<00:00,  6.34it/s]


      frame                   class      prob         video
0   0000046  e-cigarette brand name  0.910001  several_pods
1   0000046  e-cigarette brand name  0.881372  several_pods
2   0000046  e-cigarette brand name  0.810369  several_pods
3   0000046  e-cigarette brand name  0.442867  several_pods
4   0000046  e-cigarette brand name  0.251418  several_pods
..      ...                     ...       ...           ...
45  0000012             smoke cloud  0.064706  several_pods
46  0000012             smoke cloud  0.051780  several_pods

[3480 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/several_pods-probs.parquet
Analyzing pod_cloud
Found 72 files to analyze.


100%|█████████████████████████████████████████████| 72/72 [00:11<00:00,  6.35it/s]


      frame                   class      prob      video
0   0000046  e-cigarette brand name  0.104449  pod_cloud
1   0000046                 e-juice  0.163682  pod_cloud
2   0000046          e-juice flavor  0.104529  pod_cloud
3   0000046                     mod  0.250031  pod_cloud
4   0000046                     pod  0.776279  pod_cloud
..      ...                     ...       ...        ...
4   0000012                     pod  0.092586  pod_cloud
5   0000012                     pod  0.089928  pod_cloud
6   0000012                     pod  0.073492  pod_cloud
7   0000012                     pod  0.055236  pod_cloud
8   0000012             smoke cloud  0.091358  pod_cloud

[563 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/pod_cloud-probs.parquet
Analyzing #30
Could not find annotated frame directory for: #30
Analyzing #20
Could not find annotated frame directory for: #20
Analyzing two_pods
Found 99 files to analyze.


100%|█████████████████████████████████████████████| 99/99 [00:15<00:00,  6.33it/s]


      frame                   class      prob     video
0   0000084  e-cigarette brand name  0.436142  two_pods
1   0000084                     mod  0.599833  two_pods
2   0000084             smoke cloud  0.228208  two_pods
3   0000084             smoke cloud  0.167089  two_pods
4   0000084             smoke cloud  0.101439  two_pods
..      ...                     ...       ...       ...
10  0000012                     mod  0.105729  two_pods
11  0000012                     pod  0.930727  two_pods
12  0000012                     pod  0.889222  two_pods
13  0000012                     pod  0.085004  two_pods
14  0000012                     pod  0.056615  two_pods

[1476 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/two_pods-probs.parquet
Analyzing pods_clouds
Found 46 files to analyze.


100%|█████████████████████████████████████████████| 46/46 [00:07<00:00,  6.19it/s]


      frame                   class      prob        video
0   0000010  e-cigarette brand name  0.086060  pods_clouds
1   0000010  e-cigarette brand name  0.072312  pods_clouds
2   0000010  e-cigarette brand name  0.067264  pods_clouds
3   0000010  e-cigarette brand name  0.058780  pods_clouds
4   0000010          e-juice flavor  0.096818  pods_clouds
..      ...                     ...       ...          ...
13  0000012             smoke cloud  0.123441  pods_clouds
14  0000012             smoke cloud  0.064934  pods_clouds
15  0000012             smoke cloud  0.061073  pods_clouds

[823 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/pods_clouds-probs.parquet
Analyzing boxes_pod_clouds
Found 74 files to analyze.


100%|█████████████████████████████████████████████| 74/74 [00:11<00:00,  6.21it/s]


      frame                   class      prob             video
0   0000046             smoke cloud  0.069915  boxes_pod_clouds
0   0000010  e-cigarette brand name  0.061201  boxes_pod_clouds
1   0000010                 e-juice  0.058082  boxes_pod_clouds
..      ...                     ...       ...               ...
6   0000012             smoke cloud  0.082951  boxes_pod_clouds
7   0000012             smoke cloud  0.077692  boxes_pod_clouds
8   0000012             smoke cloud  0.072456  boxes_pod_clouds
9   0000012             smoke cloud  0.071366  boxes_pod_clouds
10  0000012             smoke cloud  0.055352  boxes_pod_clouds

[2471 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/boxes_pod_clouds-probs.parquet
Analyzing #2
Could not find annotated frame directory for: #2
Analyzing box_pod
Found 72 files to analyze.


100%|█████████████████████████████████████████████| 72/72 [00:11<00:00,  6.12it/s]


      frame                   class      prob    video
0   0000046                     pod  0.053489  box_pod
1   0000046             smoke cloud  0.939779  box_pod
2   0000046             smoke cloud  0.298145  box_pod
3   0000046             smoke cloud  0.223535  box_pod
4   0000046             smoke cloud  0.156514  box_pod
..      ...                     ...       ...      ...
13  0000012                     pod  0.065617  box_pod
14  0000012                     pod  0.062318  box_pod

[1459 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/box_pod-probs.parquet
Analyzing #24
Could not find annotated frame directory for: #24
Analyzing #9
Could not find annotated frame directory for: #9
Analyzing #15
Could not find annotated frame directory for: #15
Analyzing pods
Found 67 files to analyze.


100%|█████████████████████████████████████████████| 67/67 [00:08<00:00,  7.71it/s]


      frame                   class      prob video
0   0000046  e-cigarette brand name  0.053282  pods
1   0000046                 e-juice  0.220324  pods
2   0000046          e-juice flavor  0.151849  pods
3   0000046          e-juice flavor  0.087645  pods
4   0000046                     mod  0.567800  pods
..      ...                     ...       ...   ...
6   0000012                     pod  0.887309  pods
7   0000012                     pod  0.815313  pods
8   0000012                     pod  0.075041  pods
9   0000012             smoke cloud  0.287493  pods
10  0000012             smoke cloud  0.059366  pods

[630 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/pods-probs.parquet
Analyzing ejuices
Found 93 files to analyze.


100%|█████████████████████████████████████████████| 93/93 [00:12<00:00,  7.58it/s]


      frame        class      prob    video
0   0000084          box  0.364664  ejuices
1   0000084          box  0.262176  ejuices
2   0000084          box  0.159500  ejuices
3   0000084          box  0.142693  ejuices
4   0000084          box  0.104856  ejuices
..      ...          ...       ...      ...
32  0000012          pod  0.076117  ejuices
33  0000012          pod  0.067190  ejuices
34  0000012          pod  0.052167  ejuices
35  0000012  smoke cloud  0.065435  ejuices
36  0000012  smoke cloud  0.060141  ejuices

[4628 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/ejuices-probs.parquet
Analyzing multiplepods_clouds
Found 82 files to analyze.


100%|█████████████████████████████████████████████| 82/82 [00:13<00:00,  6.01it/s]


      frame        class      prob                video
0   0000046      e-juice  0.054797  multiplepods_clouds
1   0000046          mod  0.143012  multiplepods_clouds
2   0000046          mod  0.141727  multiplepods_clouds
3   0000046          pod  0.576364  multiplepods_clouds
4   0000046          pod  0.366702  multiplepods_clouds
..      ...          ...       ...                  ...
3   0000012          pod  0.150330  multiplepods_clouds
4   0000012          pod  0.050726  multiplepods_clouds
5   0000012  smoke cloud  0.062139  multiplepods_clouds
6   0000012  smoke cloud  0.056302  multiplepods_clouds
7   0000012  smoke cloud  0.051946  multiplepods_clouds

[931 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/multiplepods_clouds-probs.parquet
Analyzing ejuice_cloud_mod
Found 61 files to analyze.


100%|█████████████████████████████████████████████| 61/61 [00:10<00:00,  6.07it/s]


      frame                   class      prob             video
0   0000046             smoke cloud  0.054684  ejuice_cloud_mod
0   0000010  e-cigarette brand name  0.054002  ejuice_cloud_mod
1   0000010          e-juice flavor  0.073053  ejuice_cloud_mod
2   0000010             smoke cloud  0.981999  ejuice_cloud_mod
3   0000010             smoke cloud  0.165922  ejuice_cloud_mod
..      ...                     ...       ...               ...
4   0000012                     pod  0.076711  ejuice_cloud_mod
5   0000012             smoke cloud  0.959120  ejuice_cloud_mod
6   0000012             smoke cloud  0.283607  ejuice_cloud_mod
7   0000012             smoke cloud  0.088931  ejuice_cloud_mod
8   0000012             smoke cloud  0.068241  ejuice_cloud_mod

[1169 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/ejuice_cloud_mod-probs.parquet
Analyzing #1
Could not find annotated frame directory for: #1
Analyzing boxes
Found 68 files to analyze.


100%|█████████████████████████████████████████████| 68/68 [00:11<00:00,  5.95it/s]

      frame                     class      prob  video
0   0000046                       box  0.305452  boxes
1   0000046                       box  0.057641  boxes
2   0000046    e-cigarette brand name  0.149133  boxes
3   0000046    e-cigarette brand name  0.102404  boxes
4   0000046    e-cigarette brand name  0.078932  boxes
..      ...                       ...       ...    ...
29  0000012               smoke cloud  0.055094  boxes
30  0000012  synthetic nicotine label  0.062537  boxes

[1325 rows x 4 columns]
/home/ck37/projects/ecig-vaping/data/detections/videos/boxes-probs.parquet
Analyzing #11
Could not find annotated frame directory for: #11



