# Apply models to new videos

Jan. 2023

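This notebook is derived from `video-evaluation.ipynb`. It runs a trained detection model (via `notebooks/video_demo-ck.py`) on every video listed in the analytic-sample CSV and saves the outputs under `data/detections/`.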

In [1]:
from pathlib import Path
import pyprojroot
# Project root as resolved by pyprojroot; later cells build model and output paths from it.
dir_proj = pyprojroot.here()
print("Project directory:", dir_proj)

Project directory: /home/ck37/projects/ecig-vaping


In [2]:
# Video list: root directory of the video collection. The CSV loaded in a
# later cell is addressed relative to this directory.
dir_videos = Path(
    "/vape/collection/appended_scrape_download/TikTok/Influencers/tot_coding"
)
print("Video directory:", dir_videos)

# Not using this code currently (kept for reference):
# extract the filenames for the mp4s in our target directory.
#
# files_videos = [video.stem for video in dir_videos.glob('**/*.mp4')]
# print(f"mp4s found ({len(files_videos)}):")
# #print(", ".join([video for video in files_videos]))

Video directory: /vape/collection/appended_scrape_download/TikTok/Influencers/tot_coding


In [3]:
import pandas as pd

In [4]:
# The video list CSV is addressed relative to the video directory (one level up).
file_video_list = "../analytic_sample_17361_20192022.csv"
video_df = pd.read_csv(Path(dir_videos, file_video_list))

# Column summary, the first rows, and one complete video path.
video_df.info()
print(video_df.head())
print(video_df["video_path"].iloc[0])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17361 entries, 0 to 17360
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Unnamed: 0         17361 non-null  int64 
 1   infl_username      17361 non-null  object
 2   number             17361 non-null  int64 
 3   video_path         17361 non-null  object
 4   video_shortcode    17361 non-null  object
 5   year               17361 non-null  int64 
 6   year_recent        17361 non-null  int64 
 7   date_tot           17361 non-null  object
 8   n_videos_per_infl  17361 non-null  int64 
dtypes: int64(5), object(4)
memory usage: 1.2+ MB
   Unnamed: 0 infl_username  number  \
0           1   _auliarmdhn       2   
1           2   _auliarmdhn       3   
2           3   _auliarmdhn       4   
3           4   _auliarmdhn       5   
4           5   _auliarmdhn       6   

                                          video_path  \
0  /home/juliav/vape/collection/ap

In [5]:
# Fix video paths (or skip)

# Model setup

In [5]:
# Model config and checkpoint locations
# (copied from mmdet-train.ipynb and video-evaluation.ipynb).
model_name = "vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco"
timestr = "20220523-105448"  # timestamp suffix of the work_dirs run folder

dir_models = dir_proj / "models"
path_model = dir_models / f"{model_name}.py"
file_checkpoint = (
    dir_proj / "notebooks" / "work_dirs" / f"{model_name}-{timestr}" / "latest.pth"
)

# Report each path and whether the file is present on disk.
print("Model file:", path_model)
print("Find model:", path_model.is_file())

print("Checkpoint file:", file_checkpoint)
print("Find checkpoint:", file_checkpoint.is_file())

Model file: /home/ck37/projects/ecig-vaping/models/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco.py
Find model: True
Checkpoint file: /home/ck37/projects/ecig-vaping/notebooks/work_dirs/vfnet_x101_64x4d_fpn_mdconv_c3-c5_mstrain_2x_coco-20220523-105448/latest.pth
Find checkpoint: True


In [6]:
# Modified from video-evaluation.ipynb
import os

def predict_video(video_name, video_dir,
                  dir_output = pyprojroot.here() / "data/detections/videos",
                  box_threshold = 0.4,
                  overwrite = False,
                  verbose = False):

    video_file = video_dir / video_name
    
    if not os.path.exists(video_file):
        print("Could not find video file")
        raise Exception
    
    # This will contain the .mp4 file extension.
    path_out = dir_output / Path(video_file).name
    
    if path_out.is_file() and not overwrite:
        print("Skipping file - output already exists. ", Path(video_file.name))
        return
    
    if verbose:
        print("Analyzing", video_name)

    
    if verbose:
        print("Output path:", path_out)
    
    # This will create an output mp4 and an output pkl (to be analyzed).
    # Default threshold is 0.3
    !python {pyprojroot.here()}/notebooks/video_demo-ck.py "{video_file}" \
        {path_model} \
        {file_checkpoint} \
        --out "{path_out}" \
        --save_result \
        --score-thr {box_threshold}

In [7]:
# Detections for this run go in a folder named after the video list CSV.
dir_output = pyprojroot.here() / "data" / "detections" / Path(file_video_list).stem
print("Analysis output dir:", dir_output)

# Create the folder (and any missing parents) if it is not there yet.
if not dir_output.exists():
    dir_output.mkdir(parents=True)
print("Exists:", dir_output.exists())

Analysis output dir: /home/ck37/projects/ecig-vaping/data/detections/analytic_sample_17361_20192022
Exists: True


In [27]:
%%time

# Optional: run the detector on a single local video.
# Flip this flag to True to enable the cell; it is off by default.
RUN_SINGLE_VIDEO = False

if RUN_SINGLE_VIDEO:

    print("Analyzing single video.")

    video_path = Path("/home/ck37/tmp/several_pods_cropped(1).mp4")
    video_name = video_path.name
    video_dir = video_path.parent

    # box_threshold is the probability threshold for showing a bounding box.
    predict_video(video_name, video_dir,
                  dir_output = Path("/home/ck37/"),
                  box_threshold = 0.4, verbose = True)

Analyzing single video video.
Analyzing several_pods_cropped(1).mp4
Output path: /home/ck37/several_pods_cropped(1).mp4
Skipping file - output already exists.
CPU times: user 419 µs, sys: 220 µs, total: 639 µs
Wall time: 521 µs


### Analyze videos

In [None]:
%%time

# Videos listed in the CSV but not present on disk; reported after the loop.
missing_videos = []

print(f"Analyzing {video_df.shape[0]:,} videos.")
for index, video_path_str in video_df["video_path"].items():
    print(f"Video {index}")
    video_path = Path(video_path_str)
    video_name = video_path.name
    video_dir = video_path.parent

    # predict_video raises on a missing input file. Record the path and keep
    # going so that one bad entry does not abort the rest of the batch.
    if not video_path.exists():
        print("Could not find video file:", video_path)
        missing_videos.append(video_path)
        continue

    # Videos whose output already exists are skipped inside predict_video
    # (overwrite defaults to False), so re-running this cell resumes the batch.
    # box_threshold is the probability threshold for showing a bounding box.
    predict_video(video_name, video_dir, dir_output = dir_output,
                  box_threshold = 0.4, verbose = True)

print(f"Done. {len(missing_videos):,} of {video_df.shape[0]:,} videos were not found.")

## Review results

In [None]:
# Upload predicted to Google drive:
for video_name in files_videos:
    # TO FIX:
    # video_file = pyprojroot.here() / "data/detections/videos" / (video_name + ".mp4")
    if video_file.is_file():
        # TO FIX:
        !rclone copy --progress {video_file} gdrive:deep_learning/detections/{Path(file_video_list).stem}
    else:
        print("Could not find prediction video for:", video_name)