In [1]:
import pandas as pd
import cv2
import numpy as np
import re
import subprocess

from pathlib import Path
from tqdm import tqdm
from datetime import timedelta, datetime

project_dir = Path('..').resolve()

video_dir = Path('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/')

In [2]:
list(video_dir.glob('Basler*__*.mp4'))

[PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_150909775.mp4'),
 PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_110536538.mp4'),
 PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_111914471.mp4'),
 PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_114749899.mp4'),
 PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_120531410.mp4'),
 PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_122132533.mp4'),
 PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_123614881.mp4'),
 PosixPath('/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_125100883.mp4'),
 PosixPath('/Volumes/LAB

## Find Video File from Meta Details

In [8]:
df = pd.read_csv(project_dir / 'data/raw/raw_videos.csv')

def get_original_filename(row):
    return f""

def match_to_file(row):
    s = f'{row.camera}__*__{row.year:04}{row.month:02}{row.day:02}_{row.hour:02}{row.minutes:02}{row.seconds:02}*.mp4'
    try:
        return next(video_dir.glob(s)).parts[-1]
    except:
        return ""
    
df['original_video'] = df.apply(match_to_file, axis=1)

In [9]:
df = df[df.original_video != ''].copy()
df

Unnamed: 0,camera,year,month,day,hour,minutes,seconds,animal,recording,start,end,discard,Notes,original_video
0,Basler_acA1920-40um,2022,6,13,10,42,34,,,,,1.0,,Basler_acA1920-40um__23999063__20220613_104234...
1,Basler_acA1920-40um,2022,6,13,11,5,36,m1,0.0,3:45,-1,,,Basler_acA1920-40um__23999063__20220613_110536...
2,Basler_acA1920-40um,2022,6,13,11,19,14,m2,0.0,0,-1,,,Basler_acA1920-40um__23999063__20220613_111914...
3,Basler_acA1920-40um,2022,6,13,11,32,43,m3,0.0,0:19,-1,,,Basler_acA1920-40um__23999063__20220613_113243...
4,Basler_acA1920-40um,2022,6,13,11,47,49,m4,0.0,0:25,-1,,,Basler_acA1920-40um__23999063__20220613_114749...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
340,Basler_acA1920-40um,2023,3,25,15,26,0,f27,3.0,,,,,Basler_acA1920-40um__23999063__20230325_152600...
341,Basler_acA1920-40um,2023,3,25,16,41,26,f24,4.0,,,,,Basler_acA1920-40um__23999063__20230325_164126...
342,Basler_acA1920-40um,2023,3,25,17,0,56,f25,4.0,,,,,Basler_acA1920-40um__23999063__20230325_170056...
343,Basler_acA1920-40um,2023,3,25,17,16,32,f26,4.0,,,,,Basler_acA1920-40um__23999063__20230325_171632...


In [16]:
temp = df.dropna(subset=['animal', 'recording'])
groups = temp.groupby(by=['animal', 'recording'])
for (animal, recording), group_df in groups:
    if len(group_df) > 1:
        print(animal, recording, len(group_df))

f1 2.0 2
f10 3.0 2
f11 2.0 3
f11 3.0 4
f12 2.0 2
f13 0.0 2
f13 3.0 2
f14 0.0 3
f14 2.0 2
f15 0.0 2
f15 1.0 2
f2 1.0 2
f2 3.0 2
f20 0.0 2
f3 0.0 2
f3 4.0 3
f4 1.0 2
f4 3.0 2
f5 4.0 2
f8 3.0 4
m1 3.0 2
m14 2.0 2
m15 2.0 2
m16 2.0 2
m25 0.0 2
m26 2.0 3
m27 2.0 2
m30 2.0 3
m4 4.0 2
m7 0.0 2


## Crop Video

In [127]:

def to_seconds(time_str):
    d = datetime.strptime(time_str,"%M:%S")
    d = timedelta(minutes=d.minute, seconds=d.second)
    return int(d.total_seconds())

def seconds_to_str(seconds):
    hours, remainder = divmod(seconds, 3600)
    minutes, seconds = divmod(remainder, 60)
    return f"{minutes:02}:{seconds:02}"

def preprocess_video(row):
    if row.discard == 1:
        return 
    
    cmd = "ffmpeg -hide_banner -loglevel error"

    # Add a start flag if start time is specified
    if row.start != '0' and row.start != 'nan':
        cmd += f' -ss {row.start}'
    
    # Add a duration flag if end time is specified
    if row.end != '-1' and row.end != 'nan':
        if row.start != '0' and row.start != 'nan':
            duration = seconds_to_str(to_seconds(row.end) - to_seconds(row.start))
        else:
            duration = row.end

        cmd += f' -t {duration}'

    # Add the video in
    cmd += f' -i "{str(video_dir / row.original_video)}"'

    # Specify codec
    cmd += ' -c copy'

    # Output file
    recording_labels = dict(enumerate(['acclimation', 'preinjection', '1h-postinjection', '2h-postinjection', '4h-postinjection']))
    fname = f"{row.animal}_{recording_labels[int(row.recording)]}.mp4"
    cmd += f' "{output_dir / fname}"'

    return cmd

output_dir = Path('/Volumes/LAB-DRIVE/telfer-thesis/output-test')
! rm -rf {output_dir}
! mkdir -p {output_dir}

df['start'] = df['start'].astype(str)
df['end'] = df['end'].astype(str)

for idx, row in df.iterrows():
    cmd = preprocess_video(row)

    if cmd: 
        print(cmd)
        process = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
        process.wait()
    

ffmpeg -hide_banner -loglevel error -ss 3:45 -i "/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_110536538.mp4" -c copy "/Volumes/LAB-DRIVE/telfer-thesis/output-test/m1_acclimation.mp4"
ffmpeg -hide_banner -loglevel error -i "/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_111914471.mp4" -c copy "/Volumes/LAB-DRIVE/telfer-thesis/output-test/m2_acclimation.mp4"
ffmpeg -hide_banner -loglevel error -ss 0:19 -i "/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_113243463.mp4" -c copy "/Volumes/LAB-DRIVE/telfer-thesis/output-test/m3_acclimation.mp4"
ffmpeg -hide_banner -loglevel error -ss 0:25 -i "/Volumes/LAB-DRIVE/telfer-thesis/raw-unsorted/undone/Basler_acA1920-40um__23999063__20220613_114749899.mp4" -c copy "/Volumes/LAB-DRIVE/telfer-thesis/output-test/m4_acclimation.mp4"
ffmpeg -hide_banner -loglevel error -ss 0:46 -i "/Volumes/LAB-DRIVE/telfer-thesis/raw

ValueError: cannot convert float NaN to integer

In [58]:
import subprocess
process = subprocess.Popen("ffmpeg -hide_banner -loglevel error", shell=True, stdout=subprocess.PIPE)
process.wait()

1