## Process videos for inference and training

Takes a folder containing a set of recordings, crops them, and saves them organized by experiment, animal and camera. If desired, it also creates a 'training' dataset, which is a random sample of shorter clips extracted from the whole dataset. 

In [1]:
import os
import shlex
import shutil
import subprocess
from glob import glob

import pandas as pd

Set parameters here. Need to specify path to a file, `crops.csv`, that contains crop information for each recording. As an example, this file has the format:

```
,source_video_name,top,left,right,bottom,camera,experiment,animal
0,e3v813a-20220131T174814-181650,200,0,460,0,e3v813a,Overnight Videos,Animal 1
1,e3v813a-20220201T195224-202053,200,0,460,0,e3v813a,Overnight Videos,Animal 2
2,e3v813a-20220202T210531-213427,200,0,460,0,e3v813a,Overnight Videos,Animal 3
```
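where `top, left, right, bottom` indicate the number of pixels to remove from the respective side. When applying the crop, each video is matched against the `animal`, `camera` and `experiment` columns of this table to determine which crop to apply; the experiment and animal are taken from the names of the folders containing the video, and the camera from the part of the file name before the first `-`. If no matching row exists in this table, no crop is applied and the video is copied as is.  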

In [2]:
# ---- Training-data sampling ----
n_samples = 10    # number of videos per camera to extract
sample_dur = 300  # length of each extracted clip, in seconds
create_training_data = False  # whether to save sample clips to train_video_path

# ---- Run mode ----
trial_run = False  # if True: don't actually run commands, just print them

# ---- Input ----
# Folder of raw recordings and the extension of the input files
raw_video_path = '/home/blansdel/ImageAnalysisScratch/Zakharenko/Brett videos/Overnight Videos LSD/'
ext = 'avi'

# CSV with the crop information for each recording
crops_path = '/mnt/storage2/blansdel/projects/dlc_training/behavior-ml-analysis/scripts/crops.csv'

# ---- Output ----
# Where the cropped (inference) videos are written
inference_video_path = '/home/blansdel/ImageAnalysisScratch/AnimalBehaviorCore/Zakharenko/animal-behavior-ml/inference'

# Where the training sample clips are written
train_video_path = '/home/blansdel/ImageAnalysisScratch/AnimalBehaviorCore/Zakharenko/animal-behavior-ml/training/'

# ---- Alternative configuration: 'Overnight Videos' dataset ----
# (the active settings above are for 'Overnight Videos LSD')
# create_training_data = True
# raw_video_path = '/home/blansdel/ImageAnalysisScratch/Zakharenko/Brett videos/Overnight Videos/'
# inference_video_path = '/home/blansdel/ImageAnalysisScratch/Zakharenko/animal-behavior-ml/inference'
# train_video_path = '/home/blansdel/ImageAnalysisScratch/Zakharenko/animal-behavior-ml/training/'
# crops_path = '/home/blansdel/projects/dlc_training/behavior-ml-analysis/scripts/crops.csv'
# ext = 'avi'

In [1]:

def _run_command(cmd, trial_run):
    """Print ``cmd`` (a list of arguments) if ``trial_run``, otherwise execute it without a shell."""
    if trial_run:
        print(' '.join(shlex.quote(arg) for arg in cmd))
        return
    try:
        result = subprocess.run(cmd)
    except OSError as err:
        # e.g. the executable is not installed; report and carry on with the next video
        print('Could not run {}: {}'.format(cmd[0], err))
        return
    if result.returncode != 0:
        print('{} exited with code {}'.format(cmd[0], result.returncode))


def crop_videos(videos, out_base_dir, crops, trial_run = False):
    """Crop each video with ffmpeg, or copy it unchanged if no crop is known.

    Each path in ``videos`` is read as ``.../<experiment>/<animal>/<file>``,
    and the camera name is the part of ``<file>`` before the first ``-``.
    Results are written to ``<out_base_dir>/<experiment>/<animal>/<camera>/``.
    Cropped videos are re-encoded with libx264 (``-crf 10``) and saved as
    ``<original file stem>_cropped.mp4``; videos without a matching crop row
    are copied with their original name.

    Parameters
    ----------
    videos : iterable of str
        Paths of the input videos.
    out_base_dir : str
        Root directory for the output.
    crops : pandas.DataFrame
        Must have the columns ``camera``, ``experiment``, ``animal``, ``top``,
        ``left``, ``right`` and ``bottom``. The last four are the number of
        pixels removed from that side. If several rows match a video, the
        first one is used.
    trial_run : bool
        If True, only print the commands; nothing is created, copied or encoded.
    """
    crop_cols = ['top', 'left', 'right', 'bottom']

    for vid in videos:
        # Get data about video from its location and file name
        parts = os.path.normpath(vid).split(os.sep)
        file_name, an, expt = parts[-1], parts[-2], parts[-3]
        camera = file_name.split('.')[0].split('-')[0]
        out_dir = os.path.join(out_base_dir, expt, an, camera)

        # Paths are passed to the commands as separate arguments, so names
        # containing spaces need no escaping and the directory is created
        # under exactly the name the commands will use.
        if not trial_run:
            os.makedirs(out_dir, exist_ok = True)

        matches = crops.loc[(crops.camera == camera) & (crops.experiment == expt) & (crops.animal == an),
                            crop_cols]

        if len(matches) == 0:
            print('No crop data found for {}, just copying as is instead.'.format(vid))
            if trial_run:
                _run_command(['cp', vid, out_dir], trial_run)
            else:
                try:
                    shutil.copy(vid, out_dir)
                except OSError as err:
                    print('Could not copy {}: {}'.format(vid, err))
            continue

        if len(matches.drop_duplicates()) > 1:
            print('Multiple different crops found for {}, using the first one.'.format(vid))
        top, left, right, bottom = (int(v) for v in matches.iloc[0])

        # ffmpeg crop filter is out_w:out_h:x:y, x/y being the top-left corner of the kept region
        crop_filter = 'crop=in_w-{}:in_h-{}:{}:{}'.format(left + right, top + bottom, left, top)
        out_file = os.path.join(out_dir, os.path.splitext(file_name)[0] + '_cropped.mp4')
        cmd = ['ffmpeg', '-i', vid, '-c:v', 'libx264', '-crf', '10',
               '-filter:v', crop_filter, '-y', out_file]
        _run_command(cmd, trial_run)
        

In [None]:
# Crop table: the first CSV column is the row index
crops = pd.read_csv(crops_path, index_col = 0)

# One sub-folder per animal inside the raw video folder
animals = glob(os.path.join(raw_video_path, '*'))

# Gather every input video (matching the configured extension) across all animal folders
video_files = [
    video
    for animal_dir in animals
    for video in glob(os.path.join(animal_dir, f'*.{ext}'))
]

# Crop (or copy) everything into the inference folder
crop_videos(video_files, inference_video_path, crops, trial_run = trial_run)

# Cameras listed in the crop table
cameras = crops['camera'].unique()

In [None]:

if create_training_data:
    # Cropped videos are stored as
    # <inference_video_path>/<experiment>/<animal>/<camera>/<name>_cropped.mp4
    inference_vids = sorted(glob(os.path.join(inference_video_path, '*', '*', '*', '*_cropped.mp4')))
    path_parts = [os.path.normpath(v).split(os.sep) for v in inference_vids]

    inferences = pd.DataFrame({'video': inference_vids,
                               'experiment': [p[-4] for p in path_parts],
                               'animal': [p[-3] for p in path_parts],
                               'camera': [p[-2] for p in path_parts]})

    if len(inferences) == 0:
        print('No cropped videos found in {}, no training data created.'.format(inference_video_path))

    # Group by the cameras that actually have cropped videos, so a camera with
    # no videos can never lead to a division by zero below.
    for cam, cam_inferences in inferences.groupby('camera'):
        cam_animals = cam_inferences['animal'].unique()
        # Split the per-camera budget of n_samples evenly between the animals
        samples_per_animal = n_samples // len(cam_animals)
        if samples_per_animal == 0:
            print('Camera {} has more animals ({}) than n_samples ({}), no samples taken.'.format(
                cam, len(cam_animals), n_samples))
            continue

        for an in cam_animals:
            animal_vids = cam_inferences.loc[cam_inferences.animal == an]
            # Randomly select videos for this animal; never ask for more than exist
            n_take = min(samples_per_animal, len(animal_vids))
            animal_sample = animal_vids.sample(n = n_take)

            # Use ffmpeg to extract the first sample_dur seconds from each of these videos
            for row in animal_sample.itertuples(index = False):
                out_dir = os.path.join(train_video_path, row.experiment, row.camera, row.animal)
                if not trial_run:
                    os.makedirs(out_dir, exist_ok = True)

                # The inputs are .mp4 files, so build the name from the file stem
                # rather than replacing the raw-video extension.
                stem = os.path.splitext(os.path.basename(row.video))[0]
                out_file = os.path.join(out_dir, stem + '_sample.mp4')

                cmd = ['ffmpeg', '-i', row.video, '-c:v', 'copy', '-t', str(sample_dur), '-y', out_file]
                if trial_run:
                    print(' '.join(shlex.quote(arg) for arg in cmd))
                else:
                    # Argument list, no shell: paths with spaces or quotes are passed verbatim
                    result = subprocess.run(cmd)
                    if result.returncode != 0:
                        print('ffmpeg exited with code {} for {}'.format(result.returncode, row.video))