# Read stopped sections metadata and generate videoclips from it.

In [None]:
from moviepy.editor import VideoFileClip, concatenate_videoclips
import numpy as np
import pandas as pd
from pathlib import Path
from datetime import datetime

In [None]:
# Directory that holds stopped_sections.csv; the later cells also pass it to
# gen_splitfile as the output directory for the generated clips.
clean_datadir = Path('./example_data')
# Machine-specific absolute path; no other cell visible here references it.
# NOTE(review): presumably the location of the raw recordings — confirm.
datadir = Path('/media/alex/alex_backup/Van_6-10-18')

In [None]:
# Load the stopped-sections table; the first CSV column becomes the index.
df = pd.read_csv(clean_datadir / 'stopped_sections.csv', index_col=0)

# Every cell comes back from the CSV as text, so rebuild the Python objects.
# NOTE(review): eval() executes whatever expression the CSV cell contains, so
# this is only safe while the CSV is trusted. If the cells are plain literals,
# ast.literal_eval would be the safer parser — confirm before switching.
df['video_files'] = df['video_files'].apply(
    lambda cell: tuple(Path(name) for name in eval(cell))
)
df['directory'] = df['directory'].apply(lambda folder: Path(folder))
df['time'] = df['time'].apply(
    lambda stamp: datetime.strptime(stamp, '%Y-%m-%d %H:%M:%S.%f')
)
# The offset/length columns are stored as timedelta strings.
for column in ('start_vid', 'end_vid', 'duration'):
    df[column] = df[column].apply(pd.Timedelta)

# Sanity check: show the Python type of every value in the row labelled 0.
print('\n'.join(str(type(value)) for value in df.loc[0]))
df.head()

`gen_splitfile` takes a row of the dataframe and writes an edited video covering the time span of the observation, stitching together multiple files where necessary and cutting out the sections that are not part of the observation.

In [None]:
def gen_splitfile(row: pd.core.series.Series, output_dir: Path, in_format: str, out_format: str):
    '''Write one edited video covering the observation described by ``row``.

    The source files listed in ``row`` are opened, everything before the start
    offset (in the first file) and after the end offset (in the last file) is
    cut away, and the remainder is written as a single video into ``output_dir``.

    row: pandas series object containing metadata for this observation. Must have columns:
        time, video_files, start_vid, end_vid, directory, duration
    output_dir: Path of desired directory to store resulting videos.
    in_format: datetime string format of input files (the video_files column).
        Not used by the current implementation; kept so existing calls keep working.
    out_format: datetime string of desired output files (will format with the beginning timestamp of the video)

    Raises IndexError if the row lists no video files.
    '''
    start_time, vidfiles, start, end, directory, duration = row['time video_files start_vid end_vid directory duration'.split()]
    output_filename = str(output_dir /
                          (start_time + start).strftime(out_format))
    start_seconds = start.total_seconds()
    end_seconds = end.total_seconds()

    clips = []
    try:
        # Open inside the try block so that readers opened before a failure
        # are still closed by the finally clause.
        for vidfile in vidfiles:
            clips.append(VideoFileClip(str(directory / vidfile)))
        print(f'calculating {output_filename}, duration: {duration}')

        if len(clips) == 1:
            # special case of start time and end time occurring in the same videoclip
            result = clips[0].subclip(start_seconds, end_seconds)
        else:
            # start time lies in a different videoclip than the end time:
            # first clip from the start offset to its end ...
            parts = [clips[0].subclip(start_seconds, clips[0].duration)]
            # ... every clip strictly between the first and the last, untouched ...
            parts.extend(clips[1:-1])
            # ... and the last clip from its beginning up to the end offset.
            parts.append(clips[-1].subclip(0, end_seconds))
            result = concatenate_videoclips(parts)

        result.write_videofile(output_filename, verbose=False, logger=None)
    finally:
        # Subclips share the reader of the clip they were cut from, so closing
        # the originals once writing is over releases all of them.
        for clip in clips:
            clip.close()
    
#gen_splitfile(df[df['video_files'].apply(len) > 1].iloc[0], clean_datadir, 'FILE%y%m%d-%H%M%S.MP4', 'FILEsp%y%m%d-%H%M%S.MP4')

Apply this to the 5 longest observations in the dataset.

In [None]:
# Generate clips for the 5 longest observations only: sort by duration with
# the longest first and keep the top 5 rows (fewer if the table is shorter).
for _, row in df.sort_values(by='duration', ascending=False).head(5).iterrows():
    gen_splitfile(row, clean_datadir, 'FILE%y%m%d-%H%M%S.MP4', 'FILEsp%y%m%d-%H%M%S.MP4')
print('done.')