In [None]:
import os
import pandas as pd
import numpy as np
import json

In [None]:
# setup paths
# Project root = current working directory, minus a trailing "notebooks"
# folder when the notebook is run from inside it. Only the final path
# component is stripped, so a parent directory whose name merely contains
# "notebooks" is left intact.
pwd = os.getcwd()
if os.path.basename(pwd) == "notebooks":
    pwd = os.path.dirname(pwd)
# Always end with a path separator so the string concatenations below (and
# any later `pwd + ...` usage) produce valid paths.
pwd = os.path.join(pwd, "")
path_cache = pwd + 'cache/'
path_data = pwd + 'data/'

In [None]:
# setup logging
import logging

# Records at INFO level and above go to two destinations: a "logs.log" file
# located under `pwd`, and a stream handler.
_file_handler = logging.FileHandler("{0}/{1}.log".format(pwd, "logs"))
_stream_handler = logging.StreamHandler()

logging.basicConfig(
    format="%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s]  %(message)s",
    level=logging.INFO,
    handlers=[_file_handler, _stream_handler],
)

# Root logger, shared by the rest of the notebook.
logger = logging.getLogger()

In [None]:
def add_splits_to_labels_file(vids_valid, vids_test, labels_path=None):
    """
    Add a train/valid/test "split" column to the labels file.

    Reads the columns "video", "frame" and "label" from the labels CSV,
    assigns every row a split based on its video name, sorts the rows by
    video and frame, and overwrites the CSV on disk.

    Only those three columns plus the new "split" column are written back:
    any other column present in the file (including a previous "split"
    column) is dropped.

    Args:
        vids_valid: list-like of video names to assign to the "valid" split.
        vids_test: list-like of video names to assign to the "test" split.
            A video listed in both arguments is assigned to "valid".
        labels_path: path of the labels CSV to read and overwrite. Defaults
            to ``path_data + 'labels.csv'``.

    Every video that is in neither list is assigned to "train".

    Sample usage:
        add_splits_to_labels_file(
            vids_valid=['vid_a', 'vid_b', 'vid_c'],
            vids_test=['vid_y', 'vid_z'],
        )
    """
    if labels_path is None:
        labels_path = path_data + 'labels.csv'

    labels = pd.read_csv(labels_path, usecols=['video', 'frame', 'label'])

    # Vectorised membership tests. Unlike a plain `in` check, `isin` rejects
    # a bare string instead of silently doing substring matching.
    in_valid = labels['video'].isin(vids_valid)
    in_test = labels['video'].isin(vids_test)

    # apply split: start with everything in "train", then overwrite.
    # "valid" is written last so it takes precedence over "test".
    labels['split'] = "train"
    labels.loc[in_test, 'split'] = "test"
    labels.loc[in_valid, 'split'] = "valid"

    # sort
    labels = labels.sort_values(["video", "frame"])

    # output as csv (overwrites the input file)
    labels.to_csv(labels_path, index=False)