# Create CSV File To Prepare Training, Validation, Test Sets

In [1]:
import csv
import os
import os.path
import random

In [2]:
# backhand = backhand
# bslice = backhand slice
# bvolley = backhand volley
# backhand2h = two handed backhand
# serflat = flat serve
# foreflat = flat forehand groundstroke
# foreopen = open-stance forehand groundstroke (unclear how this differs from foreflat)
# fslice = forehand slice groundstroke
# fvolley = forehand volley
# serkick = kick serve
# serslice = slice serve
# smash = smash

# classes can be consolidated into 6 classes for better performance:

# backhand = backhand, bslice, backhand2h
# forehand = foreflat, foreopen, fslice
# service = serflat, serkick, serslice
# bvolley
# fvolley
# smash

In [11]:
# Seed the shared random generator so repeated runs draw the same sequence
# and therefore produce the same random dataset.
random.seed(1)

# Cap on how many videos are taken from each class. Something arbitrarily
# large keeps every video (and so generates .npy for all data).
max_class_size = 300

# Proportion of each class used for the training set and for the validation
# set, respectively.
train_size, val_size = 0.8, 0.1

In [12]:
# --- EXECUTE THIS CELL TO CREATE DATASET CSV FILE --- #

# Build data/data_file.csv with one row per video: split, class label, and the
# video file name without its extension. Each class folder under VIDEO_RGB is
# split on its own into train / validation / test using train_size, val_size
# and max_class_size, and the shared `random` generator for shuffling.

# Fine-grained folder names mapped to the consolidated class written to the
# CSV; folder names that are not listed here keep their own name.
consolidated_labels = {
    'backhand2h': 'backhand',
    'bslice': 'backhand',
    'foreflat': 'forehand',
    'foreopen': 'forehand',
    'fslice': 'forehand',
    'serflat': 'service',
    'serkick': 'service',
    'serslice': 'service',
}

# folder that holds one sub-folder of videos per class
path = '../code/VIDEO_RGB'

# List the class folders BEFORE opening the output file, so that a missing
# input folder raises without truncating an existing CSV.
# Hidden entries (e.g. .DS_Store) and stray files are skipped. The names are
# sorted because os.listdir returns entries in arbitrary order, which would
# otherwise make the seeded shuffle below differ between machines.
class_folders = sorted(
    item for item in os.listdir(path)
    if not item.startswith('.') and os.path.isdir(os.path.join(path, item))
)

# NOTE(review): when this runs on Python 3 the csv documentation recommends
# opening the file with newline=''; the call is left unchanged here because
# the interpreter version cannot be determined from this file -- confirm.
with open('data/data_file.csv', 'w') as csvfile:
    writer = csv.writer(csvfile, delimiter=',')

    # iterate over each folder, and write train/validation/test samples to csv
    for class_label in class_folders:
        subpath = os.path.join(path, class_label)

        # Keep regular, non-hidden files only and drop the file extension.
        # Sorted for the same reproducibility reason as class_folders.
        videos = sorted(
            os.path.splitext(vid)[0]
            for vid in os.listdir(subpath)
            if not vid.startswith('.')
            and os.path.isfile(os.path.join(subpath, vid))
        )

        # --- EDIT THESE LINES: choose what sequences to generate ---- #

        # TURN ON RANDOM SHUFFLE WHEN GENERATING ACTUAL DATASET
        random.shuffle(videos)  # NOTE: shuffles videos in place

        # alter size of dataset (a slice longer than the list keeps it all)
        videos = videos[:max_class_size]

        # ------------------------------------------------------------ #

        # split into training, validation and test sets; the test set is
        # whatever remains after the first two slices
        train_split_ind = int(round(train_size * len(videos)))
        val_split_ind = train_split_ind + int(round(val_size * len(videos)))

        train = videos[:train_split_ind]
        validation = videos[train_split_ind:val_split_ind]
        test = videos[val_split_ind:]

        # --- RELABEL TO CONSOLIDATE CLASSES --- #
        # Done after reading the folder, so the folder name is still used
        # for the path while the consolidated name goes into the CSV.
        class_label = consolidated_labels.get(class_label, class_label)

        # write each split to csv as rows of: split name, class, sample
        for split_name, samples in (('train', train),
                                    ('validation', validation),
                                    ('test', test)):
            for sample in samples:
                writer.writerow([split_name, class_label, sample])

backhand
backhand2h
bslice
bvolley
foreflat
foreopen
fslice
fvolley
serflat
serkick
serslice
smash
