# train_test_split development notebook
## Code to perform a stratified split of each class's examples into train and test sets, for use with the generator class

Input format:

data ====== class1
        |
        |== class2
        |
        |== class3
        
Output format:

data ====== train ===== class1
        |            |
        |            |= class2
        |            |
        |            |= class3
        |
        |== test ====== class1
                     |
                     |= class2
                     |
                     |= class3
        


In [12]:
import random
import shutil
import os

In [28]:
def train_test_split(root='./data', classes=['goals', 'nongoals'], split_ratio=0.8, seed=None):
    """Copy each class's files into per-class ``train`` and ``test`` directories.

    For every name in ``classes``, the regular files found in ``<root>/<cls>``
    are shuffled and split by ``split_ratio``; the first part is copied to
    ``<root>/train/<cls>`` and the remainder to ``<root>/test/<cls>``. Because
    every class is split with the same ratio, the split is stratified by class.
    The source files are copied (with ``shutil.copy2``), never moved.

    Parameters
    ----------
    root : str
        Directory that contains one sub-directory per class. The ``train``
        and ``test`` directories are created inside it when missing.
    classes : list of str
        Names of the class sub-directories of ``root`` to split.
    split_ratio : float
        Fraction of each class's files that goes to the train set, in
        ``[0, 1]``. The per-class train count is
        ``round(len(files) * split_ratio)``.
    seed : int or None
        When given, a dedicated ``random.Random(seed)`` is used so the split
        is reproducible. When ``None`` (default) the module-level ``random``
        state is used, as before.

    Raises
    ------
    ValueError
        If ``split_ratio`` is outside ``[0, 1]``.

    Notes
    -----
    Existing files in the output directories are not removed. Re-running with
    a different shuffle on already populated ``train``/``test`` directories can
    therefore leave the same file in both sets; clear them first if that matters.
    """
    if not 0.0 <= split_ratio <= 1.0:
        raise ValueError('split_ratio must be between 0 and 1, got %r' % (split_ratio,))

    # A private generator keeps a seeded split from disturbing global random state.
    rng = random if seed is None else random.Random(seed)

    train_dir = os.path.join(root, 'train')
    test_dir = os.path.join(root, 'test')

    for cls in classes:
        cls_dir = os.path.join(root, cls)
        print('Accessing files in ' + cls_dir)

        # Only regular files can be copied with copy2; sorting first removes the
        # dependence on os.listdir's arbitrary ordering so a seed is meaningful.
        cls_list = sorted(
            name for name in os.listdir(cls_dir)
            if os.path.isfile(os.path.join(cls_dir, name))
        )
        rng.shuffle(cls_list)
        split = round(len(cls_list) * split_ratio)

        cls_train_dir = os.path.join(train_dir, cls)
        cls_test_dir = os.path.join(test_dir, cls)

        # exist_ok makes each output directory independent of the other: the
        # test directory is created even when the train one already exists.
        os.makedirs(cls_train_dir, exist_ok=True)
        os.makedirs(cls_test_dir, exist_ok=True)

        for name in cls_list[:split]:
            shutil.copy2(os.path.join(cls_dir, name), cls_train_dir)

        for name in cls_list[split:]:
            shutil.copy2(os.path.join(cls_dir, name), cls_test_dir)

In [29]:
# Split the SoccerNet class folders into train/ and test/ copies under the data root.
train_test_split(
    root='../SoccerNet-code/data',
    classes=['goals', 'nongoals'],
)

Accessing files in ../SoccerNet-code/data/goals
Accessing files in ../SoccerNet-code/data/nongoals


In [19]:
# List the dataset root directory.
# NOTE(review): this cell's execution count (19) is lower than the split cells' (28-29)
# and the captured listing shows no train/ or test/ entries, so the output appears to
# predate the split — re-run to refresh.
!ls ../SoccerNet-code/data

england_epl		      README.md
europe_uefa-champions-league  SoccerNet_V1.1_Commentaries.csv
france_ligue-1		      SoccerNet_V1.1_Features.csv
germany_bundesliga	      SoccerNet_V1.1_Labels.csv
goals			      SoccerNet_V1.1_Videos.csv
italy_serie-a		      spain_laliga
nongoals


In [34]:
# Inspect the test split of the 'goals' class to check which files were copied there.
!ls ../SoccerNet-code/data/test/goals

106.mkv  155.mkv  184.mkv  245.mkv  2.mkv    337.mkv  390.mkv  56.mkv  89.mkv
107.mkv  156.mkv  190.mkv  264.mkv  315.mkv  343.mkv  391.mkv  5.mkv   91.mkv
10.mkv	 164.mkv  192.mkv  268.mkv  319.mkv  353.mkv  392.mkv  60.mkv  94.mkv
120.mkv  165.mkv  193.mkv  274.mkv  31.mkv   358.mkv  395.mkv  61.mkv  98.mkv
123.mkv  169.mkv  201.mkv  275.mkv  325.mkv  359.mkv  39.mkv   65.mkv  99.mkv
135.mkv  173.mkv  209.mkv  278.mkv  329.mkv  360.mkv  3.mkv    76.mkv
140.mkv  178.mkv  216.mkv  285.mkv  32.mkv   36.mkv   409.mkv  79.mkv
145.mkv  179.mkv  222.mkv  288.mkv  331.mkv  383.mkv  40.mkv   7.mkv
146.mkv  17.mkv   22.mkv   290.mkv  332.mkv  388.mkv  41.mkv   80.mkv
147.mkv  183.mkv  230.mkv  294.mkv  335.mkv  389.mkv  45.mkv   81.mkv
