## Data Collection
---
Create a labeled dataset from source video.

**Tasks**
1. Check count of currently saved frames
2. Capture new frames from video
    1. Split frames into train/validation/test folders
3. Label images
    1. Use COCO Annotator to label boxes, masks, keypoints

**Assumptions**
1. The train, val and test folders must be created manually inside the images folder before splitting
2. Files are named `{video key}frame_{N}.jpg` (e.g. `swin_to_padd_frame_120.jpg`)
3. If images are labeled before the split, the annotation file (XML) or its image paths must be updated afterwards

In [1]:
import os
import glob
import random
import math
import cv2

In [2]:
'''
Get all image paths in image folder
'''

# Get all paths in root folder
def GetSavedImageRoot(img_folder):
    '''
    List the .jpg files that sit directly inside img_folder.
    Subfolders are not searched. Returns a list of path strings.
    '''
    jpg_pattern = img_folder + '/*.jpg'
    return list(glob.glob(jpg_pattern))

# Get all paths in train/test/val
def GetSavedImagePaths(img_folder):
    '''
    Collect the .jpg paths found in the train, test and val subfolders of
    img_folder, visiting the subfolders in that order.
    '''
    collected = []
    for split_name in ('train', 'test', 'val'):
        pattern = '/'.join([img_folder, split_name, '*.jpg'])
        collected.extend(glob.glob(pattern))
    return collected

# Get names without paths for all in train/test/val
def GetSavedFrameNames(img_folder):
    '''
    Return the bare file names (directory part removed) of every .jpg saved
    in the train/test/val subfolders of img_folder.
    '''
    # Take the basename instead of searching the path for a video key:
    # str.find returns -1 (which is truthy) on a miss, so it cannot serve as
    # a found/not-found condition, and a miss used to leave the full path in
    # the result.
    return [os.path.basename(path) for path in GetSavedImagePaths(img_folder)]

In [3]:
# Xmins x 60frames/secs X 60sec/min = 10+ mins unused in model per vid
def FPSToMin(mins, fps=60):
    '''
    Return the number of frames spanned by `mins` minutes of video.

    mins: duration in minutes.
    fps: frames per second of the video; defaults to 60, the rate that was
        previously hard-coded.
    '''
    # minutes x 60 sec/min x frames/sec
    return mins*60*fps

In [4]:
'''
Split images into train/test and move from images root to train/test folder
'''

def MoveImages(img_paths):
    '''
    Randomly split img_paths into train/val/test and move each file into the
    subfolder of that name located next to it.

    20% of the images (rounded up) go to 'test', 10% of the remainder
    (rounded up) go to 'val', and the rest go to 'train'. Only files whose
    name contains 'swin_to_padd_' or 'padd_to_swin_' are moved; any other
    file is reported with a printed message and left where it is.

    The destination folders must already exist: os.rename does not create
    them.

    img_paths: list of image paths in the images root folder. The list
        itself is not modified.
    '''
    # Shuffle a copy so the caller's list keeps its order
    shuffled = list(img_paths)
    random.shuffle(shuffled)

    # Split train/test
    num_test_images = math.ceil(len(shuffled)*0.2)
    test_img_paths = shuffled[:num_test_images]
    train_img_paths = shuffled[num_test_images:]

    # Split train/validation
    random.shuffle(train_img_paths)
    num_validation_images = math.ceil(len(train_img_paths)*0.1)
    validation_img_paths = train_img_paths[:num_validation_images]
    train_img_paths = train_img_paths[num_validation_images:]

    # Move images to folder
    groups = (
        ('train', train_img_paths),
        ('val', validation_img_paths),
        ('test', test_img_paths),
    )
    for split_name, group in groups:
        for img_path in group:
            folder, file_name = os.path.split(img_path)
            if 'swin_to_padd_' in file_name or 'padd_to_swin_' in file_name:
                # Build the destination from path components; a text
                # replace on the whole path would also rewrite any
                # directory whose name contains 'swin' or 'padd'.
                os.rename(img_path, os.path.join(folder, split_name, file_name))
            else:
                print('Error in moving file')

In [5]:
'''
Capture frames from video at interval
'''

def CaptureFramesFromVideo(PATH_TO_VIDEO,PATH_TO_IMG_FOLDER,num_images):
    '''
    Save num_images evenly spaced frames from a video into
    PATH_TO_IMG_FOLDER, then split every .jpg in that folder into
    train/val/test with MoveImages.

    PATH_TO_VIDEO: path ending in one of the two known video file names; the
        name selects the prefix ('swin_to_padd_' or 'padd_to_swin_') used
        for the saved frames.
    PATH_TO_IMG_FOLDER: images root folder. Frames are written here as
        '<prefix>frame_<frame number>.jpg'.
    num_images: number of new frames to save; must be at least 1.

    Raises ValueError if the video is not one of the known files or if
    num_images is less than 1.
    '''
    # Set key for each video for labeling
    if PATH_TO_VIDEO.endswith('Swin 2 Padding VIDEO 1 CAMERA 1.avi'):
        vidKey = 'swin_to_padd_'
    elif PATH_TO_VIDEO.endswith('Padding 2 Swin VIDEO 2 CAMERA 1.avi'):
        vidKey = 'padd_to_swin_'
    else:
        # Frames cannot be named without a key, so stop before any capture
        raise ValueError('Could not find video file: %s' % PATH_TO_VIDEO)

    if num_images < 1:
        raise ValueError('num_images must be at least 1')

    # Names already sorted into train/val/test. Looked up once: frames saved
    # below go to the root folder, which this listing does not cover, so the
    # result cannot change while the loop runs.
    savedNames = set(GetSavedFrameNames(PATH_TO_IMG_FOLDER))

    # Get video obj
    vidcap = cv2.VideoCapture(PATH_TO_VIDEO)
    try:
        # Randomize start frame within the first FPSToMin(3) frames
        # (3 minutes at FPSToMin's default frame rate)
        currFrame = random.randint(1,FPSToMin(3))
        vidcap.set(cv2.CAP_PROP_POS_FRAMES, currFrame)

        # Get number frames in sample.
        # NOTE(review): fixed at FPSToMin(80); not read from the video itself
        totalFrames = FPSToMin(80)

        # Get step value for range; at least 1 so the position always advances
        step = max(1, math.floor((totalFrames-currFrame)/num_images))

        # init count and open video
        savedCount = 0
        while vidcap.isOpened():

            # Read each frame
            success,image = vidcap.read()
            if not success:
                # The capture stays open after a failed read (e.g. past the
                # last frame), so leave the loop instead of retrying forever
                print("Error in capturing video.")
                break

            # Create frame and path names
            frame = vidKey+"frame_%d.jpg" % currFrame
            img_path = PATH_TO_IMG_FOLDER+'/'+frame

            # Save image and increment count unless the frame is already in
            # the train/val/test folders
            if frame not in savedNames:
                cv2.imwrite(img_path, image)
                savedCount += 1
                if savedCount == num_images:
                    print('Saved',num_images,'images from video.')
                    break

            # Get next frame by step
            currFrame += step
            vidcap.set(cv2.CAP_PROP_POS_FRAMES, currFrame)
    finally:
        # Release video even if reading or writing raised
        vidcap.release()
        cv2.destroyAllWindows()

    # Split and move images
    img_paths = GetSavedImageRoot(PATH_TO_IMG_FOLDER)
    MoveImages(img_paths)

### Capture Frames from Video

In [6]:
'''
Check current number saved images in train/val/test
'''

# Set path to root directory for saving images
# (the train/test/val subfolders are looked up beneath this folder)
PATH_TO_IMG_FOLDER = '/Users/jessedecker/projects/rail_segmentation/datasets/pantograph'

# Count the .jpg files already present in the train/test/val subfolders
img_paths = GetSavedImagePaths(PATH_TO_IMG_FOLDER)
print('Current saved images:',len(img_paths))

Current saved images: 100


In [7]:
'''
Call to capture frames. 
Saves images to 
'''

# Set path to video file
# (CaptureFramesFromVideo chooses the frame-name prefix from the end of each path)
PATH_TO_VIDEO_1 = '/Users/jessedecker/projects/rail_segmentation/assets/Swin 2 Padding VIDEO 1 CAMERA 1.avi'
PATH_TO_VIDEO_2 = '/Users/jessedecker/projects/rail_segmentation/assets/Padding 2 Swin VIDEO 2 CAMERA 1.avi'

# Set number of images to save (per video)
num_images = 50

# Capture from each video in turn; each call saves frames into
# PATH_TO_IMG_FOLDER and then splits them into train/val/test
for pathToVideo in [PATH_TO_VIDEO_1,PATH_TO_VIDEO_2]:
    CaptureFramesFromVideo(pathToVideo,PATH_TO_IMG_FOLDER,num_images)

Saved 50 images from video.
Saved 50 images from video.


### Label Images

Use COCO Annotator to define bounding boxes, masks, keypoints in the images.

Save JSON file with train/test/val images. 