In [1]:
import pandas as pd
import numpy as np
import matplotlib as plt
import os

In [2]:
# Directory where all the data is present.
# Defaults to the original local path; set the SOMETHING_SOMETHING_DATA_DIR
# environment variable to point the notebook at another location without
# editing this cell.
_DEFAULT_BASE_DIR = "D:\\something-something-project\\data\\"
base_dir = os.environ.get("SOMETHING_SOMETHING_DATA_DIR") or _DEFAULT_BASE_DIR
# later cells build paths by plain concatenation (base_dir + file name), so the
# value has to end with a path separator
if not base_dir.endswith(("\\", "/")):
    base_dir += os.sep

In [3]:
# sanity check: show the data directory configured in `base_dir`
print (base_dir)

D:\something-something-project\data\


In [4]:
# Read only the training and validation annotation files (JSON) into dataframes.
# The paths are built by appending the file name to `base_dir`.
train_json_path = base_dir + 'something-something-v2-train.json'
validation_json_path = base_dir + 'something-something-v2-validation.json'

train_vids_id = pd.read_json(train_json_path)
validation_vids_id = pd.read_json(validation_json_path)

In [5]:
# preview the first 50 rows of the training annotations
train_vids_id.head(50)

Unnamed: 0,id,label,template,placeholders
0,78687,holding potato next to vicks vaporub bottle,Holding [something] next to [something],"[potato, vicks vaporub bottle]"
1,42326,spreading margarine onto bread,Spreading [something] onto [something],"[margarine, bread]"
2,100904,putting pen on a surface,Putting [something] on a surface,[pen]
3,80715,"lifting up one end of bottle, then letting it ...","Lifting up one end of [something], then lettin...",[bottle]
4,34899,holding bulb,Holding [something],[bulb]
5,184568,pushing strap camera from right to left,Pushing [something] from right to left,[strap camera]
6,112783,spilling mouthwash onto roll on,Spilling [something] onto [something],"[mouthwash, roll on]"
7,108895,pushing spiderman invitaion so that it slightl...,Pushing [something] so that it slightly moves,[spiderman invitaion]
8,169270,moving headset across a surface until it falls...,Moving [something] across a surface until it f...,[headset]
9,66533,folding paper towel,Folding [something],[paper towel]


In [28]:
# (rows, columns) of the training annotations dataframe
train_vids_id.shape

(168913, 4)

In [29]:
# preview the first rows of the validation annotations
validation_vids_id.head()

Unnamed: 0,id,label,template,placeholders
0,74225,spinning cube that quickly stops spinning,Spinning [something] that quickly stops spinning,[cube]
1,116154,showing clay box on top of wallet,Showing [something] on top of [something],"[clay box, wallet]"
2,198186,wiping words off of a paper,Wiping [something] off of [something],"[words, a paper]"
3,137878,pushing scissors so that it falls off the table,Pushing [something] so that it falls off the t...,[scissors]
4,151151,turning the camera left while filming wall mou...,Turning the camera left while filming [something],[wall mounted fan]


In [30]:
# (rows, columns) of the validation annotations dataframe
validation_vids_id.shape

(24777, 4)

In [31]:
# Grouping the data by action: all videos that belong to one action end up in
# the same bucket. The nine actions used here are identified by their leading
# verb; both lookup tables below are derived from this single tuple so that
# their keys always stay in sync.
_ACTION_VERBS = ('Dropping', 'Holding', 'Moving', 'Picking', 'Poking',
                 'Pouring', 'Putting', 'Showing', 'Tearing')

# template prefix -> list collecting the entries of that action (starts empty)
classes = {verb + ' [something]': [] for verb in _ACTION_VERBS}

# template prefix -> class name without spaces or brackets
actual_class = {verb + ' [something]': verb + '_something' for verb in _ACTION_VERBS}

In [32]:
# utility function to return the class name from the template name
def get_class_name(template_name, class_keys=None):
    """Return the first class key that occurs inside ``template_name``.

    Parameters
    ----------
    template_name : str
        Template text of one annotation row, e.g.
        ``'Holding [something] next to [something]'``.
    class_keys : iterable of str, optional
        Candidate class keys, tried in iteration order. When omitted, the keys
        of the module-level ``classes`` dict are used.

    Returns
    -------
    str or None
        The first key that is a substring of ``template_name``; ``None`` when
        no key matches or when ``template_name`` is not a string (for example
        a missing value), instead of raising ``TypeError``.
    """
    if not isinstance(template_name, str):
        return None
    if class_keys is None:
        # resolved at call time so the function always sees the current dict
        class_keys = classes
    for key in class_keys:
        if key in template_name:
            return key
    return None

In [33]:
# grouping the training data
# Start from empty buckets so that re-running this cell does not append every
# video a second time (the lists live in the module-level `classes` dict).
for bucket in classes.values():
    bucket.clear()

# Iterate over the needed columns directly instead of DataFrame.iterrows(),
# which builds a Series object for every row.
for vid_id, template, placeholders in zip(train_vids_id['id'],
                                          train_vids_id['template'],
                                          train_vids_id['placeholders']):
    row_class = get_class_name(template)
    if row_class is not None:
        # keep (video id, first placeholder) for this action class
        classes[row_class].append((vid_id, placeholders[0]))

In [39]:
# spot check: second tuple element (the first placeholder) of the entry at index 100 in the 'Dropping' bucket
classes['Dropping [something]'][100][1]

'shoe cleaning brush'

In [37]:
# report how many entries were collected for every class
for class_key, entries in classes.items():
    print ('Class: ', class_key, '\t\tLength: ', len(entries))

    holding something : ~14k

Class:  Dropping [something] 		Length:  4680
Class:  Holding [something] 		Length:  6894
Class:  Moving [something] 		Length:  14877
Class:  Picking [something] 		Length:  980
Class:  Poking [something] 		Length:  4194
Class:  Pouring [something] 		Length:  1700
Class:  Putting [something] 		Length:  16731
Class:  Showing [something] 		Length:  4141
Class:  Tearing [something] 		Length:  3021


In [14]:
# we got our classes and their counts
# let's extract frames at a fixed time step (the `fps` variable in the loops below, in seconds) and store them in their respective folders
import cv2

In [43]:
count = 1

# characters that are not allowed in Windows file names, each mapped to '_'
_INVALID_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '<>:"/\\|?*'})

# function to extract frames and save it in respective class folder
def getFrame(sec, vidcap, output_dir, class_name, obj, count):
    """Grab the frame at ``sec`` seconds and save it as a JPG in the class folder.

    Parameters
    ----------
    sec : number
        Position in the video, in seconds (converted to milliseconds for
        ``cv2.CAP_PROP_POS_MSEC``).
    vidcap : cv2.VideoCapture
        Capture object to seek and read from.
    output_dir : str
        Root output directory; the class folder name is appended by plain
        string concatenation, so it must end with a path separator.
    class_name : str
        Key of ``actual_class`` (e.g. ``'Dropping [something]'``).
    obj : str
        Object text used in the file name.
    count : int
        Running number used in the file name.

    Returns
    -------
    bool
        ``True`` when a frame could be read at ``sec``, ``False`` otherwise.
        The result of ``cv2.imwrite`` is not reflected in the return value.

    Notes
    -----
    The image is written through its full path; the process working directory
    is left untouched. Characters that are illegal in file names are replaced
    by ``'_'`` in ``obj`` so such frames are not dropped at write time.
    """
    vidcap.set(cv2.CAP_PROP_POS_MSEC, sec*1000)
    hasFrames, image = vidcap.read()
    if hasFrames:
        class_dir = output_dir + actual_class[class_name]
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs
        os.makedirs(class_dir, exist_ok=True)

        safe_obj = str(obj).translate(_INVALID_FILENAME_CHARS)
        file_name = actual_class[class_name].split('_')[0] + '_' + safe_obj + '_' + str(count) + ".jpg"
        cv2.imwrite(os.path.join(class_dir, file_name), image)     # save frame as JPG file
    return hasFrames

In [16]:
# since we will be using different classes in the dict to generate images,
# we assume for now that no video appears under more than one key.
# To make things faster, let's implement multi-processing in Python so that
# our code executes faster and utilizes more CPU.

In [44]:
# run this cell if you want to simply generate images without multi-processing

# directory where all the videos are saved (lives below the data directory)
video_dir = base_dir + 'videos\\20bn-something-something-v2\\'
# directory where images will be saved
output_dir = 'D:\\something-something-project\\test-images\\'

for key in classes:
    # image numbering restarts at 1 for every class
    count = 1
    print ("Current class: ", key)
    for vid in classes[key]:
        # vid is (video id, object text)
        vidcap = cv2.VideoCapture(video_dir + str(vid[0]) + '.webm')
        try:
            sec = 0
            # NOTE: despite its name, `fps` is the step between sampled frames, in seconds
            fps = 1
            success = getFrame(sec, vidcap, output_dir, key, vid[1], count)
            while success:
                count = count + 1
                sec = round(sec + fps, 2)
                success = getFrame(sec, vidcap, output_dir, key, vid[1], count)
        finally:
            # always free the capture handle, even if frame extraction raises
            vidcap.release()
        # trial run: stop after the first video of the class ...
        break
    # ... and after the first class
    break

Current class:  Dropping [something]


In [19]:
# doing the same thing for validation set now

# Grouping the validation data by action: all entries that belong to one action
# end up in the same bucket. The same 9 classes are used:
_VALIDATION_CLASS_KEYS = (
    'Dropping [something]',
    'Holding [something]',
    'Moving [something]',
    'Picking [something]',
    'Poking [something]',
    'Pouring [something]',
    'Putting [something]',
    'Showing [something]',
    'Tearing [something]',
)
# every key gets its own, initially empty, list
validation_classes = {class_key: [] for class_key in _VALIDATION_CLASS_KEYS}


# utility function to return the class name from the template name
def get_validation_class_name(template_name, class_keys=None):
    """Return the first validation class key that occurs inside ``template_name``.

    Parameters
    ----------
    template_name : str
        Template text of one annotation row.
    class_keys : iterable of str, optional
        Candidate class keys, tried in iteration order. When omitted, the keys
        of the module-level ``validation_classes`` dict are used.

    Returns
    -------
    str or None
        The first key that is a substring of ``template_name``; ``None`` when
        no key matches or when ``template_name`` is not a string (for example
        a missing value), instead of raising ``TypeError``.
    """
    if not isinstance(template_name, str):
        return None
    if class_keys is None:
        # resolved at call time so the function always sees the current dict
        class_keys = validation_classes
    for key in class_keys:
        if key in template_name:
            return key
    return None


# grouping the validation data
# Empty the buckets first so that re-running this cell does not append every
# video id a second time.
for bucket in validation_classes.values():
    bucket.clear()

# Iterate over the needed columns directly instead of DataFrame.iterrows(),
# which builds a Series object for every row.
for vid_id, template in zip(validation_vids_id['id'], validation_vids_id['template']):
    row_class = get_validation_class_name(template)
    if row_class is not None:
        validation_classes[row_class].append(vid_id)
        
# report how many video ids were collected for every validation class
for class_key, video_ids in validation_classes.items():
    print ('Class: ', class_key, '\t\tLength: ', len(video_ids))

Class:  Dropping [something] 		Length:  827
Class:  Holding [something] 		Length:  1053
Class:  Moving [something] 		Length:  1911
Class:  Picking [something] 		Length:  199
Class:  Poking [something] 		Length:  419
Class:  Pouring [something] 		Length:  458
Class:  Putting [something] 		Length:  1894
Class:  Showing [something] 		Length:  697
Class:  Tearing [something] 		Length:  898


In [20]:
# write the validation classes data into the disk
count = 1

# function to extract frames and save it in respective class folder
def getValidationFrame(sec, vidcap, output_dir, class_name, count):
    """Grab the frame at ``sec`` seconds and save it as a JPG in the class folder.

    Parameters
    ----------
    sec : number
        Position in the video, in seconds (converted to milliseconds for
        ``cv2.CAP_PROP_POS_MSEC``).
    vidcap : cv2.VideoCapture
        Capture object to seek and read from.
    output_dir : str
        Root output directory; the class folder name is appended by plain
        string concatenation, so it must end with a path separator.
    class_name : str
        Key of ``actual_class`` (e.g. ``'Dropping [something]'``).
    count : int
        Running number used in the file name.

    Returns
    -------
    bool
        ``True`` when a frame could be read at ``sec``, ``False`` otherwise.
        The result of ``cv2.imwrite`` is not reflected in the return value.

    Notes
    -----
    The image is written through its full path; the process working directory
    is left untouched.
    """
    vidcap.set(cv2.CAP_PROP_POS_MSEC, sec*1000)
    hasFrames, image = vidcap.read()
    if hasFrames:
        class_dir = output_dir + actual_class[class_name]
        # exist_ok avoids the check-then-create race of os.path.exists + makedirs
        os.makedirs(class_dir, exist_ok=True)

        file_name = actual_class[class_name] + '_image_' + str(count) + ".jpg"
        cv2.imwrite(os.path.join(class_dir, file_name), image)     # save frame as JPG file
    return hasFrames

In [21]:
# directory where all the videos are saved (lives below the data directory)
video_dir = base_dir + 'videos\\20bn-something-something-v2\\'
# directory where images will be saved
output_dir = 'D:\\something-something-project\\validation-images\\'

for key in validation_classes:
    # image numbering restarts at 1 for every class
    count = 1
    print ("Current class: ", key)
    for vid in validation_classes[key]:
        vidcap = cv2.VideoCapture(video_dir + str(vid) + '.webm')
        try:
            sec = 0
            # NOTE: despite its name, `fps` is the step between sampled frames, in seconds
            fps = 0.5
            success = getValidationFrame(sec, vidcap, output_dir, key, count)
            while success:
                count = count + 1
                # rounding keeps the accumulated float step from drifting
                sec = round(sec + fps, 2)
                success = getValidationFrame(sec, vidcap, output_dir, key, count)
        finally:
            # always free the capture handle; thousands of videos are opened in this loop
            vidcap.release()

Current class:  Dropping [something]
Current class:  Holding [something]
Current class:  Moving [something]
Current class:  Picking [something]
Current class:  Poking [something]
Current class:  Pouring [something]
Current class:  Putting [something]
Current class:  Showing [something]
Current class:  Tearing [something]
