# Installing Hub

In [None]:
!pip3 install hub --quiet

# Run below cells and restart the runtime
# if you are running it in colab
# import os
# os.kill(os.getpid(), 9) 

# Loading Packages

In [None]:
from IPython.display import clear_output

In [None]:
import os
import subprocess
import time
from glob import glob

import pandas as pd

In [None]:
# Download Video Data
# Fetch the hmdb51_org.rar archive and unpack it into hmdb_rar/; the next
# cell looks for the per-class .rar files in that directory.
!wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/hmdb51_org.rar
!unrar e hmdb51_org.rar hmdb_rar/
# Drop the wget/unrar logs from the cell output.
clear_output()

In [None]:
rar_files = glob('hmdb_rar/*.rar')

base_path = 'hmdb'

for (index, rar) in enumerate(rar_files, 1):
    path = os.path.join(base_path, rar.split('/')[-1].split('.')[0], '')
    print(f'{index} -> {path}')
    !unrar e {rar} {path}
    clear_output()

In [None]:
# Download Train/Test Splits
# Fetch the test_train_splits.rar archive and unpack it into hmdb_splits/;
# later cells read the *_test_split1.txt files from that directory.
!wget http://serre-lab.clps.brown.edu/wp-content/uploads/2013/10/test_train_splits.rar
!unrar e test_train_splits.rar hmdb_splits/
# Drop the wget/unrar logs from the cell output.
clear_output()

# Processing dataset

In [None]:
# Collect the split-1 annotation files. The processing cell derives one class
# label per file, assuming names of the form <class>_test_split1.txt.
txts = glob('hmdb_splits/*split1.txt')

In [None]:
# Derive the action-class names from the split files:
# 'hmdb_splits/<class>_test_split1.txt' -> '<class>'.
class_labels = [os.path.basename(txt).split('_test')[0] for txt in txts]

hmdb_path = 'hmdb'
base_path = 'dataset'
# Indexed by the ID column of the split files: 0 -> extras, 1 -> train, 2 -> test.
subfolders = ['extras', 'train', 'test']

# To rebuild from scratch, remove the output tree first:
# !rm -rf dataset
for split_dir in subfolders:
    os.makedirs(os.path.join(base_path, split_dir), exist_ok=True)

# Conversions whose ffmpeg call returned a non-zero exit code. They are
# reported at the end because the per-video output is cleared as we go.
failed_conversions = []

start = time.time()

for (index, label) in enumerate(class_labels, 1):
    txt_path = os.path.join('hmdb_splits', label+'_test_split1.txt')
    # Each row holds a video file name and a split ID, separated by whitespace.
    df = pd.read_csv(txt_path, names=['Video', 'ID'], delimiter=r"\s+")

    for split_id, group in df.groupby(by='ID'):

        folder_path = os.path.join(base_path, subfolders[split_id], label)
        # exist_ok=True keeps the cell re-runnable; os.mkdir would raise
        # FileExistsError on the second run.
        os.makedirs(folder_path, exist_ok=True)

        for (video_index, video_name) in enumerate(group['Video'].tolist()):
            print('[{0}] -> [{1}] -> [{2}] -> [{3}]-> [{4}]]'.format(
                        index,
                        label,
                        subfolders[split_id],
                        video_index,
                        video_name
            ))
            source = os.path.join(hmdb_path, label, video_name)
            # splitext removes only the final extension, so a name that
            # contains other dots keeps its attribute suffix
            # (..._f_cm_np1_ri_bad_35), which the upload step parses.
            destination = os.path.join(
                folder_path, os.path.splitext(video_name)[0] + '.mp4'
            )
            # Pass the arguments as a list (no shell) so file names with
            # spaces, parentheses, '&', etc. are handled verbatim.
            # -nostdin / -y: never wait for an interactive overwrite prompt.
            result = subprocess.run(
                ['ffmpeg', '-nostdin', '-y', '-i', source, destination],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
                check=False,
            )
            if result.returncode != 0:
                failed_conversions.append(source)
            clear_output()

stop = time.time()
print(f'Time elapsed in conversion : {round(stop-start, 2)}s')
if failed_conversions:
    print(f'{len(failed_conversions)} video(s) failed to convert:')
    for failed_source in failed_conversions:
        print(f'  {failed_source}')

# Uploading to Hub

In [None]:
import hub

# Login to ActiveLoop

# NOTE(review): BUGGER_OFF presumably opts out of Hub's usage reporting — confirm.
%env BUGGER_OFF=True
# `username` and `password` appear to be placeholders: substitute your own
# credentials and do not commit the notebook with real ones filled in.
!activeloop login -u username -p password
# Switch reporting off through the activeloop CLI.
!activeloop reporting --off

In [None]:
# Process data in filename

"""
PROPERTY -> LABELS (ABBREVIATION)
visible body parts -> head(h), upper body(u), full body (f), lower body(l)
camera motion -> motion (cm), static (nm)
number of people involved in the action -> single (np1), two (np2), three (np3)
camera viewpoint -> Front (fr), back (ba), left(le), right(ri)
video quality -> good (goo), medium (med), ok (bad)
"""

# Human-readable class names for each attribute encoded in a video file name.
# The position of a name in its list is the integer class index.
visible_body_parts = [
    'head',
    'upper_body',
    'full_body',
    'lower_body',
]
camera_motion = [
    'motion',
    'static',
]
camera_viewpoint = [
    'front',
    'back',
    'left',
    'right',
]
number_of_people = [
    'zero', 'one', 'two', 'three', 'four', 'five',
    'six', 'seven', 'eight', 'nine', 'ten',
]
video_quality = [
    'good',
    'medium',
    'ok',
]

# Attribute lists in the order the abbreviations appear in a file name.
labels_list = [
    visible_body_parts,
    camera_motion,
    number_of_people,
    camera_viewpoint,
    video_quality,
]

# Maps every file-name abbreviation to its class index within its attribute.
label_dict = {
    # visible body parts
    **{code: position for position, code in enumerate(('h', 'u', 'f', 'l'))},
    # camera motion
    **{code: position for position, code in enumerate(('cm', 'nm'))},
    # number of people involved in the action: 'np0' .. 'np10'
    **{f'np{count}': count for count in range(11)},
    # camera viewpoint
    **{code: position for position, code in enumerate(('fr', 'ba', 'le', 'ri'))},
    # video quality
    **{code: position for position, code in enumerate(('goo', 'med', 'bad'))},
}

def process_filename(name, label_dict, labels_list):
    return [label_dict[val] for val in name.split('_')[-6:-1]]

# Quick sanity check on a sample name: the tokens f, cm, np1, ri, bad map to
# [2, 0, 1, 3, 2] with the label_dict defined in this cell.
name = "50_FIRST_DATES_dive_f_cm_np1_ri_bad_35"
process_filename(name, label_dict, labels_list)

In [None]:
# Upload one split of the converted dataset to Hub. Run this cell once per
# split by changing `subfolder`.
base_path = 'dataset'
subfolder = 'extras' # ['extras', 'train', 'test']

# Replace <username> with your Activeloop user name.
hubname = f'hub://<username>/hmdb51-{subfolder}'
ds = hub.dataset(hubname)

dataset_path = os.path.join(base_path, subfolder)
# Sort so that a class gets the same integer index in every split;
# os.listdir returns entries in arbitrary order.
class_labels = sorted(os.listdir(dataset_path))

start = time.time()

with ds:
    ds.create_tensor('visible_body_parts', htype='class_label', class_names=visible_body_parts)
    ds.create_tensor('camera_motion', htype='class_label', class_names=camera_motion)
    ds.create_tensor('camera_viewpoint', htype='class_label', class_names=camera_viewpoint)
    ds.create_tensor('number_of_people', htype='class_label', class_names=number_of_people)
    ds.create_tensor('video_quality', htype='class_label', class_names=video_quality)
    ds.create_tensor('labels', htype='class_label', class_names=class_labels)
    ds.create_tensor('videos', htype='video', sample_compression='mp4')

    for index, label in enumerate(class_labels):
        folder_path = os.path.join(dataset_path, label)
        for video in sorted(os.listdir(folder_path)):
            video_path = os.path.join(folder_path, video)
            # process_filename returns the indices in file-name order:
            # body parts, camera motion, number of people, viewpoint, quality.
            # Unpack into named variables so each value reaches its own tensor
            # (number of people and viewpoint were previously swapped).
            (body_parts_idx,
             motion_idx,
             people_idx,
             viewpoint_idx,
             quality_idx) = process_filename(video, label_dict, labels_list)
            ds.append({
                'videos' : hub.read(video_path),
                'labels' : index,
                'visible_body_parts' : body_parts_idx,
                'camera_motion' : motion_idx,
                'camera_viewpoint' : viewpoint_idx,
                'number_of_people' : people_idx,
                'video_quality' : quality_idx
            })


stop = time.time()
print(f'Time elapsed in uploading : {round(stop-start, 2)}s')

# Hub Dataset Links
https://app.activeloop.ai/activeloop/hmdb51-train <br>
https://app.activeloop.ai/activeloop/hmdb51-test <br>
https://app.activeloop.ai/activeloop/hmdb51-extras