In [6]:
import numpy as np
import pandas as pd
from os import listdir
import os
import re

In [7]:
# Work from the directory two levels above the notebook (presumably the project
# root — the data paths used below are relative).
# The target is resolved to an absolute path only once, so re-running this cell
# returns to the same directory instead of climbing two more levels each time.
if 'PROJECT_ROOT' not in globals():
    PROJECT_ROOT = os.path.abspath('../..')
os.chdir(PROJECT_ROOT)

### All in one folder

In [9]:
# Settings for the single-folder layout: features and labels live side by side.
is_frame_based = True   # selects which file-name suffix pattern is built below
ms_per_frame = 120      # feature files are filtered on this frame-length suffix below
data_dir = 'data/gesture/'

# Raw directory listing; non-matching names are filtered out by the regex below.
filenames = os.listdir(data_dir)

# File names follow '<filetype>_<movement>_<person>_<filenum>[_<suffix>].csv'.
# Raw strings keep `\d` and `\.` as regex escapes; in a normal string literal they
# are invalid escape sequences (SyntaxWarning on recent Python versions).
pattern = r'(?P<filename>(?P<filetype>[a-z]*)_(?P<movement>[a-z]*)_(?P<person>[a-z]*)_(?P<filenum>\d*)'

if is_frame_based:
    # Suffix is mandatory: either '_per_frame' or a numeric frame length.
    pattern = pattern + r'(_(per_frame|(?P<frame_length>\d*)))\.csv)'
else:
    # Numeric frame-length suffix is optional.
    pattern = pattern + r'(_(?P<frame_length>\d*))?\.csv)'

reg = re.compile(pattern)

# fullmatch (not search) so the captured `filename` is always the complete,
# real file name rather than a substring of something like 'x.csv.bak'.
matches = []
for file_name in filenames:
    match = reg.fullmatch(file_name)
    if match:
        matches.append(match)

# Build the frame in one go from the named groups instead of growing it row by row.
ds = pd.DataFrame(
    [match.groupdict() for match in matches],
    columns = ['filename','filetype','movement','person','filenum','frame_length'],
)

# Frame length as it appears in file names: zero-padded to at least three digits.
# zfill never truncates, so values >= 1000 are compared in full.
frame_length_tag = str(ms_per_frame).zfill(3)

ds_features = ds[(ds.filetype == 'features') & (ds.frame_length == frame_length_tag)]
ds_labels = ds[ds.filetype == 'labels']

# Pair each feature file with the label file of the same movement/person/filenum.
comb_ds = pd.merge(
    ds_features,
    ds_labels,
    on = ['movement','person','filenum'],
    how = 'left',
    suffixes = ['_features','_labels']
)[['movement','filename_features','filename_labels']]

# Feature files without a label file are dropped, except for the 'none' movement,
# whose missing label is filled with the 'labels_none.csv' placeholder.
comb_ds = comb_ds.drop(comb_ds[(comb_ds.movement != 'none') & (pd.isnull(comb_ds.filename_labels))].index)
comb_ds = comb_ds.fillna({'filename_labels': 'labels_none.csv'})
comb_ds = comb_ds.reset_index(drop = True)
comb_ds = comb_ds[['filename_features','filename_labels']]

data_source_df = ds
combined_data_files_df = comb_ds

In [12]:
# Spot-check a few random rows of the parsed file index.
ds.sample(n = 5)

Unnamed: 0,filename,filetype,movement,person,filenum,frame_length
11,features_land_l_02_120.csv,features,land,l,2,120.0
42,labels_land_c_02_per_frame.csv,labels,land,c,2,
14,features_left_c_02_120.csv,features,left,c,2,120.0
8,features_land_c_02_120.csv,features,land,c,2,120.0
34,labels_flip_c_01_per_frame.csv,labels,flip,c,1,


In [13]:
# Spot-check a few random feature/label file pairings.
combined_data_files_df.sample(n = 5)

Unnamed: 0,filename_features,filename_labels
19,features_none_p_01_120.csv,labels_none.csv
3,features_flip_l_01_120.csv,labels_flip_l_01_per_frame.csv
8,features_land_c_02_120.csv,labels_land_c_02_per_frame.csv
15,features_left_c_03_120.csv,labels_left_c_03_per_frame.csv
10,features_land_l_01_120.csv,labels_land_l_01_per_frame.csv


### Separate folders

In [30]:
# Settings for the separate-folder layout.
data_dir = 'data/gesture/'
is_frame_based = True   # chooses between the frame-based and time-based label folders
ms_per_frame = 120      # frame-length suffix used to select feature files

In [31]:
# Empty file index; one column per named group of the file-name regex.
ds_columns = [
    'filename',
    'filetype',
    'movement',
    'person',
    'filenum',
    'frame_length',
]
ds = pd.DataFrame(columns = ds_columns)

In [32]:
# Labels come from a different sub-folder depending on the labelling mode.
labels_subdir = 'labels_framebased/' if is_frame_based else 'labels_timebased/'

filenames_features = listdir(data_dir + 'features/')
filenames_labels = listdir(data_dir + labels_subdir)

In [33]:
# File names follow '<filetype>_<movement>_<person>_<filenum>[_<frame_length>].csv';
# the numeric frame-length suffix is optional.
# Raw string: `\d` and `\.` are regex escapes, but invalid escape sequences in a
# normal string literal (SyntaxWarning on recent Python versions).
pattern = r'(?P<filename>(?P<filetype>[a-z]*)_(?P<movement>[a-z]*)_(?P<person>[a-z]*)_(?P<filenum>\d*)(_(?P<frame_length>\d*))?\.csv)'
reg = re.compile(pattern)

In [34]:
# Parse every feature and label file name; names that do not fit the scheme are skipped.
# fullmatch (not search) so the captured `filename` is always the complete,
# real file name rather than a substring of a longer name.
matches = []
for file_name in filenames_features + filenames_labels:
    match = reg.fullmatch(file_name)
    if match:
        matches.append(match)

# Rebuild the index from scratch on every run: writing rows into a frame created
# in another cell would leave stale rows behind when this cell is re-run with
# fewer files, and growing a frame row by row is slow.
ds = pd.DataFrame(
    [match.groupdict() for match in matches],
    columns = ['filename','filetype','movement','person','filenum','frame_length'],
)

# Frame length as it appears in file names: zero-padded to at least three digits.
# zfill never truncates, so values >= 1000 are compared in full.
frame_length_tag = str(ms_per_frame).zfill(3)

ds_features = ds[(ds.filetype == 'features') & (ds.frame_length == frame_length_tag)]
ds_labels = ds[ds.filetype == 'labels']

# Pair each feature file with the label file of the same movement/person/filenum.
comb_ds = pd.merge(ds_features,
                   ds_labels,
                   on = ['movement','person','filenum'],
                   how = 'left',
                   suffixes = ['_features','_labels'])[['movement','filename_features','filename_labels']]

# Feature files without a label file are dropped, except for the 'none' movement,
# whose missing label is filled with the 'labels_none.csv' placeholder.
comb_ds = comb_ds.drop(comb_ds[(comb_ds.movement != 'none') & (pd.isnull(comb_ds.filename_labels))].index)
comb_ds = comb_ds.fillna({'filename_labels': 'labels_none.csv'})
comb_ds = comb_ds.reset_index(drop = True)
comb_ds = comb_ds[['filename_features','filename_labels']]

data_source_df = ds
combined_data_files_df = comb_ds

In [35]:
# Spot-check a few random label-file rows.
ds_labels.sample(n = 5)

Unnamed: 0,filename,filetype,movement,person,filenum,frame_length
49,labels_left_c_03.csv,labels,left,c,3,
55,labels_takeoff_c_01.csv,labels,takeoff,c,1,
34,labels_flip_c_01.csv,labels,flip,c,1,
47,labels_left_c_01.csv,labels,left,c,1,
54,labels_right_c_02.csv,labels,right,c,2,


In [36]:
# Spot-check a few random feature/label file pairings.
combined_data_files_df.sample(n = 5)

Unnamed: 0,filename_features,filename_labels
24,features_takeoff_c_03_120.csv,labels_takeoff_c_03.csv
7,features_land_c_01_120.csv,labels_land_c_01.csv
11,features_land_l_02_120.csv,labels_land_l_02.csv
19,features_none_p_01_120.csv,labels_none.csv
0,features_flip_c_01_120.csv,labels_flip_c_01.csv
