In [1]:
import os
import numpy as np
import pandas as pd
import re

# Import data augmentation
import sys
module_path = os.path.abspath(os.path.join('../..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from augmentation.methods import *

In [2]:
import warnings

# Silence every warning category for the rest of the session.
# Note that this also hides deprecation warnings raised by pandas/numpy.
warnings.simplefilter(action='ignore')

In [3]:
# Fixed seed for NumPy's global random generator so that runs are repeatable
random_state = 47
np.random.seed(seed=random_state)

In [4]:
# --- Input locations (relative to the notebook's working directory) ---
posenet_dataset_path = '../../datasets/posenet-uncut/'
kinect_dataset_path = '../../datasets/kinect_fixed_not_cut/'

# --- Score tables ---
goodness_score = pd.read_csv(filepath_or_buffer='../../datasets/VideoScoring.csv')
exrecise_score = pd.read_csv(filepath_or_buffer='../../datasets/ExerciseScoring.csv')

# --- Tunable parameters ---
# Fraction of the selected files that goes to the training split
train_test_ratio = 0.9
# Name of the label column added to every loaded frame
new_label = 'ExreciseScore'
# Column of the exercise score table that supplies the label value
exersice_score_indicator = 'O_Score'
# Column of the video score table used for filtering, and its minimum accepted value
goodness_score_indicator = 'AVG'
goodness_score_threshold = 3

In [5]:
# Drop video augmentation results (rows whose FileName starts with 'U'),
# then keep only the videos whose score reaches the threshold.
is_augmented_video = goodness_score['FileName'].str.match('U')
goodness_score = goodness_score[is_augmented_video==False]
goodness_score = goodness_score[goodness_score[goodness_score_indicator]>=goodness_score_threshold]

posenet_ok = goodness_score['FileName']+'.csv'

# Turn the recording names in the exercise score table into file names.
# The suffix is only added when it is missing, so re-running this cell does not
# produce '.csv.csv' keys (which would make the per-file score lookup come back empty).
for score_file_column in ('Posenet', 'Kinect'):
    score_file_names = exrecise_score[score_file_column]
    has_csv_suffix = score_file_names.str.endswith('.csv', na=False)
    exrecise_score[score_file_column] = score_file_names.where(has_csv_suffix, score_file_names+'.csv')

In [6]:
# File names that passed the goodness filter. They are built once, as sets, instead of
# rebuilding a list for every directory entry. Every accepted name ends with '.csv',
# so no separate extension check is needed.
posenet_allowed = set(goodness_score['FileName']+'.csv')
kinect_allowed = set(goodness_score['FileName']+'_kinect.csv')

# NOTE: os.listdir returns entries in arbitrary order, and that order decides which
# files later fall before/after the train/test split point.
posenet_files = [file for file in os.listdir(posenet_dataset_path) if file in posenet_allowed]
kinect_files = [file for file in os.listdir(kinect_dataset_path) if file in kinect_allowed]

print('Total posenet datasets: {}'.format(len(posenet_files)))
print('Total kinect datasets: {}'.format(len(kinect_files)))

Total posenet datasets: 157
Total kinect datasets: 157


In [7]:
# Position in each file list where the training part ends and the test part begins
TRAIN_SPLIT_POSENET, TRAIN_SPLIT_KINECT = (
    int(len(files)*train_test_ratio) for files in (posenet_files, kinect_files)
)
print(TRAIN_SPLIT_POSENET,TRAIN_SPLIT_KINECT)

141 141


In [8]:
def read_dataset(path,file_list,split_point,is_posenet=True,isTrain=True):
    """Load one side of the train/test split and label every row with its exercise score.

    Parameters
    ----------
    path : str
        Directory prefix; each file is read from ``path + file``.
    file_list : list of str
        CSV file names. Entries before ``split_point`` form the training part,
        the remaining entries form the test part.
    split_point : int
        Index that separates the training files from the test files.
    is_posenet : bool, default True
        If True, columns matching ``_eye_``, ``_ear_`` and ``score`` are dropped,
        ``nose_x``/``nose_y`` are renamed to ``head_x``/``head_y`` and the score is
        looked up through the ``Posenet`` column of ``exrecise_score``.
        Otherwise ``Unnamed: 0`` and ``FrameNo`` are dropped and the ``Kinect``
        column is used for the lookup.
    isTrain : bool, default True
        Selects the training part (True) or the test part (False).

    Returns
    -------
    pandas.DataFrame or None
        All selected files stacked with a fresh 0..n-1 index and a ``new_label``
        column, or None when the selected part contains no files.

    Raises
    ------
    ValueError
        If a file does not have exactly one row in ``exrecise_score``.
    """
    selected_files = file_list[:split_point] if isTrain else file_list[split_point:]
    lookup_column = 'Posenet' if is_posenet else 'Kinect'

    frames = []
    for file in selected_files:
        df = pd.read_csv(path+file)

        if is_posenet:
            df = df[df.columns.drop(list(df.filter(regex='_eye_')))]
            df = df[df.columns.drop(list(df.filter(regex='_ear_')))]
            df = df[df.columns.drop(list(df.filter(regex='score')))]
            df = df.rename(columns={'nose_x': 'head_x', 'nose_y': 'head_y'})
        else:
            df = df.drop(columns=['Unnamed: 0','FrameNo'])

        # Explicit single-row lookup instead of float(Series), which is deprecated
        # and fails with an unhelpful TypeError on zero or several matches.
        scores = exrecise_score.loc[exrecise_score[lookup_column] == file, exersice_score_indicator]
        if len(scores) != 1:
            raise ValueError(
                'Expected exactly one {} entry for {!r} in the exercise score table, found {}'.format(
                    exersice_score_indicator, file, len(scores)))
        df[new_label] = float(scores.iloc[0])

        frames.append(df)

    if not frames:
        return None

    # One concat at the end replaces the repeated DataFrame.append calls, which
    # re-copied the accumulated data on every iteration and no longer exist in pandas 2.0.
    return pd.concat(frames, ignore_index=True)

In [9]:
# PoseNet recordings: training part, then test part
posenet_train_dataset = read_dataset(
    posenet_dataset_path, posenet_files, TRAIN_SPLIT_POSENET, is_posenet=True, isTrain=True)
posenet_test_dataset = read_dataset(
    posenet_dataset_path, posenet_files, TRAIN_SPLIT_POSENET, is_posenet=True, isTrain=False)

# Kinect recordings: training part, then test part
kinect_train_dataset = read_dataset(
    kinect_dataset_path, kinect_files, TRAIN_SPLIT_KINECT, is_posenet=False, isTrain=True)
kinect_test_dataset = read_dataset(
    kinect_dataset_path, kinect_files, TRAIN_SPLIT_KINECT, is_posenet=False, isTrain=False)

## 3. Data Augmentation

In [10]:
def data_augmentation(df):
    """Enlarge a labelled frame with mirrored and rescaled variants.

    The frame is first passed through ``mirror(df, 'x', append=True)``. Then, twice,
    the feature columns (everything except ``new_label``) are passed through
    ``augMultiplier`` (multiplier 1.5, then 0.25) and the result is appended below
    the current frame with the labels carried over. The second rescaling operates
    on the frame that already contains the first one. The shape is printed after
    each stage.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the feature columns and the ``new_label`` column.

    Returns
    -------
    pandas.DataFrame
        The augmented frame with a fresh 0..n-1 index.
    """
    # Mirror X coordinate
    df = mirror(df,'x', append=True)
    print(df.shape)

    # Stretch by 50%
    df_temp = augMultiplier(df.drop(columns=[new_label]), multiplier=1.5)
    # Labels are aligned by index - assumes augMultiplier keeps the row index (TODO confirm)
    df_temp[new_label] = df[new_label]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0
    df = pd.concat([df, df_temp], ignore_index=True)
    print(df.shape)

    # Rescale with multiplier=0.25.
    # NOTE(review): the original comment said "compress by 25%", which would suggest 0.75
    # if the multiplier is a plain scale factor - confirm against augMultiplier.
    df_temp = augMultiplier(df.drop(columns=[new_label]), multiplier=0.25)
    df_temp[new_label] = df[new_label]
    df = pd.concat([df, df_temp], ignore_index=True)
    print(df.shape)

    # Rotation augmentation is currently disabled. The earlier experiment rotated
    # 5000 sampled rows via rotate(..., posenet=False) by 3.1415/7 and by 3.1415/-9.
    # The shape is still printed here, so the last two printed shapes are identical.
    print(df.shape)

    return df

In [11]:
# Each split is replaced by its augmented version; every call prints its stage shapes.
# Re-running this cell augments the already augmented frames again.
posenet_train_dataset = data_augmentation(df=posenet_train_dataset)
posenet_test_dataset = data_augmentation(df=posenet_test_dataset)
kinect_train_dataset = data_augmentation(df=kinect_train_dataset)
kinect_test_dataset = data_augmentation(df=kinect_test_dataset)

(225560, 27)
(451120, 27)
(902240, 27)
(902240, 27)
(23874, 27)
(47748, 27)
(95496, 27)
(95496, 27)
(223874, 40)
(447748, 40)
(895496, 40)
(895496, 40)
(26534, 40)
(53068, 40)
(106136, 40)
(106136, 40)


## Save result

In [12]:
def cut_and_save(df,slice_size,name):
    """Write ``df`` to ``slice_size`` consecutive CSV files.

    The files are named ``../../datasets/exercise_score/{name}_{k}.csv`` with
    ``k`` running from 1 to ``slice_size``. Every file holds
    ``len(df) // slice_size`` rows, except the last one, which also receives the
    remainder so that no row is lost when the length is not evenly divisible.
    Rows are selected by position, so the frame's index labels do not matter,
    and the index itself is not written.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to be saved.
    slice_size : int
        Number of files to split the frame into.
    name : str
        Base name of the output files.
    """
    total_rows = len(df)
    cut_size = total_rows // slice_size

    for i in range(slice_size):
        start = cut_size*i
        # The last slice runs to the end of the frame to pick up the remainder rows
        stop = total_rows if i == slice_size-1 else cut_size*(i+1)
        temp = df.iloc[start:stop]
        temp.to_csv('../../datasets/exercise_score/{}_{}.csv'.format(name,i+1), index=False)


    print('A dataset with {} has been saved in /datasets/exercise_score/{}.csv'.format(len(df),name))

In [13]:
# Training sets are written as 5 files each, test sets as 3 files each
cut_and_save(df=posenet_train_dataset, slice_size=5, name='posenet_train_dataset')
cut_and_save(df=posenet_test_dataset, slice_size=3, name='posenet_test_dataset')
cut_and_save(df=kinect_train_dataset, slice_size=5, name='kinect_train_dataset')
cut_and_save(df=kinect_test_dataset, slice_size=3, name='kinect_test_dataset')

A dataset with 902240 has been saved in /datasets/exercise_score/posenet_train_dataset.csv
A dataset with 95496 has been saved in /datasets/exercise_score/posenet_test_dataset.csv
A dataset with 895496 has been saved in /datasets/exercise_score/kinect_train_dataset.csv
A dataset with 106136 has been saved in /datasets/exercise_score/kinect_test_dataset.csv
