## Creating dataset of different activities 
This script reads the action annotation text file provided for each long video, extracts the start and end time of each annotated action, and trims the pose estimation results of the long video to that interval. Each trimmed result is written to the folder that corresponds to the annotated action.

In [1]:
# import required packages
import sys
import os
import io
import cPickle as pickle
import os.path as osp
import numpy as np
import pylab
import math
import shutil
import glob
import re
from copy import deepcopy

### Definition of required functions and variables

In [2]:
def _id_or_index(ix, val):
    """Return ``val[ix]``, or ``val`` itself when ``val`` has length zero.

    Args:
        ix: index (or key) used to look up an element of ``val``.
        val: any sized, indexable object.

    Returns:
        ``val`` unchanged if ``len(val) == 0``; otherwise ``val[ix]``.
    """
    # The explicit length test (rather than truthiness) is kept on purpose so
    # that objects without a simple boolean value are handled the same way.
    return val if len(val) == 0 else val[ix]
def robust_pickle_dump(data_dict, file_name):
    """Pickle ``data_dict`` to ``file_name`` using the highest pickle protocol.

    The path is made absolute first.  The parent directory is created when it
    does not exist yet, so a clip can be written into an action folder that
    has not been prepared beforehand.

    Args:
        data_dict: the object to serialize.
        file_name: destination path (relative or absolute).

    Raises:
        OSError: if the parent directory is missing and cannot be created.
        IOError: if the file cannot be opened for writing.
    """
    file_name = os.path.abspath(file_name)
    parent_dir = os.path.dirname(file_name)
    if not os.path.isdir(parent_dir):
        try:
            os.makedirs(parent_dir)
        except OSError:
            # Somebody else may have created the folder in the meantime;
            # only give up when it is really still missing.
            if not os.path.isdir(parent_dir):
                raise
    with open(file_name, 'wb') as f:
        pickle.dump(data_dict, f, pickle.HIGHEST_PROTOCOL)
# Floating point type shared by the notebook.
DTYPE = np.float32
# Video frame size in pixels (width x height).
width, height = 1920, 1080
# Frame rate; used to turn annotation times in seconds into frame indices.
fps = 30.0

In [3]:
# Cut the pose-tracking result of every long video into per-action clips.
#
# For each subject the script loads the pickled pose result, reads the
# 'Posture' rows of the annotation file, and stores the frames of every
# annotated action under <base_data_path>/<action>/.  Actions longer than
# `max_len` seconds are divided into `max_len`-second clips plus a residual.
pose_base_path = '../../PoseTracking/outputs/TuftsVideos'
actions = ['Sitting', 'Sit-to-Stand', 'Stand-to-Sit', 'Standing', 'Walking']
base_data_path = '../input/TuftsVideos/Posture/test'
max_len = 4  # longest clip stored as a single file, in seconds
min_len = 0.2  # segments and residuals must be longer than this, in seconds

# Annotation fields are separated by commas, tabs or newlines.  The pattern
# does not depend on the subject, so it is built and compiled once.
delimiters = ",","\t", "\n", "\t\t"
regexpattern = '|'.join(map(re.escape, delimiters))
field_splitter = re.compile(regexpattern)

# Create the per-action output folders if they do not exist yet.
for act_name in actions:
    act_dir = osp.join(base_data_path, act_name)
    if not osp.isdir(act_dir):
        os.makedirs(act_dir)


def _save_clip(dets, out_dir, subj_id, visit_no, st_fr, end_fr, stop_fr=None):
    """Pickle the frames ``[st_fr, stop_fr)`` of ``dets`` into ``out_dir``.

    The file is named ``<subj><visit>_<st_fr>_<end_fr>.pkl``.  ``stop_fr``
    defaults to ``end_fr``; pass ``end_fr + 1`` when ``end_fr`` is the last
    frame that belongs to the clip.
    """
    if stop_fr is None:
        stop_fr = end_fr
    clip_name = '{:02d}{:02d}_{:06d}_{:06d}.pkl'.format(subj_id, visit_no, st_fr, end_fr)
    clip = dict(target_boxes=dets['target_boxes'][st_fr:stop_fr],
                target_keyps=dets['target_keyps'][st_fr:stop_fr])
    robust_pickle_dump(clip, osp.join(out_dir, clip_name))


for subj_id in range(32,35,1):
    # Only the first visit is processed; loop over [1, 2] here to include the
    # second visit as well.
    visit_no = 1
    annotation_path = '../TuftsVideo_annotations/121601_{:02d}{:02d}'.format(subj_id, visit_no)
    annotation_file_name = '121601_{:02d}{:02d}_Arbitration.txt'.format(subj_id, visit_no)
    pose_dir = osp.join(pose_base_path, '031216010{:02d}'.format(subj_id),
                        'Visit_{:02d}'.format(visit_no))
    pose_data_path = glob.glob(osp.join(pose_dir, '*.pkl'))
    print('---processing subject {:2d} visit {:2d}'.format(subj_id, visit_no))
    if not pose_data_path:
        # Fail with the offending folder instead of a bare IndexError.
        raise IOError('no pose result (*.pkl) found in {}'.format(pose_dir))

    # Read the pose results.
    # NOTE(review): pickle.load can execute arbitrary code; only load pose
    # files that come from a trusted source.
    with open(pose_data_path[0], 'rb') as res:
        target_dets = pickle.load(res)    # {'target_boxes':[], 'target_keyps':[]}

    # Collect the posture-related annotation rows (first seven fields each).
    # The file is text, so it is opened in text mode.
    annot_list = []
    with open(osp.join(annotation_path, annotation_file_name), 'r') as f:
        for line in f:
            temp = [tok for tok in field_splitter.split(line) if tok != '']
            # Blank lines yield no fields at all and are skipped.
            if temp and temp[0]=='Posture':
                annot_list.append(temp[:7])

    # Save the frames of every annotated action in the matching action folder.
    for annot in annot_list:
        st_t = float(annot[3])
        end_t = float(annot[4])
        length = float(annot[5])
        act = annot[6]
        if length < min_len:
            print('---Data between {} and {} related to action {} is removed due to the duration below {} seconds'
                  .format(st_t, end_t, act, min_len))
        if act not in actions or length <= min_len:
            continue
        out_dir = osp.join(base_data_path, act)

        if length <= max_len:
            print('---Data between {} and {} related to action {} with duration {} is saved'
              .format(st_t, end_t, act, length))
            st_fr = int(st_t * fps)
            end_fr = int(end_t * fps)
            _save_clip(target_dets, out_dir, subj_id, visit_no, st_fr, end_fr)
            continue

        clip_no = int(length // max_len)
        resid = length % max_len
        print('---Data between {} and {} related to {} is devided to {} clips with length of {} seconds each'
              .format(st_t, end_t, act, clip_no, max_len))
        for clip_ix in range(clip_no):
            st_fr = int((st_t + clip_ix*max_len) * fps)
            end_fr = int((st_t + (clip_ix+1)*max_len) * fps)-1
            # end_fr is the last frame of this clip (the next clip starts at
            # end_fr + 1), so the slice has to stop one past it; stopping at
            # end_fr would drop one frame from every full-length clip.
            _save_clip(target_dets, out_dir, subj_id, visit_no, st_fr, end_fr,
                       stop_fr=end_fr + 1)
        if resid > min_len:
            print('---Residual of the data between {} and {} related to action {} with duration {} is saved'
                  .format(st_t, end_t, act, resid))
            st_fr = int((st_t + clip_no*max_len) * fps)
            end_fr = int(end_t * fps)
            _save_clip(target_dets, out_dir, subj_id, visit_no, st_fr, end_fr)
                
    
    

---processing subject 32 visit  1
---Data between 0.0 and 641.474 related to Sitting is devided to 160 clips with length of 4 seconds each
---Residual of the data between 0.0 and 641.474 related to action Sitting with duration 1.474 is saved
---Data between 641.474 and 641.641 related to action PGT N/A is removed due to the duration below 0.2 seconds
---Data between 641.641 and 655.221 related to Sitting is devided to 3 clips with length of 4 seconds each
---Residual of the data between 641.641 and 655.221 related to action Sitting with duration 1.58 is saved
---Data between 655.521 and 949.682 related to Sitting is devided to 73 clips with length of 4 seconds each
---Residual of the data between 655.521 and 949.682 related to action Sitting with duration 2.161 is saved
---Data between 949.682 and 951.016 related to action Sit-to-Stand with duration 1.334 is saved
---Data between 951.016 and 952.318 related to action Sit-to-Stand with duration 1.302 is saved
---Data between 952.318 and

---Data between 0.0 and 512.578 related to Sitting is devided to 128 clips with length of 4 seconds each
---Residual of the data between 0.0 and 512.578 related to action Sitting with duration 0.578 is saved
---Data between 512.945 and 525.291 related to Sitting is devided to 3 clips with length of 4 seconds each
---Residual of the data between 512.945 and 525.291 related to action Sitting with duration 0.346 is saved
---Data between 525.658 and 538.941 related to Sitting is devided to 3 clips with length of 4 seconds each
---Residual of the data between 525.658 and 538.941 related to action Sitting with duration 1.283 is saved
---Data between 538.941 and 539.008 related to action PGT N/A is removed due to the duration below 0.2 seconds
---Data between 539.009 and 549.249 related to Sitting is devided to 2 clips with length of 4 seconds each
---Residual of the data between 539.009 and 549.249 related to action Sitting with duration 2.24 is saved
---Data between 549.249 and 549.415 rela

---Residual of the data between 1432.831 and 1845.109 related to action Sitting with duration 0.278 is saved
---Data between 1845.443 and 2574.071 related to Sitting is devided to 182 clips with length of 4 seconds each
---Residual of the data between 1845.443 and 2574.071 related to action Sitting with duration 0.628 is saved
---Data between 2574.071 and 2574.138 related to action PGT N/A is removed due to the duration below 0.2 seconds
---Data between 2574.138 and 2658.825 related to Sitting is devided to 21 clips with length of 4 seconds each
---Residual of the data between 2574.138 and 2658.825 related to action Sitting with duration 0.687 is saved
---Data between 2659.359 and 2671.236 related to Sitting is devided to 2 clips with length of 4 seconds each
---Residual of the data between 2659.359 and 2671.236 related to action Sitting with duration 3.877 is saved
---Data between 2671.568 and 2672.469 related to action Sitting with duration 0.901 is saved
---Data between 2672.736 and

---Residual of the data between 1902.0 and 2745.946 related to action Sitting with duration 3.946 is saved
---Data between 2745.946 and 2746.08 related to action PGT N/A is removed due to the duration below 0.2 seconds
---Data between 2746.08 and 2746.949 related to action Sitting with duration 0.869 is saved
---Data between 2762.76 and 2763.282 related to action Sitting with duration 0.522 is saved
---Data between 2763.794 and 2764.751 related to action Sitting with duration 0.957 is saved
---Data between 2764.751 and 2768.332 related to action Sit-to-Stand with duration 3.581 is saved
---Data between 2768.332 and 2773.003 related to Walking is devided to 1 clips with length of 4 seconds each
---Residual of the data between 2768.332 and 2773.003 related to action Walking with duration 0.671 is saved
---Data between 2773.003 and 2773.245 related to action Standing with duration 0.242 is saved
---Data between 2773.646 and 2778.834 related to Walking is devided to 1 clips with length of 