In [13]:
import numpy as np
import math
import random
import pandas as pd
from tqdm import tqdm

In [2]:
def read_train_test_list(path):
    """Read a split file and return its train and test entries.

    The whole file at ``path`` is read; its second line (index 1) is split
    on ``', '`` to give the train entries and its fourth line (index 3) is
    split the same way to give the test entries.

    Entries are returned exactly as split: nothing is stripped, so the last
    entry of each array is whatever followed the final ``', '`` on that
    line, including the line terminator.

    Parameters
    ----------
    path : str
        Path of the split file.

    Returns
    -------
    tuple of numpy.ndarray
        ``(train, test)``, two 1-D arrays of strings.

    Raises
    ------
    IndexError
        If the file has fewer than four lines.
    """
    with open(path) as handle:
        rows = handle.readlines()
    train = np.stack(rows[1].split(', '))
    test = np.stack(rows[3].split(', '))
    return train, test

def read_skeleton(path):
    """Load a skeleton text file into a ``(frames, rows, 3)`` float array.

    Every non-blank line of the file is one frame: a whitespace-separated
    list of floats that is reshaped into rows of three values.  The frames
    are then stacked along a new leading axis.

    Blank lines and trailing whitespace are tolerated, and each frame is
    built with a single array construction rather than by stacking one
    0-d array per value, which is very slow on long recordings.

    Parameters
    ----------
    path : str
        Path of the skeleton file.

    Returns
    -------
    numpy.ndarray
        Float64 array of shape ``(n_frames, n_values // 3, 3)``.

    Raises
    ------
    ValueError
        If the file contains no frames, a token is not a valid float, a
        frame's value count is not a multiple of 3, or frames differ in
        length.
    """
    frames = []
    with open(path) as handle:
        for row in handle:
            # split() with no argument drops the newline and any run of
            # whitespace, so trailing spaces no longer yield empty tokens.
            tokens = row.split()
            if not tokens:
                # Blank line: carries no frame.
                continue
            values = np.array([float(tok) for tok in tokens], dtype=np.float64)
            frames.append(values.reshape(-1, 3))
    if not frames:
        # np.stack would also raise ValueError here; name the file instead.
        raise ValueError('no skeleton frames found in ' + repr(path))
    return np.stack(frames)

def read_label(path):
    """Parse a label file into a list of integer rows.

    Each non-blank line of the file is a comma-separated list of integers
    and becomes one inner list, in file order.  Blank lines (for example a
    trailing empty line) are skipped instead of raising ``ValueError``.

    Parameters
    ----------
    path : str
        Path of the label file.

    Returns
    -------
    list of list of int
        One inner list per non-blank line; empty if the file has none.

    Raises
    ------
    ValueError
        If a field on a non-blank line is not an integer literal.
    """
    labels = []
    with open(path) as handle:
        for row in handle:
            row = row.strip()
            if not row:
                # Nothing to parse on an empty line.
                continue
            labels.append([int(field) for field in row.split(',')])
    return labels

## Reading the train list and test list

In [3]:
# Dataset root and the three sub-folders derived from it.  The paths are
# built by plain string concatenation, so each one keeps its trailing '/'.
# NOTE(review): machine-specific absolute path -- adjust when running elsewhere.
root_dir = '/mnt/nasbi/no-backups/datasets/pose_action/PKUMMD/'
# Holds the cross-subject / cross-view split files.
split_folder = root_dir+'split/'
# Holds one '<name>.txt' skeleton file per entry.
skeleton_folder = root_dir+'skeleton/'
# Holds one '<name>.txt' label file per entry.
label_folder = root_dir+'label/'

In [4]:
# Split files for the cross-subject (cs) and cross-view (cv) splits.
cs_path = split_folder+'cross-subject.txt'
cv_path = split_folder+'cross-view.txt'

# Each call returns (train, test) string arrays taken from the 2nd and 4th
# lines of the split file.  Entries are not stripped, so the last element of
# every array still contains the tail of its line.
cs_train_list, cs_test_list = read_train_test_list(cs_path)
cv_train_list, cv_test_list = read_train_test_list(cv_path)

## Extracting the poses and saving them

In [14]:
# Directory the pickled pose/label Series are written to and read back from.
# NOTE(review): user-specific absolute path -- adjust when running elsewhere.
data_dir = '/home/fan/projects/pose/skeleton/data/PKU/'

In [8]:
# Cross-subject training poses: entry name -> array returned by read_skeleton.
# dtype=object is stated explicitly because every value is a whole ndarray;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cs_train_poses = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cs_train_list[:-1]):
    cs_train_poses[f] = read_skeleton(skeleton_folder+f+'.txt')

cs_train_poses.to_pickle(data_dir+'cs_train_poses.pkl',compression='gzip')

100%|██████████| 944/944 [27:19<00:00,  1.83s/it]


In [9]:
# Cross-subject test poses: entry name -> array returned by read_skeleton.
# dtype=object is stated explicitly because every value is a whole ndarray;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cs_test_poses = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cs_test_list[:-1]):
    cs_test_poses[f] = read_skeleton(skeleton_folder+f+'.txt')

cs_test_poses.to_pickle(data_dir+'cs_test_poses.pkl',compression='gzip')

100%|██████████| 131/131 [04:17<00:00,  2.52s/it]


In [10]:
# Cross-view training poses: entry name -> array returned by read_skeleton.
# dtype=object is stated explicitly because every value is a whole ndarray;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cv_train_poses = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cv_train_list[:-1]):
    cv_train_poses[f] = read_skeleton(skeleton_folder+f+'.txt')

cv_train_poses.to_pickle(data_dir+'cv_train_poses.pkl',compression='gzip')

100%|██████████| 717/717 [21:47<00:00,  1.75s/it]


In [11]:
# Cross-view test poses: entry name -> array returned by read_skeleton.
# dtype=object is stated explicitly because every value is a whole ndarray;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cv_test_poses = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cv_test_list[:-1]):
    cv_test_poses[f] = read_skeleton(skeleton_folder+f+'.txt')

cv_test_poses.to_pickle(data_dir+'cv_test_poses.pkl',compression='gzip')

100%|██████████| 358/358 [11:43<00:00,  1.89s/it]


In [15]:
# Reload the cross-subject training poses from the gzip-compressed pickle
# stored at data_dir + 'cs_train_poses.pkl'.
# NOTE(review): unpickling can execute code embedded in the file -- only
# load pickles that you produced yourself.
cs_train_poses = pd.read_pickle(data_dir+'cs_train_poses.pkl',compression='gzip')

# Extracting the labels and saving them

In [38]:
# Cross-view test labels: entry name -> list of integer rows from read_label.
# dtype=object is stated explicitly because every value is a Python list;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cv_test_labels = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cv_test_list[:-1]):
    cv_test_labels[f] = read_label(label_folder+f+'.txt')
cv_test_labels.to_pickle(data_dir+'cv_test_labels.pkl',compression='gzip')

100%|██████████| 358/358 [00:01<00:00, 316.58it/s]


In [40]:
# Cross-view training labels: entry name -> list of integer rows from read_label.
# dtype=object is stated explicitly because every value is a Python list;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cv_train_labels = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cv_train_list[:-1]):
    cv_train_labels[f] = read_label(label_folder+f+'.txt')
cv_train_labels.to_pickle(data_dir+'cv_train_labels.pkl',compression='gzip')

100%|██████████| 717/717 [00:02<00:00, 340.13it/s]


In [43]:
# Cross-subject test labels: entry name -> list of integer rows from read_label.
# dtype=object is stated explicitly because every value is a Python list;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cs_test_labels = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cs_test_list[:-1]):
    cs_test_labels[f] = read_label(label_folder+f+'.txt')
cs_test_labels.to_pickle(data_dir+'cs_test_labels.pkl',compression='gzip')

100%|██████████| 131/131 [00:00<00:00, 606.88it/s]


In [44]:
# Cross-subject training labels: entry name -> list of integer rows from read_label.
# dtype=object is stated explicitly because every value is a Python list;
# a bare pd.Series() relies on a default dtype that differs between pandas
# versions (and emits a warning on pandas 1.x).
cs_train_labels = pd.Series(dtype=object)
# The final list element is skipped: it is the unstripped tail of the split
# line (it still holds the line terminator) and is presumably not a real
# entry name -- TODO confirm against the split file.
for f in tqdm(cs_train_list[:-1]):
    cs_train_labels[f] = read_label(label_folder+f+'.txt')
cs_train_labels.to_pickle(data_dir+'cs_train_labels.pkl',compression='gzip')

100%|██████████| 944/944 [00:01<00:00, 509.47it/s]
