In [1]:
# for data
import pandas as pd
import numpy as np

# for plotting
import matplotlib.pyplot as plt
import seaborn as sns

# for statistical tests
import scipy
import statsmodels.formula.api as smf
import statsmodels.api as sm

# for machine learning
from sklearn import model_selection, preprocessing, feature_selection, ensemble, linear_model, metrics, decomposition

from csv import reader

In [39]:
# Input file: one sample per row, no header line.
FILE_NAME = 'train.csv'

# Layout of the coordinate values inside a row:
# NUM_FRAMES frames, each holding NUM_JOINTS joints, each holding
# NUM_FEATURES values (x, y, z) -> 16 * 20 * 3 = 960 values per sample.
NUM_FRAMES = 16
NUM_JOINTS = 20
NUM_FEATURES = 3

In [40]:
# Load the raw table (the file has no header row) and use its first column
# as the row index.
dtf = pd.read_csv(FILE_NAME, header=None)
dtf = dtf.set_index(dtf.columns[0])

# Shuffle the rows. The seed is fixed so that "Restart Kernel -> Run All"
# reproduces the same order (and therefore the same outputs) every time.
# NOTE(review): the array written to 'data.npy' further down is built in CSV
# file order, not in this shuffled order -- confirm which order the labels
# are expected to follow.
dtf = dtf.sample(frac=1, random_state=42)

# Every column except the last is a feature; the last column is kept as a
# single-column DataFrame of targets.
X_train = dtf.iloc[:, :-1]
Y_train = dtf.iloc[:, -1:]
X_train

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,951,952,953,954,955,956,957,958,959,960
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1403,-11.258000,2.78660,-3.999400,-3.85580,1.54040,-2.276000,6.714200,3.24890,4.58880,10.90300,...,180.4500,-65.1230,209.3200,61.7110,2.5790,208.58000,96.4790,11.27400,207.60000,63.3190
4277,-18.158000,8.38960,-4.724800,-7.55610,4.61370,-0.439890,1.520500,1.50210,1.81470,0.89008,...,66.2160,32.1540,-4.6373,-1.8971,-6.9919,3.25280,-5.7017,16.80600,-1.07180,-87.6550
6929,-0.408690,0.49399,-0.618760,1.13690,0.15342,0.063659,2.773000,0.49845,0.90459,2.10390,...,-3.4070,-42.0700,-12.3770,-6.0775,30.9800,0.82884,3.1034,23.35000,1.46540,6.5448
8697,-9.500600,3.25010,-7.068600,-4.28300,2.05120,-4.431200,-0.069019,1.03990,-1.25790,0.38387,...,7.5393,1.0368,-3.9418,-10.7680,-10.1790,-2.29240,-17.1760,-3.81540,-2.28380,-28.8340
3717,0.516920,1.31350,-1.901700,2.02740,2.61280,-2.132600,5.821000,4.46720,-3.56550,3.92210,...,-28.0560,9.4634,-4.4178,-1.0324,2.7195,6.11190,-1.1214,6.42120,1.60300,-4.8217
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8744,1.942600,0.25684,2.449400,0.56727,-1.07820,-2.221800,-1.887000,-3.06120,-9.23290,-2.51270,...,198.3000,2.4006,-93.3590,71.4200,-1.0357,-78.44100,34.9490,-32.98000,-9.15640,54.3950
6326,0.079234,-0.80433,-2.492900,1.28310,-0.34357,-0.654510,1.732500,-0.68448,1.12130,0.70978,...,-5.8900,2.0468,-2.8890,-4.7754,-1.0007,-0.79301,-2.7299,-0.58793,-0.85626,-2.3231
5285,-0.187490,0.14985,-0.007812,-0.23791,0.16121,0.287630,-0.137070,0.25283,0.65266,-0.67326,...,-10.0460,-5.0715,5.0004,-10.7140,-3.2136,-4.32430,2.1966,-9.76160,-3.79240,2.3529
7008,-9.115500,3.14300,-0.932120,-5.48990,1.38080,-0.396010,-1.909600,0.48509,-0.63815,-1.29570,...,-19.9450,-56.0270,-13.5460,-86.3950,49.7630,-12.17100,-93.5710,58.26500,23.76100,-166.0400


In [4]:
def read_xyz(row):
    """Turn one CSV row into a dense coordinate array.

    The row is parsed with ``read_skeleton`` and the x/y/z values of every
    joint in every frame are copied into an array.

    Args:
        row: Sequence of values for one sample, as accepted by
            ``read_skeleton``.

    Returns:
        numpy.ndarray of shape (NUM_FEATURES, NUM_FRAMES, NUM_JOINTS),
        i.e. coordinate axis first, then frame, then joint.
    """
    frames = read_skeleton(row)['frame_info']

    # Filled as (frame, joint, coordinate); slots that are never written stay 0.
    coords = np.zeros((NUM_FRAMES, NUM_JOINTS, NUM_FEATURES))
    for frame_idx, frame in enumerate(frames):
        for joint_idx, joint in enumerate(frame['joint_info']):
            xyz = joint['feature_info']
            coords[frame_idx, joint_idx, :] = (xyz['x'], xyz['y'], xyz['z'])

    # Reorder to (coordinate, frame, joint).
    return np.transpose(coords, (2, 0, 1))

In [5]:
def read_skeleton(row, num_frames=None, num_joints=None, num_features=None):
    """Parse one flat sample row into a nested skeleton dictionary.

    Element 0 of ``row`` is skipped (the notebook uses that column as the
    DataFrame index). The following
    ``num_frames * num_joints * num_features`` elements are read frame by
    frame, joint by joint. Anything after them is ignored.

    Args:
        row: Sequence of values (strings or numbers) for one sample.
        num_frames: Frames per sample; defaults to ``NUM_FRAMES``.
        num_joints: Joints per frame; defaults to ``NUM_JOINTS``.
        num_features: Values per joint; defaults to ``NUM_FEATURES``.
            Values are named 'x', 'y', 'z' in that order, so at most three
            are kept per joint.

    Returns:
        dict with keys 'num_frame' and 'frame_info'. 'frame_info' is a list
        of per-frame dicts ('num_joints', 'joint_info'); each 'joint_info'
        entry is a dict with 'num_features' and 'feature_info', the latter
        mapping 'x'/'y'/'z' to floats.

    Raises:
        ValueError: if ``row`` is too short for the requested layout, or a
            value cannot be converted to float.
    """
    num_frames = NUM_FRAMES if num_frames is None else num_frames
    num_joints = NUM_JOINTS if num_joints is None else num_joints
    num_features = NUM_FEATURES if num_features is None else num_features

    frame_width = num_joints * num_features
    expected_len = 1 + num_frames * frame_width
    # Slicing and zip() would silently truncate a short row and leave joints
    # without 'x'/'y'/'z' keys, so reject it here with a clear message.
    if len(row) < expected_len:
        raise ValueError(
            "row has {} fields, expected at least {} "
            "(1 id + {} frames x {} joints x {} features)".format(
                len(row), expected_len, num_frames, num_joints, num_features
            )
        )

    skeleton_data = {'num_frame': num_frames, 'frame_info': []}

    for frame in range(num_frames):
        frame_start = 1 + frame * frame_width  # +1 skips the leading id field
        data_in_frame = row[frame_start:frame_start + frame_width]
        frame_info = {'num_joints': num_joints, 'joint_info': []}

        for joint in range(num_joints):
            joint_start = joint * num_features
            data_in_joint = data_in_frame[joint_start:joint_start + num_features]
            frame_info['joint_info'].append({
                'num_features': num_features,
                'feature_info': {
                    k: float(v)
                    for k, v in zip(('x', 'y', 'z'), data_in_joint)
                },
            })

        skeleton_data['frame_info'].append(frame_info)
    return skeleton_data

In [9]:
def normalisation(data):
    """Unfinished normalisation step: currently only transposes and returns None.

    The body unpacks a 4-D shape and builds a transposed copy/view, but the
    result is neither used nor returned, so calling this function has no
    effect on the caller's data.

    Args:
        data: Array whose ``shape`` has exactly four entries, named
            (N, C, T, V) below; any other rank makes the unpacking raise
            ValueError.

    NOTE(review): the array ``fp`` built later in this notebook is 5-D
    (it has a trailing axis of size 1), so ``normalisation(fp)`` would fail
    at the shape unpacking as written -- confirm the intended input before
    completing this function.
    """
    # Axis names follow ST-GCN: N = number of samples, C = features per joint,
    # T = frames, V = joints. There is no M axis (people per frame) here.
    N, C, T, V = data.shape
    transpose_data = np.transpose(data, [0, 2, 3, 1])  # N, C, T, V to  N, T, V, C
    

In [42]:
# Convert every CSV row into a (features, frames, joints) block and stack the
# blocks into one array of shape
# (num samples, NUM_FEATURES, NUM_FRAMES, NUM_JOINTS, num people),
# where the number of people is always 1 in our case.
# Samples are stored in CSV file order.
#
# newline='' is how the csv module asks files to be opened, so that line
# endings inside the file are handled by the reader itself.
with open(FILE_NAME, 'r', newline='') as f:
    # csv.reader yields [] for blank lines; skip them so they neither crash
    # the parser nor shift the sample index.
    samples = [read_xyz(row).astype(np.float32) for row in reader(f) if row]

# Size the array from the rows actually parsed, so this cell does not depend
# on a DataFrame created in another cell.
fp = np.zeros((len(samples), NUM_FEATURES, NUM_FRAMES, NUM_JOINTS, 1), dtype=np.float32)
for i, sample in enumerate(samples):
    fp[i, :, :, :, 0] = sample

# TODO: apply normalisation(fp) once that function is implemented.
np.save('data.npy', fp)

In [44]:
# Load the pickled label file. Despite the variable name, the output shown
# below is not a DataFrame but a tuple of two lists:
# (skeleton file names, integer class labels).
# NOTE(review): unpickling can execute arbitrary code -- only load
# 'train_label.pkl' from a trusted source.
unpickled_df = pd.read_pickle("train_label.pkl")
unpickled_df

(['S001C001P001R001A001.skeleton',
  'S001C001P004R001A049.skeleton',
  'S001C001P005R001A007.skeleton',
  'S001C001P002R001A020.skeleton'],
 [0, 48, 6, 19])