In [57]:
# for data
import pandas as pd
import numpy as np

# for plotting
import matplotlib.pyplot as plt
import seaborn as sns

# for statistical tests
import scipy
import statsmodels.formula.api as smf
import statsmodels.api as sm

# for machine learning
from sklearn import model_selection, preprocessing, feature_selection, ensemble, linear_model, metrics, decomposition

from csv import reader
import pickle

In [69]:
NUM_FEATURES = 3
NUM_JOINTS = 20
NUM_FRAMES = 16
FILE_NAME = 'train.csv'

In [70]:
dtf = pd.read_csv(FILE_NAME, header = None)
dtf = dtf.set_index(dtf.columns[0])

dtf = dtf.sample(frac = 1)
X_train = dtf.iloc[:,:-1]
Y_train = dtf.iloc[:,-1:]
X_train

Unnamed: 0_level_0,1,2,3,4,5,6,7,8,9,10,...,951,952,953,954,955,956,957,958,959,960
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
3554,-2.15510,-3.413700,4.00260,-2.05380,-0.051400,7.49780,-2.281400,3.94750,13.39100,-1.33270,...,0.79524,2.45550,26.1540,-16.4620,1.31290,6.26860,-1.46780,5.21640,6.28330,-3.595800
2395,0.40081,1.923800,1.04880,0.38835,1.925800,0.24990,0.049842,1.98950,-0.30944,-0.59880,...,0.12391,-0.94008,1.3146,2.4727,0.33367,-0.32259,0.55242,-0.15783,-0.65279,0.631330
4581,1.26270,4.999300,3.37470,0.63942,2.893900,2.50700,-0.061666,0.55866,0.68053,-0.67686,...,5.19020,-0.38982,3.5979,2.7779,7.04750,1.77440,1.36990,0.82441,4.32320,-0.638680
7249,0.46435,3.248800,-2.14750,-0.62461,0.958280,-0.93527,-1.168700,-1.15880,0.24902,0.11588,...,5.13330,-58.19400,-21.7080,-131.2200,-1.17690,7.20930,-32.35200,-1.82860,6.81570,-33.969000
7044,6.04780,2.255000,2.88400,6.78260,1.407900,3.40390,7.374500,0.42711,3.89470,5.41940,...,23.63500,-5.23440,23.7270,12.2250,1.07360,-0.91200,5.39740,21.06900,-15.97000,59.083000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7155,1.26750,-0.092339,0.57568,1.88650,-0.074304,0.89309,3.702900,-0.95730,1.66430,2.60930,...,-12.04200,9.37310,-3.2766,-9.0456,4.76050,1.69460,-7.48960,-2.10650,-1.13120,4.900500
2969,2.34230,-5.364100,5.16830,5.48460,-6.777800,-1.60350,9.599900,-9.64160,-9.09990,3.17130,...,-1.49040,-1.67070,1.1150,2.8779,-2.80390,3.48620,-1.03690,-9.78960,3.52450,-0.090165
2832,-0.40842,0.240920,-0.88425,-1.04440,-0.126800,0.63800,-1.750500,-0.42044,1.85860,-3.91140,...,15.40700,-3.35420,-8.8085,12.2730,-1.23130,0.91362,-8.72510,4.08490,2.01340,-2.440500
3286,-7.87870,-1.614000,-5.31350,-0.37598,-1.180500,-3.47290,5.284000,-1.63440,-3.07230,6.66730,...,17.42900,0.10522,41.8340,-30.2530,-1.90860,-4.11890,-6.02470,-1.26680,-4.87510,-4.381200


In [51]:
def read_xyz(row):
    skeleton_data, label = read_skeleton(row)
    
    data = np.zeros((NUM_FRAMES, NUM_JOINTS, NUM_FEATURES))
    for m, i in enumerate(skeleton_data['frame_info']):
        for n, j in enumerate(i['joint_info']):
            feature_info = j['feature_info']
            data[m, n, :] = [feature_info['x'], feature_info['y'], feature_info['z']]

    data = data.transpose(2, 0, 1)
    return data, label

In [66]:
def read_skeleton(row):
    data = row[1:]
    label = int(row[-1])
    skeleton_data = {}
    skeleton_data['num_frame'] = NUM_FRAMES
    skeleton_data['frame_info'] = []
    
    for frame in range(NUM_FRAMES):
        offset = NUM_JOINTS * NUM_FEATURES
        data_in_frame = row[1+frame*offset:1+(frame+1)*offset]
        frame_info = {}
        frame_info['num_joints'] = NUM_JOINTS
        frame_info['joint_info'] = []
        
        for feature in range(NUM_JOINTS):
            offset = NUM_FEATURES
            data_in_joint = data_in_frame[feature*offset:(feature+1)*offset]
            joint_info = {}
            joint_info['num_features'] = NUM_FEATURES
            joint_info['feature_info'] = {
                k: float(v)
                for k, v in zip(['x', 'y', 'z'], data_in_joint)
            }
            frame_info['joint_info'].append(joint_info)
                    
        skeleton_data['frame_info'].append(frame_info)
    return skeleton_data, label

In [53]:
def normalisation(data):
    N, C, T, V = data.shape #based on st-GCN, N - num of data, C - num of features for each joint, T - num of frames, V - num of joints, we dont have M which is number of people in one frame
    transpose_data = np.transpose(data, [0, 2, 3, 1])  # N, C, T, V to  N, T, V, C
    

In [71]:
fp = np.zeros((len(dtf), NUM_FEATURES, NUM_FRAMES, NUM_JOINTS, 1), dtype=np.float32)
    #construct a matrix, with num of data, num of features for each joint, num of frames, num of joints, num of people(always 1 in our case)
with open(FILE_NAME, 'r') as f:
        csv_reader = reader(f)
        labels = []
        for i, row in enumerate(csv_reader):
            data, label = read_xyz(row)
            labels.append(label)
            fp[i, :, :, :, 0] = data
#fp = normalisation(fp)
np.save('data.npy', fp)

with open('label.pkl', 'wb') as f:
    pickle.dump(labels, f)

In [72]:
unpickled_df = pd.read_pickle("label.pkl")
unpickled_df

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
