In [3]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import import_ipynb
from rotation import rotation_matrix, unit_vector, angle_between, x_rotation, y_rotation, z_rotation

# for machine learning
from sklearn import model_selection, preprocessing, feature_selection, ensemble, linear_model, metrics, decomposition
from imblearn.over_sampling import SMOTE
from imblearn.combine import SMOTETomek, SMOTEENN
from imblearn.under_sampling import TomekLinks, RandomUnderSampler, EditedNearestNeighbours, AllKNN

import csv
from csv import reader
import pickle

importing Jupyter notebook from rotation.ipynb


In [4]:
NUM_FEATURES = 3
NUM_JOINTS = 20
NUM_FRAMES = 16
FILE_NAME = '../train.csv'
test_FILE_NAME = '../test.csv'

In [5]:
dtf = pd.read_csv(FILE_NAME, header = None)
dtf = dtf.set_index(dtf.columns[0])

dtf = dtf.sample(frac = 1)
X = dtf.iloc[:,:-1]
y = dtf.iloc[:,-1:]

dtf_test = pd.read_csv(test_FILE_NAME, header = None)
dtf_test = dtf_test.set_index(dtf.columns[0])

dtf_test = dtf_test.sample(frac = 1)

In [6]:
def read_xyz(row):
    skeleton_data, label = read_skeleton(row)
    
    data = np.zeros((NUM_FRAMES, NUM_JOINTS, NUM_FEATURES))
    for m, i in enumerate(skeleton_data['frame_info']):
        for n, j in enumerate(i['joint_info']):
            feature_info = j['feature_info']
            data[m, n, :] = [feature_info['x'], feature_info['y'], feature_info['z']]

    data = data.transpose(2, 0, 1)
    return data, label

In [7]:
def read_skeleton(row):
    label = row[-1]
    #label = -1
    skeleton_data = {}
    skeleton_data['index'] = row[0]
    skeleton_data['num_frame'] = NUM_FRAMES
    skeleton_data['frame_info'] = []
    
    for frame in range(NUM_FRAMES):
        offset = NUM_JOINTS * NUM_FEATURES
        data_in_frame = row[frame*offset:(frame+1)*offset]
        frame_info = {}
        frame_info['num_joints'] = NUM_JOINTS
        frame_info['joint_info'] = []
        
        for feature in range(NUM_JOINTS):
            offset = NUM_FEATURES
            data_in_joint = data_in_frame[feature*offset:(feature+1)*offset]
            joint_info = {}
            joint_info['num_features'] = NUM_FEATURES
            joint_info['feature_info'] = {
                k: float(v)
                for k, v in zip(['x', 'y', 'z'], data_in_joint)
            }
            frame_info['joint_info'].append(joint_info)
                    
        skeleton_data['frame_info'].append(frame_info)
    return skeleton_data, label

In [8]:
def normalisation(data):
    N, C, T, V, M = data.shape
    s = np.transpose(data, [0, 4, 2, 3, 1])
    zaxis=[0, 1]
    xaxis=[8, 4]
    
    print('sub the center joint #1 (spine joint in ntu and neck joint in kinetics)')
    for i_s, skeleton in enumerate(tqdm(s)):
        if skeleton.sum() == 0:
            continue
        main_body_center = skeleton[0][:, 1:2, :].copy()
        for i_p, person in enumerate(skeleton):
            if person.sum() == 0:
                continue
            mask = (person.sum(-1) != 0).reshape(T, V, 1)
            s[i_s, i_p] = (s[i_s, i_p] - main_body_center) * mask

    print('parallel the bone between hip(jpt 0) and spine(jpt 1) of the first person to the z axis')
    for i_s, skeleton in enumerate(tqdm(s)):
        if skeleton.sum() == 0:
            continue
        joint_bottom = skeleton[0, 0, zaxis[0]]
        joint_top = skeleton[0, 0, zaxis[1]]
        axis = np.cross(joint_top - joint_bottom, [0, 0, 1])
        angle = angle_between(joint_top - joint_bottom, [0, 0, 1])
        matrix_z = rotation_matrix(axis, angle)
        for i_p, person in enumerate(skeleton):
            if person.sum() == 0:
                continue
            for i_f, frame in enumerate(person):
                if frame.sum() == 0:
                    continue
                for i_j, joint in enumerate(frame):
                    s[i_s, i_p, i_f, i_j] = np.dot(matrix_z, joint)

    print('parallel the bone between right shoulder(jpt 8) and left shoulder(jpt 4) of the first person to the x axis')
    for i_s, skeleton in enumerate(tqdm(s)):
        if skeleton.sum() == 0:
            continue
        joint_rshoulder = skeleton[0, 0, xaxis[0]]
        joint_lshoulder = skeleton[0, 0, xaxis[1]]
        axis = np.cross(joint_rshoulder - joint_lshoulder, [1, 0, 0])
        angle = angle_between(joint_rshoulder - joint_lshoulder, [1, 0, 0])
        matrix_x = rotation_matrix(axis, angle)
        for i_p, person in enumerate(skeleton):
            if person.sum() == 0:
                continue
            for i_f, frame in enumerate(person):
                if frame.sum() == 0:
                    continue
                for i_j, joint in enumerate(frame):
                    s[i_s, i_p, i_f, i_j] = np.dot(matrix_x, joint)

    data = np.transpose(s, [0, 4, 2, 3, 1])
    return data
    

In [9]:
# Stratified Train test split
sss = model_selection.StratifiedShuffleSplit(n_splits=1, test_size = 0.2, random_state=None)
for train_index, test_index in sss.split(X, y):
    # print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

scaler = preprocessing.MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
#submission_test = scaler.transform(dtf_test)

In [11]:

smote = SMOTE(sampling_strategy='minority', random_state=42)

# fit predictor and target variable
x_smote, y_smote = smote.fit_resample(X_train, y_train)
x_smote.shape

# Try these below instead --- Jin Hong

# x_smote, y_smote = SMOTE(random_state=42).fit_resample(X_train, y_train)
# x_smote, y_smote = SMOTETomek(random_state=42).fit_resample(X_train, y_train)
# x_smote y_smote = SMOTEENN(random_state=42).fit_resample(X_train, y_train)


# ------------------------ Divider ------------------------------#
# ------------- Manual downsample + SMOTETomek ------------------#

# strategy = Counter(y_train)
# for key, val in strategy.items():
#     if key < 11:
#         strategy[key] = 200
        
#     else:
#         strategy[key] = val

# x_rus, y_rus = RandomUnderSampler(sampling_strategy=strategy, random_state=42).fit_resample(X_train, y_train)
# x_smote, y_smote = SMOTETomek(sampling_strategy='all', random_state=42).fit_resample(x_rus, y_rus)

(7963, 960)

In [None]:
fp = np.zeros((len(x_smote), NUM_FEATURES, NUM_FRAMES, NUM_JOINTS, 1), dtype=np.float32) #construct a matrix, with num of data, num of features for each joint, num of frames, num of joints, num of people(always 1 in our case)
for i, row in enumerate(x_smote):
    data, label = read_xyz(row)
    fp[i, :, :, :, 0] = data
fp = normalisation(fp)
np.save('train_data_normalisation.npy', fp)

In [114]:
labels = []
for i, row in enumerate(y_smote.values):
    labels.append(row[0])
with open('label.pkl', 'wb') as f:
    pickle.dump(labels, f)

In [61]:
fp = np.zeros((len(dtf), NUM_FEATURES, NUM_FRAMES, NUM_JOINTS, 1), dtype=np.float32)
    #construct a matrix, with num of data, num of features for each joint, num of frames, num of joints, num of people(always 1 in our case)
with open(FILE_NAME, 'r') as f:
        csv_reader = reader(f)
        labels = []
        for i, row in enumerate(csv_reader):
            data, label = read_xyz(row)
            labels.append(int(label))
            fp[i, :, :, :, 0] = data
#fp = normalisation(fp)
np.save('train_data.npy', fp)
#np.save('test_data.npy', fp)

# with open('label.pkl', 'wb') as f:
#     pickle.dump(labels, f)

962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961
962
961


In [115]:
unpickled_df = pd.read_pickle("label.pkl")
unpickled_df

[9,
 20,
 17,
 8,
 39,
 8,
 3,
 1,
 16,
 10,
 9,
 7,
 10,
 16,
 35,
 4,
 2,
 23,
 6,
 12,
 7,
 36,
 2,
 34,
 7,
 2,
 30,
 7,
 23,
 10,
 16,
 2,
 24,
 33,
 26,
 9,
 10,
 5,
 21,
 3,
 3,
 8,
 15,
 2,
 10,
 9,
 1,
 7,
 45,
 10,
 24,
 2,
 9,
 17,
 11,
 7,
 26,
 1,
 4,
 48,
 9,
 9,
 2,
 3,
 17,
 21,
 5,
 3,
 15,
 4,
 19,
 15,
 8,
 17,
 39,
 8,
 3,
 3,
 7,
 2,
 7,
 5,
 1,
 14,
 28,
 10,
 10,
 16,
 7,
 6,
 41,
 3,
 10,
 9,
 3,
 16,
 6,
 4,
 24,
 11,
 2,
 10,
 38,
 1,
 1,
 5,
 42,
 22,
 5,
 10,
 5,
 18,
 5,
 13,
 32,
 27,
 13,
 10,
 1,
 9,
 10,
 26,
 20,
 5,
 13,
 1,
 2,
 12,
 2,
 1,
 2,
 17,
 4,
 6,
 1,
 19,
 3,
 6,
 14,
 4,
 5,
 6,
 6,
 10,
 17,
 7,
 5,
 10,
 8,
 17,
 3,
 5,
 2,
 26,
 24,
 8,
 9,
 6,
 11,
 20,
 20,
 9,
 8,
 29,
 1,
 22,
 14,
 22,
 29,
 22,
 5,
 19,
 5,
 19,
 9,
 22,
 10,
 4,
 7,
 2,
 39,
 9,
 6,
 5,
 40,
 8,
 10,
 16,
 6,
 19,
 8,
 16,
 28,
 21,
 27,
 9,
 16,
 4,
 10,
 2,
 2,
 8,
 12,
 18,
 16,
 6,
 9,
 6,
 15,
 2,
 23,
 18,
 14,
 4,
 3,
 17,
 24,
 19,
 22,
 6,
 38,
 2,
 2,
