In [80]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
#import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder 
#from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier

In [81]:
# Fixing format of the label
def enc_label(label):
    """Map a figure name to its integer class code.

    "right-turn" -> 1, "side" -> 2, "cuban-basic" -> 3, "suzie-q" -> 4.
    Any other value maps to 0.
    """
    # Position in this tuple (1-based) is the class code.
    known_labels = ("right-turn", "side", "cuban-basic", "suzie-q")

    code = 0
    for position, name in enumerate(known_labels, start=1):
        if label == name:
            code = position
    return code

# Hyperparameters.
# NOTE(review): BATCH_SIZE and EPOCHS are not referenced by any cell visible
# here -- confirm they are still needed.
BATCH_SIZE = 64
EPOCHS = 100
MAX_SEQ_LENGTH = 40   # number of frames sampled per figure

# Load the training and validation tables (paths are relative to the
# notebook's working directory).
PATH_DATA_TRAIN = "Data_train_validate/Data_train_norm.csv"
PATH_DATA_VAL = "Data_train_validate/Data_val_norm.csv"
data_train = pd.read_csv(PATH_DATA_TRAIN)
data_val = pd.read_csv(PATH_DATA_VAL)

In [82]:
# Columns to keep in the model (reduced keypoint subset, currently commented out)

#cols = ['neck_x', 'neck_y', 
#        'rshoulder_x', 'rshoulder_y',
#        'lshoulder_x', 'lshoulder_y', 
#        'midhip_x', 'midhip_y', 
#        'rhip_x', 'rhip_y', 
#        'rknee_x', 'rknee_y', 
#        'rankle_x', 'rankle_y', 
#        'lhip_x', 'lhip_y', 
#        'lknee_x', 'lknee_y', 
#        'lankle_x', 'lankle_y',
#        'label']

In [83]:
# Columns fed to the model: one (x, y) pair per keypoint, followed by the
# target column. The order matters: downstream code treats the last entry
# as the label and everything before it as features.
cols = [
    # head and torso
    'nose_x', 'nose_y',
    'neck_x', 'neck_y',
    # right arm
    'rshoulder_x', 'rshoulder_y',
    'relbow_x', 'relbow_y',
    'rwrist_x', 'rwrist_y',
    # left arm
    'lshoulder_x', 'lshoulder_y',
    'lelbow_x', 'lelbow_y',
    'lwrist_x', 'lwrist_y',
    # hips and right leg
    'midhip_x', 'midhip_y',
    'rhip_x', 'rhip_y',
    'rknee_x', 'rknee_y',
    'rankle_x', 'rankle_y',
    # left leg
    'lhip_x', 'lhip_y',
    'lknee_x', 'lknee_y',
    'lankle_x', 'lankle_y',
    # face
    'reye_x', 'reye_y',
    'leye_x', 'leye_y',
    'rear_x', 'rear_y',
    'lear_x', 'lear_y',
    # left foot
    'lbigtoe_x', 'lbigtoe_y',
    'lsmalltoe_x', 'lsmalltoe_y',
    'lheal_x', 'lheal_y',
    # right foot
    'rbigtoe_x', 'rbigtoe_y',
    'rsmalltoe_x', 'rsmalltoe_y',
    'rheal_x', 'rheal_y',
    # target
    'label',
]

In [84]:
NUM_FEATURES = len(cols) - 1     # number of joint coordinates (every entry of cols except the trailing 'label')

In [85]:
# Sanity check on the feature count derived from cols.
print(NUM_FEATURES)

50


In [86]:
# Function to select a fixed number of frames per figure and write them in the correct format for the model

def transf_data(data):
    """Turn a frame-by-frame keypoint table into a flat feature matrix and labels.

    A figure is delimited by a row whose ``status`` is "S" (start) and the
    row at the same position in the list of "E" (end) rows. For every figure,
    ``MAX_SEQ_LENGTH`` frames are picked at evenly spaced positions in
    ``[start, end)`` and their feature columns are concatenated into one row.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a ``status`` column and every column listed in the
        module-level ``cols`` (the last entry of ``cols`` is the label).

    Returns
    -------
    X : numpy.ndarray
        float32, shape ``(n_figures, MAX_SEQ_LENGTH * NUM_FEATURES)``.
    y : numpy.ndarray
        float32, shape ``(n_figures,)``; class codes from ``enc_label``,
        read from the label of each figure's start row.

    Raises
    ------
    ValueError
        If a figure's end marker is not located after its start marker.
    """
    # Re-base the index to 0..n-1 so that the marker labels collected below
    # are also valid row positions, whatever index the caller's frame carries
    # (e.g. after filtering or concatenation).
    data = data.reset_index(drop=True)

    ind_start = data.index[data['status'] == "S"].tolist()
    ind_end = data.index[data['status'] == "E"].tolist()

    # Keep only the model columns; the last one is the label.
    features = data.loc[:, cols]

    # NOTE(review): the last "S" marker is never turned into a sample
    # (len(ind_start) - 1 figures are produced). Kept as-is to preserve the
    # existing dataset size -- confirm whether dropping it is intentional.
    n_figures = len(ind_start) - 1

    X = []
    y = []
    for i in range(n_figures):
        start, end = ind_start[i], ind_end[i]
        if end <= start:
            raise ValueError(
                f"figure {i}: end marker at row {end} is not after "
                f"start marker at row {start}"
            )

        # Evenly spaced frame positions in [start, end); frames repeat when
        # the figure is shorter than MAX_SEQ_LENGTH.
        # Alternative (disabled): sorted random draw with
        # np.random.randint(start, end, MAX_SEQ_LENGTH).
        frames = np.linspace(start, end, MAX_SEQ_LENGTH, endpoint=False).astype(int)

        X.append(features.iloc[frames, :-1].to_numpy())
        y.append(enc_label(features.loc[start, 'label']))

    # Flatten each figure's (frames, features) block into a single row.
    X = np.array(X)
    X = X.reshape(n_figures, MAX_SEQ_LENGTH * NUM_FEATURES).astype("float32")
    # TODO: decide if the X values need to be normalized
    y = np.array(y).astype("float32")

    return X, y



In [87]:
# Build the model inputs for both the training and the validation set
X_train, y_train = transf_data(data_train)
X_val, y_val = transf_data(data_val)

In [88]:
# Shape check: one row per figure, MAX_SEQ_LENGTH * NUM_FEATURES columns
X_train.shape

(384, 2000)

In [89]:
# Gradient boosting over depth-1 trees (stumps); random_state pins the result.
# The stray "..." continuation prompt from a pasted REPL snippet was removed:
# it only ran because IPython strips such prompts and is a SyntaxError in
# plain Python.
# NOTE(review): hyperparameters are unchanged; given the recorded train vs.
# validation scores below, a smaller learning_rate is worth trying.
clf = GradientBoostingClassifier(
    n_estimators=150,
    learning_rate=1.0,
    max_depth=1,
    random_state=0,
).fit(X_train, y_train)

In [90]:
# Score on the data the model was fitted on
clf.score(X_train, y_train)

1.0

In [91]:
# Score on the held-out validation set; a large gap to the training score
# points to overfitting
clf.score(X_val, y_val)

0.46808510638297873