In [48]:
%matplotlib notebook
import pandas as pd
import numpy as np
import math
import statistics
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import SGDClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC, LinearSVC
from sklearn import svm
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.preprocessing import StandardScaler, LabelEncoder, normalize
import matplotlib.pyplot as plt
import seaborn as sn
import matplotlib
from mlxtend.plotting import plot_confusion_matrix
import warnings
from scipy.signal import savgol_filter, find_peaks, correlate
from sklearn.decomposition import PCA
import scipy.fftpack as FFT
from sklearn.utils import shuffle
import joblib
import scipy.stats as stats
warnings.filterwarnings('ignore')
matplotlib.rcParams['figure.figsize'] = [12,5]

In [81]:
def sampling_freq(df):
    start = df['time'].iloc[0]
    sum_samples = 0
    index = 0
    while  (start + pd.to_timedelta(1, unit='s')) < df['time'].iloc[-1]:
        end =  start + pd.to_timedelta(1, unit='s')
        sum_samples += df[(df['time'] >= start) & (df['time'] < end)].shape[0]
        start += pd.to_timedelta(1, unit='s')
        index += 1
    return sum_samples/index

def convert_to_float(x):
    try:
        return np.float(x)
    except:
        return np.nan

def convert_to_datetime(x):
    dt = datetime.fromtimestamp(x // 1000000000)
    s = dt.strftime('%Y-%m-%d %H:%M:%S')
    s += '.' + str(int(x % 1000000000)).zfill(9)
    return s

def heatmap(cm, labels):
    df_cm = pd.DataFrame(cm, index=labels, columns=labels)
    plt.figure(figsize = (10,7))
    sn.heatmap(df_cm, annot=True)

    plt.show()
    
def preprocess(data, activity, drop_interval_begin = 3, drop_interval_end = 3):
    #convert nanoseconds to date 
    data['time'] = data['time'].apply(convert_to_datetime) 
    data['time'] = pd.to_datetime(data['time'])

#convert to same data type
    for i in range(1, 4): 
        data[column_names_org[i]] = data[column_names_org[i]].apply(convert_to_float)
    
#drop rows with NaN values 
    data.dropna(axis=0, how='any', inplace=True) #TODO: invullen met mean, mod of median / interpolatie

#drop duplicates 
    data.drop_duplicates(subset=None, keep='first', inplace = True)
    
    #drop first and last 3 sec
    indexFirst = data[ (data['time'].iloc[0]+ pd.to_timedelta(drop_interval_begin, unit='s')) > data['time'] ].index
    data.drop(indexFirst , inplace=True)
    indexLast = data[ (data['time'].iloc[-1]- pd.to_timedelta(drop_interval_end, unit='s')) < data['time'] ].index
    data.drop(indexLast , inplace=True)
    
    #add activity label
    data['activity'] = activity
    
    return data

#TODO: verschillende activiteiten in 1 frame
def get_frames(df, frame_size, hop_size):

    N_FEATURES = 3

    frames = []
    labels = []
    for i in range(0, len(df) - frame_size, hop_size):
        x = df['x'].values[i: i + frame_size]
        y = df['y'].values[i: i + frame_size]
        z = df['z'].values[i: i + frame_size]
       
        # Retrieve the most often used label in this segment
        label = stats.mode(df['activity'][i: i + frame_size])[0][0]
        frames.append([x, y, z])
        labels.append(label)

   #print("ORG:",frames)
    # Bring the segments into a better shape
    frames = np.asarray(frames)#.reshape(-1, frame_size, N_FEATURES)
    print("NEW:",frames)
    labels = np.asarray(labels)

    return frames, labels

In [50]:
column_names_org = ['time', 'x', 'y', 'z']
moves = ["jump_fast", "jump_slow", "side_swing", "cross_over", "forward_180"]
moves_df = {}

# PREPROCESSING

## jump fast

In [51]:
jump_fast_files = ["jump_fast.csv", "jump_fast (meting2).csv"]
jump_fast_df = []

In [52]:
for f in jump_fast_files:
    jump_fast = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\\" + f, sep=';', header=0, skipinitialspace=True)
    jump_fast = preprocess(jump_fast, "jump_fast")
    jump_fast_df.append(jump_fast)

In [53]:
#preserve raw data
jump_fast_raw =  pd.concat(jump_fast_df, ignore_index=True)
moves_df["jump_fast"] = jump_fast_raw
jump_fast_raw.head()

Unnamed: 0,time,x,y,z,activity
0,1970-01-01 01:23:31.161128960,-13.727624,-5.393422,10.4351,jump_fast
1,1970-01-01 01:23:31.179347968,-14.868999,-8.69791,10.521241,jump_fast
2,1970-01-01 01:23:31.198877696,-17.388641,-6.417551,2.141576,jump_fast
3,1970-01-01 01:23:31.218669568,-28.742586,-34.614571,9.489935,jump_fast
4,1970-01-01 01:23:31.237543936,-12.117254,-13.351951,8.276773,jump_fast


In [54]:
freq = {}

In [55]:
freq["jump_fast"] = 0
for df in jump_fast_df:
    f = sampling_freq(df)
    freq["jump_fast"] += f
freq["jump_fast"] /= len(jump_fast_df)
freq

{'jump_fast': 51.828947368421055}

## jump slow

In [56]:
jump_slow_files = ["jump_slow.csv", "jump_slow1 (meting2).csv"]
jump_slow_df = []

In [57]:
for f in jump_slow_files:
    d = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\\" + f, sep=';', header=0, skipinitialspace=True)
    d = preprocess(d, "jump_slow")
    jump_slow_df.append(d)

In [58]:
#preserve raw data
jump_slow_raw =  pd.concat(jump_slow_df, ignore_index=True)
moves_df["jump_slow"] = jump_slow_raw
jump_slow_raw.head()

Unnamed: 0,time,x,y,z,activity
0,1970-01-01 01:22:43.384205312,5.812166,-7.269395,-5.000999,jump_slow
1,1970-01-01 01:22:43.403997184,-1.052842,-5.4341,-2.892923,jump_slow
2,1970-01-01 01:22:43.422347264,-4.833502,-8.195417,-3.342773,jump_slow
3,1970-01-01 01:22:43.442663424,-8.176275,-9.966106,-6.037092,jump_slow
4,1970-01-01 01:22:43.461931008,-7.305287,-7.922636,-9.257831,jump_slow


In [59]:
freq["jump_slow"] = 0
for df in jump_slow_df:
    f = sampling_freq(df)
    freq["jump_slow"] += f
freq["jump_slow"] /= len(jump_slow_df)
freq

{'jump_fast': 51.828947368421055, 'jump_slow': 51.85828877005348}

## side swing

In [60]:
side_swing_files = ["side_swing.csv", "side_swing2 (meting2).csv", "side_swing3 (meting2).csv"]
side_swing_df = []

In [61]:
for f in side_swing_files:
    d = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\\" + f, sep=';', header=0, skipinitialspace=True)
    d = preprocess(d, "side_swing")
    side_swing_df.append(d)

In [62]:
#preserve raw data
side_swing_raw =  pd.concat(side_swing_df, ignore_index=True)
moves_df["side_swing"] = side_swing_raw
side_swing_raw.head()

Unnamed: 0,time,x,y,z,activity
0,1970-01-01 04:25:01.846446080,-4.242475,-8.889337,2.167897,side_swing
1,1970-01-01 04:25:01.859028992,-4.242475,-8.889337,2.167897,side_swing
2,1970-01-01 04:25:01.871611904,-5.032106,-9.166903,3.656233,side_swing
3,1970-01-01 04:25:01.887340544,-4.862216,-9.195618,5.790631,side_swing
4,1970-01-01 04:25:01.911457792,-4.972285,-10.100104,5.395815,side_swing


In [63]:
freq["side_swing"] = 0
for df in side_swing_df:
    f = sampling_freq(df)
    freq["side_swing"] += f
freq["side_swing"] /= len(side_swing_df)
freq

{'jump_fast': 51.828947368421055,
 'jump_slow': 51.85828877005348,
 'side_swing': 52.17316017316017}

## cross over

In [64]:
cross_over_files = ["cross_over2.csv", "cross_over3.csv", "cross_over4.csv", "cross_over5.csv"]
cross_over_df = []

In [65]:
for f in cross_over_files:
    d = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\\" + f, sep=';', header=0, skipinitialspace=True)
    d = preprocess(d, "cross_over")
    cross_over_df.append(d)

In [66]:
#preserve raw data
cross_over_raw =  pd.concat(cross_over_df, ignore_index=True)
moves_df["cross_over"] = cross_over_raw
cross_over_raw.head()

Unnamed: 0,time,x,y,z,activity
0,1970-01-01 11:20:00.582410240,-2.160719,-6.233303,-14.897714,cross_over
1,1970-01-01 11:20:00.599187456,-3.905087,-10.815558,-13.521841,cross_over
2,1970-01-01 11:20:00.620158976,-6.647261,-13.981263,-12.650853,cross_over
3,1970-01-01 11:20:00.641130496,-10.040284,-15.531812,-11.734403,cross_over
4,1970-01-01 11:20:00.657907712,-7.441679,-21.116659,-14.184652,cross_over


In [67]:
freq["cross_over"] = 0
for df in cross_over_df:
    f = sampling_freq(df)
    freq["cross_over"] += f
freq["cross_over"] /= len(cross_over_df)
freq

{'jump_fast': 51.828947368421055,
 'jump_slow': 51.85828877005348,
 'side_swing': 52.17316017316017,
 'cross_over': 52.37883116883117}

## forward 180

In [68]:
forward_180_files = ["forward_180_2.csv"]
forward_180_df = []

In [69]:
for f in forward_180_files:
    d = pd.read_csv(r"C:\Users\Elise\Documents\unif\master\semester2\masterproef\gitProject\thesis\data\\" + f, sep=';', header=0, skipinitialspace=True)
    d = preprocess(d, "forward_180")
    forward_180_df.append(d)

In [70]:
#preserve raw data
forward_180_raw = pd.concat(forward_180_df, ignore_index=True)
moves_df["forward_180"] = forward_180_raw
forward_180_raw.head()

Unnamed: 0,time,x,y,z,activity
0,1970-01-01 11:32:59.263336448,-18.680767,-12.942778,5.675776,forward_180
1,1970-01-01 11:32:59.284307968,-16.331015,-10.521241,5.505885,forward_180
2,1970-01-01 11:32:59.305279488,-17.601603,-3.617948,5.479564,forward_180
3,1970-01-01 11:32:59.322056704,-18.474983,-5.826523,7.613962,forward_180
4,1970-01-01 11:32:59.343028224,-16.907684,-7.776674,8.19781,forward_180


In [71]:
freq["forward_180"] = 0
for df in forward_180_df:
    f = sampling_freq(df)
    freq["forward_180"] += f
freq["forward_180"] /= len(forward_180_df)
freq

{'jump_fast': 51.828947368421055,
 'jump_slow': 51.85828877005348,
 'side_swing': 52.17316017316017,
 'cross_over': 52.37883116883117,
 'forward_180': 51.81666666666667}

# CLASSIFICATION

## shuffle - balance data

In [72]:
shape = np.min([jump_fast_raw.shape[0], jump_slow_raw.shape[0], side_swing_raw.shape[0], cross_over_raw.shape[0], forward_180_raw.shape[0]])
jump_fast_raw = shuffle(jump_fast_raw).head(shape).copy()
jump_slow_raw = shuffle(jump_slow_raw).head(shape).copy()
side_swing_raw = shuffle(side_swing_raw).head(shape).copy()
cross_over_raw = shuffle(cross_over_raw).head(shape).copy()
forward_180_raw = shuffle(forward_180_raw).head(shape).copy()
moves_df["jump_fast"] = jump_fast_raw
moves_df["jump_slow"] = jump_slow_raw
moves_df["side_swing"] = side_swing_raw
moves_df["cross_over"] = cross_over_raw
moves_df["forward_180"] = forward_180_raw
shape

2939

## merge activities

In [73]:
merged_raw = pd.concat([jump_fast_raw, jump_slow_raw, side_swing_raw, cross_over_raw, forward_180_raw], ignore_index=True) 
target = merged_raw['activity']
merged_raw

Unnamed: 0,time,x,y,z,activity
0,1970-01-01 01:23:49.004615680,2.957529,-6.472585,-15.227923,jump_fast
1,1970-01-01 04:38:17.816293376,0.203390,-10.884951,-4.730610,jump_fast
2,1970-01-01 04:38:31.342923776,-1.397408,-13.050455,-1.296910,jump_fast
3,1970-01-01 01:23:54.174488576,-18.415163,10.258031,13.246666,jump_fast
4,1970-01-01 04:38:30.688612352,-3.538985,-9.087940,4.082155,jump_fast
...,...,...,...,...,...
14690,1970-01-01 11:33:43.827816448,-14.761323,-9.643075,1.335195,forward_180
14691,1970-01-01 11:33:24.169113600,-0.334995,-5.161318,3.012564,forward_180
14692,1970-01-01 11:34:41.658880000,-0.346959,-6.393622,-4.577470,forward_180
14693,1970-01-01 11:33:01.234659328,-11.188839,-10.485349,-13.835300,forward_180


## labeling

In [74]:
#label activity
le = preprocessing.LabelEncoder()
le.fit(merged_raw['activity'])
merged_raw['activity'] = le.transform(merged_raw['activity'])
labels = le.inverse_transform([0,1,2,3,4])
labels

array(['cross_over', 'forward_180', 'jump_fast', 'jump_slow',
       'side_swing'], dtype=object)

## normalize

In [75]:
Normalizer = preprocessing.Normalizer().fit(merged_raw[['x', 'y', 'z']])
merged_raw[['x', 'y', 'z']] = Normalizer.transform(merged_raw[['x', 'y', 'z']])
merged_raw.head()

Unnamed: 0,time,x,y,z,activity
0,1970-01-01 01:23:49.004615680,0.175953,-0.385074,-0.905957,2
1,1970-01-01 04:38:17.816293376,0.017134,-0.916996,-0.398527,2
2,1970-01-01 04:38:31.342923776,-0.105953,-0.989497,-0.098333,2
3,1970-01-01 01:23:54.174488576,-0.739678,0.412032,0.532076,2
4,1970-01-01 04:38:30.688612352,-0.334733,-0.859578,0.386108,2


## segmentation

In [76]:
sampling_freq=sum(freq.values())/len(freq)
sampling_freq

52.01117882942651

In [77]:
frame_size = int(sampling_freq)*1
hop_size = int(frame_size/2)

In [82]:
X, y = get_frames(merged_raw.drop('time', axis=1), frame_size, hop_size)

NEW: [[[ 0.17595272  0.01713447 -0.10595273 ... -0.09579411 -0.18085531
   -0.23133068]
  [-0.38507447 -0.91699633 -0.98949718 ... -0.26323143 -0.59024087
   -0.16139351]
  [-0.90595711 -0.39852747 -0.09833288 ... -0.95996495 -0.78670647
   -0.95939473]]

 [[ 0.33548329 -0.29559177 -0.88797062 ... -0.025908    0.24396918
   -0.10167381]
  [-0.78824931  0.75839659 -0.13396945 ... -0.9863278  -0.95535279
   -0.90996422]
  [ 0.51586237 -0.58091317 -0.43995495 ...  0.16274593  0.1666736
   -0.40202929]]

 [[ 0.35662732 -0.33774278 -0.36505852 ... -0.79662987  0.40995952
   -0.22927937]
  [-0.55591272 -0.89533697 -0.86882472 ... -0.58112446 -0.69926768
   -0.18540713]
  [-0.75085152 -0.29034726  0.33447852 ...  0.16635869 -0.58562607
   -0.9555392 ]]

 ...

 [[-0.51974997 -0.68544614 -0.59288665 ... -0.80309935 -0.73744697
    0.50195903]
  [-0.80642824 -0.72575849 -0.80402756 ... -0.36875204 -0.67470408
   -0.85812073]
  [-0.28201679 -0.05863616 -0.0450011  ... -0.46803138  0.0307632
   -0

## neural network

In [83]:
X.shape, y.shape

((564, 3, 52), (564,))

In [84]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0, stratify = y)

In [85]:
X_train.shape, X_test.shape

((451, 3, 52), (113, 3, 52))

In [86]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], X_test.shape[2], 1)

In [87]:
X_train[0].shape, X_test[0].shape

((3, 52, 1), (3, 52, 1))

In [88]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPool2D
from tensorflow.keras.optimizers import Adam

In [89]:
model = Sequential()
model.add(Conv2D(16, (3, 3), activation = 'relu', input_shape = X_train[0].shape))
model.add(BatchNormalization())
model.add(Dropout(0.1))

model.add(Conv2D(32, (1, 1), activation='relu'))
model.add(Dropout(0.2))

model.add(Flatten())

model.add(Dense(64, activation = 'relu'))
model.add(Dropout(0.5))

model.add(Dense(7, activation='softmax'))

In [90]:
model.compile(optimizer=Adam(learning_rate = 0.001), loss = 'sparse_categorical_crossentropy', metrics = ['accuracy'])

In [91]:
epo = 30
history = model.fit(X_train, y_train, epochs = epo, batch_size=64, validation_data= (X_test, y_test), verbose=1, shuffle=True) 

Train on 451 samples, validate on 113 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [92]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_2 (Conv2D)            (None, 1, 50, 16)         160       
_________________________________________________________________
batch_normalization_1 (Batch (None, 1, 50, 16)         64        
_________________________________________________________________
dropout_3 (Dropout)          (None, 1, 50, 16)         0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 1, 50, 32)         544       
_________________________________________________________________
dropout_4 (Dropout)          (None, 1, 50, 32)         0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1600)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 64)               

In [93]:
def plot_learningCurve(history, epochs):
  # Plot training & validation accuracy values
  epoch_range = range(1, epochs+1)
  plt.plot(epoch_range, history.history['accuracy'])
  plt.plot(epoch_range, history.history['val_accuracy'])
  plt.title('Model accuracy')
  plt.ylabel('Accuracy')
  plt.xlabel('Epoch')
  plt.legend(['Train', 'Val'], loc='upper left')
  plt.show()

  # Plot training & validation loss values
  plt.plot(epoch_range, history.history['loss'])
  plt.plot(epoch_range, history.history['val_loss'])
  plt.title('Model loss')
  plt.ylabel('Loss')
  plt.xlabel('Epoch')
  plt.legend(['Train', 'Val'], loc='upper left')
  plt.show()

In [94]:
plt.figure()
plot_learningCurve(history, epo)

<IPython.core.display.Javascript object>

In [95]:
y_pred = model.predict_classes(X_test)
print(X_test.shape)
y_pred

(113, 3, 52, 1)


array([4, 1, 1, 3, 1, 4, 1, 3, 4, 4, 3, 3, 1, 1, 0, 4, 2, 4, 1, 2, 0, 3,
       0, 0, 0, 1, 4, 1, 1, 1, 4, 1, 4, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 3, 1, 1, 1, 4, 1, 1, 3, 0, 3, 1, 0, 1, 1, 3, 3,
       4, 4, 4, 1, 0, 2, 1, 1, 0, 0, 1, 2, 3, 0, 4, 1, 4, 3, 4, 4, 1, 0,
       1, 4, 4, 1, 1, 3, 1, 4, 1, 1, 0, 3, 1, 4, 4, 1, 0, 4, 2, 1, 3, 0,
       3, 0, 0], dtype=int64)

In [96]:
mat = confusion_matrix(y_test, y_pred)
results = model.evaluate(X_test, y_test, batch_size=128)
print('test loss, test acc:', results)
heatmap(mat, labels)
print(results)

test loss, test acc: [0.6431986689567566, 0.76106197]


<IPython.core.display.Javascript object>

[0.6431986689567566, 0.76106197]


## save model

In [97]:
model.save('rope_skipping_simple_cnn') 

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
INFO:tensorflow:Assets written to: rope_skipping_simple_cnn\assets


In [98]:
converter = tf.lite.TFLiteConverter.from_saved_model("rope_skipping_simple_cnn")
tflite_model = converter.convert()
open("converted_model.tflite", "wb").write(tflite_model)

417688