In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import json

# Data

In [3]:
# get the data, a.json are "good" posture, b.json are "bad" postures

# !curl -F "file=@something.ext" https://file.io
# !curl -o a.json https://file.io/lhlVRw
# !curl -o b.json https://file.io/LTxKCc

!ls -lah

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13.5M    0 13.5M    0     0  17.9M      0 --:--:-- --:--:-- --:--:-- 17.8M


In [2]:
# func to load json, extract x, y, concatenate them, turn into np.array, normalise /480
def tmp(f):
    tmp = json.load(f)
    tmp_x = np.array([[j['position']['x'] for j in i['keypoints']] for i in tmp])
    tmp_y = np.array([[j['position']['y'] for j in i['keypoints']] for i in tmp])
    tmp_name = np.array([i['name'] for i in tmp])
    return np.concatenate((tmp_x, tmp_y), axis=1) / 480, tmp_name
    

with open('a.json', 'r') as f: a, a_name = tmp(f)
with open('b.json', 'r') as f: b, b_name = tmp(f)
    
# combining a, b to a dataset

X = np.concatenate((a, b))
y = np.append(np.zeros(len(a)), np.ones(len(b))) # 0: a, 1: b

ab_name = np.concatenate((a_name, b_name)) 

print(a.shape, b.shape, X.shape, y.shape, ab_name.shape)


del tmp, a, b, a_name, b_name, 

(10475, 34) (10457, 34) (20932, 34) (20932,) (20932,)


In [3]:
# split into train/val/test
np.random.seed(0)

# 60% train, 20% validation, 20% test
tmp = np.random.permutation(len(X))
tmp_train = tmp[:round(len(tmp) * 0.6)]
tmp_val = tmp[round(len(tmp) * 0.6):round(len(tmp) * 0.8)]
tmp_test = tmp[round(len(tmp) * 0.8):]

X_train, y_train = X[tmp_train], y[tmp_train]
X_val, y_val = X[tmp_val], y[tmp_val]
X_test, y_test = X[tmp_test], y[tmp_test]

ab_name_train = ab_name[tmp_train]
ab_name_val = X[tmp_val]
ab_name_test = X[tmp_test]

print('Train', X_train.shape, y_train.shape, y_train.sum())
print('Val', X_val.shape, y_val.shape, y_val.sum())
print('Test', X_test.shape, y_test.shape, y_test.sum())


del tmp, tmp_train, tmp_val, tmp_test

Train (12559, 34) (12559,) 6302.0
Val (4187, 34) (4187,) 2090.0
Test (4186, 34) (4186,) 2065.0


# Training

## Regression

In [0]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [57]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(random_state=0, solver='lbfgs', multi_class='auto').fit(X_train, y_train)

print(model.score(X_train, y_train))
print(model.score(X_val, y_val))
print(model.score(X_test, y_test))

# confusion_matrix(y_val, model.predict(X_val))

print(classification_report(y_val, model.predict(X_val)))

print(model.intercept_, model.coef_)


0.9793773389601083
0.9818485789347982
0.9835164835164835
              precision    recall  f1-score   support

         0.0       0.98      0.98      0.98      2097
         1.0       0.98      0.98      0.98      2090

   micro avg       0.98      0.98      0.98      4187
   macro avg       0.98      0.98      0.98      4187
weighted avg       0.98      0.98      0.98      4187

[17.24016087] [[-11.59297432 -10.48313072 -11.24994747 -11.06962914  -8.26816833
   -6.85886515  -4.83989711  -0.72550568  -2.80627788  -1.84612645
   -4.40763068   9.69464209   4.04290725   6.51791598   5.20727441
    1.36463445   2.36079032   8.79429058   7.34754462   7.04710324
    6.00282833   4.35750347   3.0650297    3.49635641   2.20437731
    1.81695029   1.90782238   2.83214111   0.07692417  -0.96839343
   -3.14235201  -3.8216309   -0.91195957  -1.31549044]]


In [66]:
# can use these coef_ and intercept_ for javascript

def predict(x):
    tmp = (x * model.coef_).sum() + model.intercept_
    tmp = 1 / (1 + np.exp(-tmp))
    return tmp

for i in range(1, 10):
    print(predict(X_val[i]), model.predict_proba(X_val)[i][1])
    

[0.98000469] 0.9800046868259261
[0.9973319] 0.9973319041085145
[0.99948657] 0.9994865731088585
[0.9205517] 0.9205517045703779
[0.76994517] 0.769945172288728
[0.00067037] 0.0006703694322360918
[0.86025183] 0.8602518348713658
[0.81909494] 0.8190949387801347
[0.00043184] 0.0004318431132530536


array([[-11.59297432, -10.48313072, -11.24994747, -11.06962914,
         -8.26816833,  -6.85886515,  -4.83989711,  -0.72550568,
         -2.80627788,  -1.84612645,  -4.40763068,   9.69464209,
          4.04290725,   6.51791598,   5.20727441,   1.36463445,
          2.36079032,   8.79429058,   7.34754462,   7.04710324,
          6.00282833,   4.35750347,   3.0650297 ,   3.49635641,
          2.20437731,   1.81695029,   1.90782238,   2.83214111,
          0.07692417,  -0.96839343,  -3.14235201,  -3.8216309 ,
         -0.91195957,  -1.31549044]])

## KNN

In [125]:
from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

print(model.score(X_train, y_train))
print(model.score(X_val, y_val))
print(model.score(X_test, y_test))

# confusion_matrix(y_val, model.predict(X_val))

print(classification_report(y_val, model.predict(X_val)))

0.9988852615654112
0.9980893240983998
0.9985666507405638
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      2097
         1.0       1.00      1.00      1.00      2090

   micro avg       1.00      1.00      1.00      4187
   macro avg       1.00      1.00      1.00      4187
weighted avg       1.00      1.00      1.00      4187



## Basic neural network

In [11]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, SeparableConv2D, MaxPooling2D, AveragePooling2D, GlobalMaxPooling2D, BatchNormalization, Flatten, Dropout, InputLayer
from keras.optimizers import Adam, Adamax, RMSprop
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [0]:
def train(make_model, n = 5, optimizer = lambda: 'rmsprop', callbacks = lambda: [EarlyStopping(patience=5, verbose=1)], verbose=0):
    # given a model, train it for n times and plot the associated metrics
    # make_model, optimizer and callbacks should be provided as a function as each time the functions are called, brand new instances are created in the for loop below. Use this because can't use deepcopy

    models = [] # to hold the model weights
    hists = [] # contains all the history
    
    make_model(None).summary()
    
    plt.figure(figsize=(4 * (n + 2), 8)) # the figure

    for i in range(n):
        model = make_model(i)
        model.compile(loss='binary_crossentropy', optimizer=optimizer(), metrics=['accuracy'])
        hist = model.fit(X_train, y_train, batch_size=256, epochs=100, validation_data=(X_val, y_val), callbacks=callbacks(), verbose=verbose)        
        hists.append(hist)
        
        models.append(model) # store the model
#         model.set_weights(initial_weights) # restore to the original weights

        r = range(2, len(hist.history['acc']) + 1) # starting from epoch 2, ignore the first epoch
        plt.subplot(2, n + 1, i + 2) # plot the loss history, starting with subplot 3
        plt.plot(r, hist.history['loss'][1:], '.-', label='Train loss') # ignore the first epoch
        plt.plot(r, hist.history['val_loss'][1:], '.-', label='Val loss')
        plt.legend()
        
        plt.subplot(2, n + 1, i + 2 + n + 1) # plot the acc history, starting with subplot 3
        plt.plot(r, hist.history['acc'][1:], '.-', label='Train acc')
        plt.plot(r, hist.history['val_acc'][1:], '.-', label='Val acc')
        plt.legend()

    
    plt.subplot(2, n + 1, 1) # plot the loss summary at the first subplot
    metrics = ['loss'] * n + ['val_loss'] * n 
    values = np.concatenate([
        [i.history['loss'][-1] for i in hists],
        [i.history['val_loss'][-1] for i in hists],
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Loss')
    values = values.reshape(2, -1)
    print('Loss', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))
    
    plt.subplot(2, n + 1, n + 2) # plot the acc summary at the second subplot
    metrics = ['acc'] * n + ['val_acc'] * n
    values = np.concatenate([
        [i.history['acc'][-1] for i in hists],
        [i.history['val_acc'][-1] for i in hists]
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Accuracy')
    values = values.reshape(2, -1)
    print('\nAcc', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))

    plt.tight_layout()
    
    return models, hists


In [0]:
%%time

callbacks = lambda: [
    ReduceLROnPlateau(patience=3, verbose=1, factor=0.5, min_lr=1e-5),
    EarlyStopping(patience=5, verbose=1)
]

_ = train(lambda x: Sequential([
    Dense(64, input_shape=(X.shape[1],), activation='relu'),
    Dense(1, activation='sigmoid')
]), callbacks=callbacks, verbose=1)

In [0]:
%%time

callbacks = lambda: [
    ReduceLROnPlateau(patience=3, verbose=1, factor=0.5, min_lr=1e-5),
    EarlyStopping(patience=5, verbose=0)
]

_ = train(lambda x: Sequential([
    Dense(64, input_shape=(X.shape[1],), activation='relu'),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
]), callbacks=callbacks, verbose=0)

## Convolution network

In [49]:
# convert the data to 2D

def generate_img(features, target, multiplier = 480):
    for x, y in zip(features, target):
        tmp = np.zeros((480, 270, 1)) # pic of 480 x 270
        i = int(len(features) / 2)
        x = np.int64(features[:i] * multiplier)
        y = np.int64(np.min(features[i:] * multiplier, 269))
        print(y)
        for i, j in zip(x, y):
            tmp[i, j, 0] = 1
            yield (tmp, y)
            
model = Sequential()
model.add(Conv2D(32, kernel_size=3, strides=1, activation='relu', input_shape=(480, 270, 1)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit_generator(
    generate_img(X_train, y_train),
#     steps_per_epoch=256,
    steps_per_epoch=len(X_train),
    epochs=100,
    validation_data=generate_img(X_val, y_val),
    validation_steps=len(X_val),
    verbose=1
)


Epoch 1/100


AxisError: ignored

In [0]:
def train2(make_model, n = 5, optimizer = lambda: 'rmsprop', callbacks = lambda: [EarlyStopping(patience=5, verbose=1)], verbose=0):
    # given a model, train it for n times and plot the associated metrics
    # make_model, optimizer and callbacks should be provided as a function as each time the functions are called, brand new instances are created in the for loop below. Use this because can't use deepcopy

    models = [] # to hold the model weights
    hists = [] # contains all the history
    
    make_model(None).summary()
    
    plt.figure(figsize=(4 * (n + 2), 8)) # the figure

    for i in range(n):
        model = make_model(i)
        model.compile(loss='binary_crossentropy', optimizer=optimizer(), metrics=['accuracy'])
        hist = model.fit(X_train2, y_train2, batch_size=256, epochs=100, validation_data=(X_val2, y_val2), callbacks=callbacks(), verbose=verbose)        
        hists.append(hist)
        
        models.append(model) # store the model
#         model.set_weights(initial_weights) # restore to the original weights

        r = range(2, len(hist.history['acc']) + 1) # starting from epoch 2, ignore the first epoch
        plt.subplot(2, n + 1, i + 2) # plot the loss history, starting with subplot 3
        plt.plot(r, hist.history['loss'][1:], '.-', label='Train loss') # ignore the first epoch
        plt.plot(r, hist.history['val_loss'][1:], '.-', label='Val loss')
        plt.legend()
        
        plt.subplot(2, n + 1, i + 2 + n + 1) # plot the acc history, starting with subplot 3
        plt.plot(r, hist.history['acc'][1:], '.-', label='Train acc')
        plt.plot(r, hist.history['val_acc'][1:], '.-', label='Val acc')
        plt.legend()

    
    plt.subplot(2, n + 1, 1) # plot the loss summary at the first subplot
    metrics = ['loss'] * n + ['val_loss'] * n 
    values = np.concatenate([
        [i.history['loss'][-1] for i in hists],
        [i.history['val_loss'][-1] for i in hists],
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Loss')
    values = values.reshape(2, -1)
    print('Loss', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))
    
    plt.subplot(2, n + 1, n + 2) # plot the acc summary at the second subplot
    metrics = ['acc'] * n + ['val_acc'] * n
    values = np.concatenate([
        [i.history['acc'][-1] for i in hists],
        [i.history['val_acc'][-1] for i in hists]
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Accuracy')
    values = values.reshape(2, -1)
    print('\nAcc', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))

    plt.tight_layout()
    
    return models, hists


In [0]:
%%time
_ = train(lambda x: Sequential([
    Conv2D(32, kernel_size=3, strides=1, activation='relu', input_shape=(480, 270, 1)),
    Flatten(),
    Dense(1, activation='sigmoid')    
]))

<480x270 sparse matrix of type '<class 'numpy.float64'>'
	with 17 stored elements in Compressed Sparse Row format>