In [0]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import json

# Data

In [2]:
# get the data, a.json are "good" posture, b.json are "bad" postures

# !curl -F "file=@something.ext" https://file.io
!curl -o a.json https://raw.githubusercontent.com/aunz/ds-upright/master/data/a.json
!curl -o b.json https://raw.githubusercontent.com/aunz/ds-upright/master/data/b.json

!ls -lah

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13.5M  100 13.5M    0     0  49.3M      0 --:--:-- --:--:-- --:--:-- 49.1M
  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
100 13.5M  100 13.5M    0     0  29.2M      0 --:--:-- --:--:-- --:--:-- 29.1M
total 28M
drwxr-xr-x 1 root root 4.0K Jan 28 19:45 .
drwxr-xr-x 1 root root 4.0K Jan 28 19:44 ..
-rw-r--r-- 1 root root  14M Jan 28 19:54 a.json
-rw-r--r-- 1 root root  14M Jan 28 19:54 b.json
drwxr-xr-x 1 root root 4.0K Jan  8 17:14 .config
drwxr-xr-x 1 root root 4.0K Jan  8 17:15 sample_data


In [4]:
# func to load json, extract x, y, concatenate them, turn into np.array, normalise /480
def tmp(f):
    tmp = json.load(f)
    tmp_x = np.array([[j['position']['x'] for j in i['keypoints']] for i in tmp])
    tmp_x = tmp_x.clip(0, 480)
    tmp_x2 = 480 - tmp_x # create mirror
    tmp_y = np.array([[j['position']['y'] for j in i['keypoints']] for i in tmp])
    tmp_y = tmp_y.clip(0, 270)
    tmp_name = np.array([i['name'] for i in tmp]) # a1 001, a1 002 etc
    tmp_name2 = np.array([i['name'] + ' m ' for i in tmp]) # mirror
    tmp1 = np.concatenate((tmp_x, tmp_y), axis=1) / 480
    tmp2 = np.concatenate((tmp_x2, tmp_y), axis=1) / 480
    return np.concatenate((tmp1, tmp2)), np.concatenate((tmp_name, tmp_name2))
    

with open('a.json', 'r') as f: a, a_name = tmp(f)
with open('b.json', 'r') as f: b, b_name = tmp(f)
    
# combining a, b to a dataset

X = np.concatenate((a, b))
y = np.append(np.zeros(len(a)), np.ones(len(b))) # 0: a, 1: b

ab_name = np.concatenate((a_name, b_name)) 

print(a.shape, b.shape, X.shape, y.shape, ab_name.shape)

del tmp, a, b, a_name, b_name

(20950, 34) (20914, 34) (41864, 34) (41864,) (41864,)


In [6]:
# split into 60% train, 20% val, 20% test
np.random.seed(0)

tmp = np.random.permutation(len(X))
tmp_train = tmp[:round(len(tmp) * 0.6)]
tmp_val = tmp[round(len(tmp) * 0.6):round(len(tmp) * 0.8)]
tmp_test = tmp[round(len(tmp) * 0.8):]

X_train, y_train = X[tmp_train], y[tmp_train]
X_val, y_val = X[tmp_val], y[tmp_val]
X_test, y_test = X[tmp_test], y[tmp_test]

ab_name_train = ab_name[tmp_train]
ab_name_val = ab_name[tmp_val]
ab_name_test = ab_name[tmp_test]

print('Train', X_train.shape, y_train.shape, y_train.mean())
print('Val', X_val.shape, y_val.shape, y_val.mean())
print('Test', X_test.shape, y_test.shape, y_test.mean())

del tmp, tmp_train, tmp_val, tmp_test

Train (25118, 34) (25118,) 0.5019507922605303
Val (8373, 34) (8373,) 0.4951630240057327
Test (8373, 34) (8373,) 0.4968350650901708


# Training

## Regression

In [0]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report

In [6]:
from sklearn.linear_model import LogisticRegression

model = LogisticRegression(random_state=0, solver='lbfgs', multi_class='auto', max_iter=1000).fit(X_train, y_train)

print(model.score(X_train, y_train))
print(model.score(X_val, y_val))
print(model.score(X_test, y_test))

# confusion_matrix(y_val, model.predict(X_val))

print(classification_report(y_val, model.predict(X_val)))

print(model.intercept_, model.coef_)


0.9371765267935345
0.9390899319240416
0.9361041442732593
              precision    recall  f1-score   support

         0.0       0.91      0.97      0.94      4227
         1.0       0.97      0.90      0.94      4146

   micro avg       0.94      0.94      0.94      8373
   macro avg       0.94      0.94      0.94      8373
weighted avg       0.94      0.94      0.94      8373

[3.00705902] [[ 3.29048868e-01  1.28142872e+00 -9.11747294e-02  3.27218577e-01
   8.95672260e-01 -2.53840460e+00 -1.15891015e-01 -2.73165027e-01
  -3.54217601e-01 -1.63730908e-01  1.37846257e-01  1.08912929e+00
   3.58490368e-01  4.53682081e-01 -1.78663676e-03 -3.16846355e-01
  -2.33700309e-01  2.43935817e+01  2.01671057e+01  1.91913009e+01
   2.23131176e+01  8.38295185e+00 -8.00448108e+00 -6.71054376e+00
  -8.68249393e+00 -3.72614547e+00  2.42710776e+00  9.11797004e+00
  -1.87704717e+01 -1.72799439e+01 -9.01745569e+00 -1.14341964e+01
   2.58584997e+00  1.24522597e+00]]


In [7]:
# can use these coef_ and intercept_ for javascript

def predict(x):
    tmp = (x * model.coef_).sum() + model.intercept_
    tmp = 1 / (1 + np.exp(-tmp))
    return tmp

for i in range(0, 20):
    print(ab_name_val[i], predict(X_val[i]), model.predict_proba(X_val)[i][1])

model.coef_[0].shape

a7 020 [0.08539954] 0.0853995412193786
a1 540 [0.22460002] 0.22460002289268655
b4 813 m  [0.02802288] 0.028022881694207153
b10 816 [0.98933965] 0.989339648353709
a6 431 m  [0.39762508] 0.3976250772342188
b10 083 m  [0.85136408] 0.8513640820851378
b9 0042 [0.84968416] 0.8496841608975633
a5 836 [0.29135142] 0.2913514219928503
b6 138 m  [0.96556088] 0.9655608760193223
a2 360 [0.12825749] 0.12825748620843244
a4 372 m  [0.27582101] 0.2758210104866193
a4 041 [0.17465062] 0.17465061636159862
b3 094 m  [0.99176588] 0.9917658777877499
a6 246 [0.22968684] 0.22968683967131415
b11 0041 m  [0.52913162] 0.5291316189009646
b8 084 [0.92240829] 0.9224082900787459
a5 377 m  [0.1957198] 0.1957198043772266
a2 295 [0.13467896] 0.13467896059249196
a7 075 [0.08684284] 0.08684284046133575
b8 896 m  [0.97357451] 0.9735745137178458


(34,)

## KNN

In [8]:
%%time

from sklearn.neighbors import KNeighborsClassifier

model = KNeighborsClassifier(n_neighbors=5).fit(X_train, y_train)

print(model.score(X_train, y_train))
print(model.score(X_val, y_val))
print(model.score(X_test, y_test))

# confusion_matrix(y_val, model.predict(X_val))

print(classification_report(y_val, model.predict(X_val)))

0.9991639461740585
0.9989251164457184
0.9990445479517497
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00      4227
         1.0       1.00      1.00      1.00      4146

   micro avg       1.00      1.00      1.00      8373
   macro avg       1.00      1.00      1.00      8373
weighted avg       1.00      1.00      1.00      8373

CPU times: user 5.03 s, sys: 0 ns, total: 5.03 s
Wall time: 5.03 s


## Basic neural network

In [8]:
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, SeparableConv2D, MaxPooling2D, AveragePooling2D, GlobalMaxPooling2D, BatchNormalization, Flatten, Dropout, InputLayer
from keras.optimizers import Adam, Adamax, RMSprop
from keras.callbacks import EarlyStopping, ReduceLROnPlateau
from keras.preprocessing.image import ImageDataGenerator

Using TensorFlow backend.


In [0]:
def train(make_model, n = 5, optimizer = lambda: 'rmsprop', callbacks = lambda: [EarlyStopping(patience=5, verbose=1)], verbose=0):
    # given a model, train it for n times and plot the associated metrics
    # make_model, optimizer and callbacks should be provided as a function as each time the functions are called, brand new instances are created in the for loop below. Use this because can't use deepcopy

    models = [] # to hold the model weights
    hists = [] # contains all the history
    
    make_model(None).summary()
    
    plt.figure(figsize=(4 * (n + 2), 8)) # the figure

    for i in range(n):
        model = make_model(i)
        model.compile(loss='binary_crossentropy', optimizer=optimizer(), metrics=['accuracy'])
        hist = model.fit(X_train, y_train, batch_size=256, epochs=100, validation_data=(X_val, y_val), callbacks=callbacks(), verbose=verbose)        
        hists.append(hist)
        
        models.append(model) # store the model
#         model.set_weights(initial_weights) # restore to the original weights

        r = range(2, len(hist.history['acc']) + 1) # starting from epoch 2, ignore the first epoch
        plt.subplot(2, n + 1, i + 2) # plot the loss history, starting with subplot 3
        plt.plot(r, hist.history['loss'][1:], '.-', label='Train loss') # ignore the first epoch
        plt.plot(r, hist.history['val_loss'][1:], '.-', label='Val loss')
        plt.legend()
        
        plt.subplot(2, n + 1, i + 2 + n + 1) # plot the acc history, starting with subplot 3
        plt.plot(r, hist.history['acc'][1:], '.-', label='Train acc')
        plt.plot(r, hist.history['val_acc'][1:], '.-', label='Val acc')
        plt.legend()

    
    plt.subplot(2, n + 1, 1) # plot the loss summary at the first subplot
    metrics = ['loss'] * n + ['val_loss'] * n 
    values = np.concatenate([
        [i.history['loss'][-1] for i in hists],
        [i.history['val_loss'][-1] for i in hists],
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Loss')
    values = values.reshape(2, -1)
    print('Loss', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))
    
    plt.subplot(2, n + 1, n + 2) # plot the acc summary at the second subplot
    metrics = ['acc'] * n + ['val_acc'] * n
    values = np.concatenate([
        [i.history['acc'][-1] for i in hists],
        [i.history['val_acc'][-1] for i in hists]
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Accuracy')
    values = values.reshape(2, -1)
    print('\nAcc', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))

    plt.tight_layout()
    
    return models, hists


In [0]:
%%time

# just 1 hidden layer

callbacks = lambda: [
    ReduceLROnPlateau(patience=3, verbose=1, factor=0.5, min_lr=1e-5),
    EarlyStopping(patience=5, verbose=1)
]

_ = train(lambda x: Sequential([
    Dense(64, input_shape=(X.shape[1],), activation='relu'),
    Dense(1, activation='sigmoid')
]), callbacks=callbacks, verbose=1)

In [0]:
%%time

# 3 hidden layers
callbacks = lambda: [
    ReduceLROnPlateau(patience=3, verbose=1, factor=0.5, min_lr=1e-5),
    EarlyStopping(patience=5, verbose=0)
]

_ = train(lambda x: Sequential([
    Dense(64, input_shape=(X.shape[1],), activation='relu'),
    Dense(64, activation='relu'),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
]), callbacks=callbacks, verbose=0)

## Convolution network

2

In [13]:
# del generate_img

subprocess b'12G'
length 13107


In [0]:
# convert the data to 2D

batch_size = 128 # can't use 256, memory crash in colab

def generate_img(features, targets, multiplier = 480):
    while 1:
        yield_features = []
        yield_targets = []
        for feature, target in zip(np.int32(features * 480).clip(0, 479), targets):
            tmp = np.zeros((480, 270, 1)) # pic of 480 * 270
            for x, y in zip(feature[:17], feature[17:]): tmp[x, y] = [1]
            yield_features.append(tmp)
            yield_targets.append(target)
    #         print('subprocess', subprocess.check_output(['free', '-h']).split()[7])
    #         print('getsizeof', sys.getsizeof(tmp) // 1048576, 'length', len(yield_features))
            if (len(yield_features) == batch_size):
                yield np.array(yield_features), np.array(yield_targets)
                yield_features = []
                yield_targets = []

model = Sequential()
model.add(Conv2D(32, kernel_size=3, strides=1, activation='relu', input_shape=(480, 270, 1)))
model.add(Flatten())
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.summary()
model.fit_generator(
    generate_img(X_train, y_train),
    steps_per_epoch=len(X_train) // batch_size,
    epochs=10,
    validation_data=generate_img(X_val, y_val),
    validation_steps=len(X_val) // batch_size,
    verbose=1
)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_14 (Conv2D)           (None, 478, 268, 32)      320       
_________________________________________________________________
flatten_14 (Flatten)         (None, 4099328)           0         
_________________________________________________________________
dense_14 (Dense)             (None, 1)                 4099329   
Total params: 4,099,649
Trainable params: 4,099,649
Non-trainable params: 0
_________________________________________________________________
Epoch 1/10
Epoch 2/10
Epoch 3/10
 26/196 [==>...........................] - ETA: 1:18 - loss: 0.0030 - acc: 0.9991

0

In [0]:
def train2(make_model, n = 5, optimizer = lambda: 'rmsprop', callbacks = lambda: [EarlyStopping(patience=5, verbose=1)], verbose=0):
    # given a model, train it for n times and plot the associated metrics
    # make_model, optimizer and callbacks should be provided as a function as each time the functions are called, brand new instances are created in the for loop below. Use this because can't use deepcopy

    models = [] # to hold the model weights
    hists = [] # contains all the history
    
    make_model(None).summary()
    
    plt.figure(figsize=(4 * (n + 2), 8)) # the figure

    for i in range(n):
        model = make_model(i)
        model.compile(loss='binary_crossentropy', optimizer=optimizer(), metrics=['accuracy'])
        hist = model.fit(X_train2, y_train2, batch_size=256, epochs=100, validation_data=(X_val2, y_val2), callbacks=callbacks(), verbose=verbose)        
        hists.append(hist)
        
        models.append(model) # store the model
#         model.set_weights(initial_weights) # restore to the original weights

        r = range(2, len(hist.history['acc']) + 1) # starting from epoch 2, ignore the first epoch
        plt.subplot(2, n + 1, i + 2) # plot the loss history, starting with subplot 3
        plt.plot(r, hist.history['loss'][1:], '.-', label='Train loss') # ignore the first epoch
        plt.plot(r, hist.history['val_loss'][1:], '.-', label='Val loss')
        plt.legend()
        
        plt.subplot(2, n + 1, i + 2 + n + 1) # plot the acc history, starting with subplot 3
        plt.plot(r, hist.history['acc'][1:], '.-', label='Train acc')
        plt.plot(r, hist.history['val_acc'][1:], '.-', label='Val acc')
        plt.legend()

    
    plt.subplot(2, n + 1, 1) # plot the loss summary at the first subplot
    metrics = ['loss'] * n + ['val_loss'] * n 
    values = np.concatenate([
        [i.history['loss'][-1] for i in hists],
        [i.history['val_loss'][-1] for i in hists],
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Loss')
    values = values.reshape(2, -1)
    print('Loss', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))
    
    plt.subplot(2, n + 1, n + 2) # plot the acc summary at the second subplot
    metrics = ['acc'] * n + ['val_acc'] * n
    values = np.concatenate([
        [i.history['acc'][-1] for i in hists],
        [i.history['val_acc'][-1] for i in hists]
    ])
    plt.plot(metrics, values, '.')
    plt.ylabel('Accuracy')
    values = values.reshape(2, -1)
    print('\nAcc', *values)
    print('Mean', values.mean(1), 'Std', values.std(1))

    plt.tight_layout()
    
    return models, hists


In [0]:
%%time
_ = train(lambda x: Sequential([
    Conv2D(32, kernel_size=3, strides=1, activation='relu', input_shape=(480, 270, 1)),
    Flatten(),
    Dense(1, activation='sigmoid')    
]))

<480x270 sparse matrix of type '<class 'numpy.float64'>'
	with 17 stored elements in Compressed Sparse Row format>