## Model for Drone Steering

In [24]:
from sklearn.base import BaseEstimator, TransformerMixin
#from category_encoders.one_hot import OneHotEncoder
#from sklearn.feature_extraction import DictVectorizer
from sklearn.pipeline import make_pipeline, make_union
from sklearn.preprocessing import Imputer
#from category_encoders.ordinal import OrdinalEncoder

from __future__ import print_function
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasClassifier
from keras.optimizers import SGD
from keras.constraints import maxnorm

import matplotlib.pyplot as plt 
import pandas as pd
import numpy as np
import seaborn as sns

from pandas.api.types import is_numeric_dtype

import warnings
warnings.filterwarnings("ignore")
import ipytest.magics
import pytest
# set the file name (required)
__file__ = 'drone_pos_model.ipynb'

In [2]:
class Shuffler(BaseEstimator, TransformerMixin):
    
    def __init__(self):
        pass
        
    def fit(self, x, y = None):
        return self
    
    def transform(self, x): #x is df
        x=x.loc[np.random.permutation(x.index)]
        
        return x
############################################################################################
class XCentralizer(BaseEstimator, TransformerMixin):
    
    def __init__(self, x_columns):
        self.x_columns = x_columns
        
    def fit(self, x, y = None):
        return self
    
    def transform(self, x): #x is df
        shift=x[["rightShoulder_x","leftShoulder_x","leftHip_x","rightHip_x"]].sum(axis=1)/4
        for col in self.x_columns:
            x[col] = x[col] - shift
        return x
############################################################################################
    
class YCentralizer(BaseEstimator, TransformerMixin):
    
    def __init__(self, y_columns):
        self.y_columns = y_columns
        
    def fit(self, x, y = None):
        return self
    
    def transform(self, x): #x is df
        shift=x[["rightShoulder_y","leftShoulder_y","leftHip_y","rightHip_y"]].sum(axis=1)/4
        for col in list(set(self.y_columns)-set(["label"])):
            x[col] = x[col] - shift
        return x
############################################################################################

class YScaler(BaseEstimator, TransformerMixin):
    
    def __init__(self):
        pass
        
    def fit(self, x, y = None):
        return self
    
    def transform(self, x): #x is df
        shoulder_y = x[["rightShoulder_y","leftShoulder_y"]].sum(axis=1)/2
        hip_y = x[["leftHip_y","rightHip_y"]].sum(axis=1)/2
        y_dist = hip_y - shoulder_y
        
        for col in list(set(x.columns)-set(["label"])):
            x[col] /= y_dist
        return x

### Inspect Train Data

In [3]:
!pwd

/Users/lsafari/drone_steering/models


In [4]:
#df = pd.read_csv("video_001.csv", delimiter=',')
# Christian's video is less noisy. Therefore I only train the model with his data at the moment. 
# acc increased 5 % taking his video camparing to all videos.
#path = "all_videos_posture_steptime50_checksum8160"
#path = "video_Christian_posture_steptime50_checksum8160"
path = "video_all_posture_steptime50_checksum8160"
df = pd.read_csv("/Users/lsafari/drone_steering/data/" + path + ".csv",low_memory=False)
#df=df.drop([5557], axis=0)
#type(df.leftShoulder_x)
#df.info()
df.head(3)


Unnamed: 0,leftShoulder_x,leftShoulder_y,rightShoulder_x,rightShoulder_y,leftElbow_x,leftElbow_y,rightElbow_x,rightElbow_y,leftWrist_x,leftWrist_y,rightWrist_x,rightWrist_y,leftHip_x,leftHip_y,rightHip_x,rightHip_y,label
0,0.4925,0.1875,0.4,0.1825,0.505,0.26,0.34375,0.195,0.50875,0.33875,0.26625,0.16875,0.465,0.34375,0.41125,0.34625,1
1,0.4925,0.18875,0.4025,0.18625,0.5075,0.25875,0.3325,0.1975,0.5,0.3375,0.27625,0.175,0.4675,0.33625,0.40875,0.3375,1
2,0.49125,0.19,0.4025,0.17875,0.505,0.26125,0.335,0.19875,0.51125,0.335,0.26375,0.16875,0.46375,0.33875,0.40875,0.3375,1


In [5]:
#df[["leftShoulder_y","rightShoulder_y","leftHip_y","rightHip_y"]]
#df.describe()
#df.info()
#df.label.unique()

In [6]:
df.shape

(3837, 17)

In [7]:
#for i in df:
    #print(i, df[i].isnull().sum())    

In [8]:
df1=df.dropna().drop_duplicates()
#df1
#df1
df1.shape

(3719, 17)

In [9]:
# Checking that we don't have any null values
assert df1.isnull().all().all() == False

In [10]:
#df1.describe()

In [12]:
x=df1.copy()
#x=x.drop(["label"], axis = 1)
x_cols = ['leftShoulder_x', 'rightShoulder_x',
        'leftElbow_x', 'rightElbow_x',
        'leftWrist_x', 'rightWrist_x',
        'leftHip_x', 'rightHip_x']
#xtrans = XCentralizer(x_cols)
#x = xtrans.transform(x)

y_cols = list(set(x.columns)-set(x_cols))
#print(y_cols)
#ytrans = YCentralizer(y_cols)
#x = ytrans.transform(x)

#ytrans = YScaler()
#x = ytrans.transform(x)
x[:2]

Unnamed: 0,leftShoulder_x,leftShoulder_y,rightShoulder_x,rightShoulder_y,leftElbow_x,leftElbow_y,rightElbow_x,rightElbow_y,leftWrist_x,leftWrist_y,rightWrist_x,rightWrist_y,leftHip_x,leftHip_y,rightHip_x,rightHip_y,label
0,0.4925,0.1875,0.4,0.1825,0.505,0.26,0.34375,0.195,0.50875,0.33875,0.26625,0.16875,0.465,0.34375,0.41125,0.34625,1
1,0.4925,0.18875,0.4025,0.18625,0.5075,0.25875,0.3325,0.1975,0.5,0.3375,0.27625,0.175,0.4675,0.33625,0.40875,0.3375,1


In [13]:
from sklearn.pipeline import make_pipeline, make_union

processing_pipeline = make_pipeline(
    XCentralizer(x_cols),
    YCentralizer(y_cols), 
    YScaler(),
    Shuffler()
    )

In [27]:
processed_df = processing_pipeline.fit_transform(x)

In [28]:
%%run_pytest[clean]
def test_processingpipeline():
    # remember, this first pipeline only acts on the features, not the target.
    processed_df = processing_pipeline.fit_transform(x)
    
    # check for data leakage
    assert x.shape[0] == processed_df.shape[0]

platform darwin -- Python 3.6.5, pytest-3.5.1, py-1.5.3, pluggy-0.6.0
rootdir: /Users/lsafari/drone_steering/models, inifile:
plugins: remotedata-0.2.1, openfiles-0.3.0, doctestplus-0.1.3, arraydiff-0.2
collected 1 item

drone_pos_model.py .                                                     [100%]

None
  Module already imported so cannot be rewritten: pytest_remotedata
  Module already imported so cannot be rewritten: pytest_openfiles
  Module already imported so cannot be rewritten: pytest_doctestplus
  Module already imported so cannot be rewritten: pytest_arraydiff



In [32]:
#shuff=Shuffler()
#df_shuffled=shuff.transform(df1)
df_train = processed_df.iloc[:int(processed_df.shape[0]*0.8)]
df_val = processed_df.iloc[int(processed_df.shape[0]*0.8):]

In [33]:
assert df_train.shape[0] + df_val.shape[0] == processed_df.shape[0]

In [71]:
processed_df.head(5)

Unnamed: 0,leftShoulder_x,leftShoulder_y,rightShoulder_x,rightShoulder_y,leftElbow_x,leftElbow_y,rightElbow_x,rightElbow_y,leftWrist_x,leftWrist_y,rightWrist_x,rightWrist_y,leftHip_x,leftHip_y,rightHip_x,rightHip_y,label
3477,0.336576,-0.538911,-0.270428,-0.461089,0.375486,-0.064202,-0.620623,-0.110895,0.523346,-1.379377,-1.01751,0.099222,0.149805,0.480545,-0.215953,0.519455,3
1699,0.295045,-0.486486,-0.29955,-0.513514,0.358108,0.027027,-0.434685,-0.009009,0.412162,0.504505,-0.443694,0.495495,0.222973,0.522523,-0.218468,0.477477,4
2312,0.173507,-0.559701,-0.274254,-0.440299,0.755597,-0.649254,-0.341418,-0.029851,0.531716,0.276119,-0.31903,0.522388,0.233209,0.492537,-0.132463,0.507463,1
1960,0.298673,-0.477876,-0.276549,-0.522124,0.378319,0.044248,-0.488938,-0.00885,0.440265,0.380531,-0.462389,0.40708,0.183628,0.486726,-0.205752,0.513274,0
3585,0.297348,-0.454545,-0.270833,-0.545455,0.403409,0.05303,-0.399621,-0.030303,0.350379,0.575758,-0.535985,0.257576,0.176136,0.537879,-0.202652,0.462121,1


In [94]:
y_train = df_train['label']
x_train = df_train.drop('label', axis = 1)

y_val = df_val['label']
x_val = df_val.drop('label', axis = 1)

In [73]:
#df_shuffled.columns
#y_train
#x_train

In [89]:
x_train.shape, y_train.shape, x_val.shape, y_val.shape

((2975, 16), (2975,), (744, 16), (744,))

In [91]:
x_train.values

array([[ 0.33657588, -0.53891051, -0.27042802, ...,  0.48054475,
        -0.21595331,  0.51945525],
       [ 0.29504505, -0.48648649, -0.29954955, ...,  0.52252252,
        -0.21846847,  0.47747748],
       [ 0.17350746, -0.55970149, -0.27425373, ...,  0.49253731,
        -0.13246269,  0.50746269],
       ...,
       [ 0.29147982, -0.5044843 , -0.26457399, ...,  0.49103139,
        -0.21973094,  0.50896861],
       [ 0.2689243 , -0.48406375, -0.28884462, ...,  0.49601594,
        -0.18525896,  0.50398406],
       [ 0.29955947, -0.47797357, -0.27312775, ...,  0.49118943,
        -0.21145374,  0.50881057]])

In [75]:
x_train=x_train.values
y_train=y_train.values
x_val=x_val.values
y_val=y_val.values

In [38]:
x_train.shape, y_train.shape, x_val.shape, y_val.shape

((2975, 16), (2975,), (744, 16), (744,))

In [76]:
#x_train
print("x_train=", x_train[0],"\n y_train=", y_train[0])

x_train= [ 0.33657588 -0.53891051 -0.27042802 -0.46108949  0.37548638 -0.06420233
 -0.62062257 -0.11089494  0.5233463  -1.37937743 -1.01750973  0.09922179
  0.14980545  0.48054475 -0.21595331  0.51945525] 
 y_train= 3


### Training Model

In [77]:
#Load data from a text file, with missing values handled as specified
#data_csv = np.genfromtxt('/Users/lsafari/drone_steering/data/video_all_posture_steptime50_checksum8160.csv', delimiter=',')
#train_x = data_csv[1:2000,0:16] 
#train_y = data_csv[1:2000,16:17]
#test_x = data_csv[2000:,0:16] 
#test_y = data_csv[2000:,16:17]

In [119]:
x_train = df1.drop("label", axis=1)
y_train = df1["label"]

In [78]:
from keras.utils import to_categorical
y_train = to_categorical(y_train)
y_val = to_categorical(y_val)

In [93]:
y_train

3477    3
1699    4
2312    1
1960    0
3585    1
3471    3
1974    0
929     2
1043    0
2547    4
2369    1
1129    0
1385    3
2727    4
888     2
2081    0
390     3
2191    0
614     4
736     1
2334    1
110     4
1547    3
1731    4
2102    0
1095    0
3720    2
2672    4
1512    3
561     4
       ..
2503    4
3209    3
3763    2
1056    0
511     4
1029    0
3629    1
2918    4
604     4
158     2
3732    2
2257    2
2851    4
868     2
696     1
278     1
904     2
1288    3
2034    0
766     1
1444    3
1507    3
2946    4
2745    4
2721    4
919     2
3181    3
1177    0
3402    3
1962    0
Name: label, Length: 2975, dtype: int64

In [80]:
print(x_train[0])
print(y_train[0])

[ 0.33657588 -0.53891051 -0.27042802 -0.46108949  0.37548638 -0.06420233
 -0.62062257 -0.11089494  0.5233463  -1.37937743 -1.01750973  0.09922179
  0.14980545  0.48054475 -0.21595331  0.51945525]
[0. 0. 0. 1. 0.]


# Model Architecture

In [81]:
from keras import models, layers
from keras.models import Model
from keras.layers import Input, Dense
from keras import optimizers, losses, metrics

def creat_model():
    #default vaues
    #activation="relu"
    #optimizer="adam"
    lr=0.01
    #momentum=0
    #creat model
    model = models.Sequential()
    model.add(layers.Dense(
        32, 
        activation="relu", 
        input_shape=(16, )))
    model.add(layers.Dense(15, activation="relu"))
    model.add(layers.Dense(5, activation="softmax")) #is a fast rectifier
    #model.summary()   

    model.compile(
    optimizer=optimizers.RMSprop(lr=0.01),
    loss=losses.categorical_crossentropy,
    metrics=["accuracy"] 
    )
    return model

In [92]:
model_classifier = KerasClassifier(build_fn=creat_model)
type(model_classifier)
#model_classifier.fit()

keras.wrappers.scikit_learn.KerasClassifier

In [86]:
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestClassifier
from math import sqrt
import matplotlib.pyplot as plt

In [120]:
finalpipeline = (make_pipeline(processing_pipeline,RandomForestClassifier(random_state=1, 
                                                                          n_jobs=-1, 
                                                                          n_estimators=100)))
# Fitting the pipeline
#finalpipeline.fit(x_train, y_train)
#finalpipeline

In [121]:
#Use sckit_learn to grid search
activation = ['relu']
lr = [0.0001, 0.001, 0.1]
neurons = [1, 5, 10, 15, 20, 25, 30]
optimizer = ['RMSprop','Adam'] #['SGD', 'Adgrad', 'Adadelta', 'Adamax', 'Nadam']
######## grid serach epochs, batch_size
epochs = [100]
batch_size = [20]
param_grid = dict(epochs=epochs, batch_size=batch_size)


#--------------------------- Random Forest --------------------------------------------
# Number of trees in random forest
n_estimators = [100, 500, 1000]
# Number of features to consider at every split
max_features = [5, 10, 'sqrt']
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(5, 110, num = 4)]
max_depth.append(None)
random_grid = {'randomforestclassifier__n_estimators': n_estimators}
'''               
               ,
               'randomforestclassifier__max_features': max_features,
               'randomforestclassifier__max_depth': max_depth,
              }
'''

#######################################################
grid_search = GridSearchCV(estimator=finalpipeline, param_grid=random_grid, n_jobs=-1)


In [122]:
grid_result = grid_search.fit(x_train, y_train)

In [123]:
grid_result.cv_results_

{'mean_fit_time': array([ 4.44905106,  8.00048868, 10.46329761]),
 'std_fit_time': array([0.09472258, 0.21949691, 3.49098227]),
 'mean_score_time': array([8.67363262, 6.64293098, 3.18102137]),
 'std_score_time': array([0.09388991, 0.13842127, 1.16149164]),
 'param_randomforestclassifier__n_estimators': masked_array(data=[100, 500, 1000],
              mask=[False, False, False],
        fill_value='?',
             dtype=object),
 'params': [{'randomforestclassifier__n_estimators': 100},
  {'randomforestclassifier__n_estimators': 500},
  {'randomforestclassifier__n_estimators': 1000}],
 'split0_test_score': array([0.23771152, 0.24012893, 0.23126511]),
 'split1_test_score': array([0.23163842, 0.23567393, 0.23567393]),
 'split2_test_score': array([0.24374496, 0.2582728 , 0.26472962]),
 'mean_test_score': array([0.23769831, 0.24468943, 0.24388276]),
 'std_test_score': array([0.00494115, 0.00977181, 0.01484456]),
 'rank_test_score': array([3, 1, 2], dtype=int32),
 'split0_train_score': arr

In [None]:
##############################################################
# summarize results
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

In [None]:
history = model_classifier.fit(x_train, y_train, epochs=100, batch_size=30)

In [None]:

plt.plot(history.history["loss"], label=["loss"]) #play with hyperparameters to see the changes
plt.legend()
plt.show()
plt.close()

plt.plot(history.history["acc"],  label=["acc"])
plt.legend()
plt.show()
plt.close()



In [None]:
test_loss, test_acc = model_classifier.evaluate(x_val, y_val)
print("Loss / Accuracy Evaluation")
print("--------------------------")
print("Loss:     " + str(round(test_loss,5)))
print("Accuracy: " + str(round(test_acc,5)))

In [None]:
val_y_pred = model_classifier.predict(x_val)

In [None]:
val_y_pred[10]

In [None]:
y_val[10]

### Save Model in tensorflow.js Format

The tensorflowjs library can't be installed directly with pip / conda due to conflicting dependencies. Best is to set up a new environment explicitly for this and install tensorflowjs with the following commands:

```
pip install tensorflow==1.11.0rc2 h5py numpy keras
pip install --no-deps tensorflowjs
```

In [None]:
! pip install tensorflow==1.11.0rc2 h5py numpy keras
! pip install --no-deps tensorflowjs

In [None]:
import tensorflowjs as tfjs

In [None]:
tfjs.converters.save_keras_model(model, 'model_tfjs')

We need to adapt the two files as follows in order for them to work on Azure:
* add a file extension .pb to the file with no extension (otherwise Azure blocks it from viewing)
* adapt the automatically generated model.json to reflect the extension .pb