In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
import pandas as pd
import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc
from pandas.plotting import register_matplotlib_converters

%matplotlib inline
%config InlineBackend.figure_format='retina'

register_matplotlib_converters()
sns.set(style='whitegrid', palette='muted', font_scale=1.5)

rcParams['figure.figsize'] = 22, 10

RANDOM_SEED = 42

np.random.seed(RANDOM_SEED)
#tf.random.set_random_seed(RANDOM_SEED)

  import pandas.util.testing as tm


In [2]:
tf.__version__

'2.5.0'

In [3]:
# Tensorflow 2

In [4]:
#!pip3 install --upgrade tensorflow

In [5]:
column_names = ['user_id', 'activity', 'timestamp', 'x_axis', 'y_axis', 'z_axis']

df = pd.read_csv('../data/MobiAct/raw_data.csv')

In [6]:
df = df.rename(columns={'User': 'user_id', 'Activity':'activity', 
                   'Time': 'timestamp', 'x':'x_axis', 'y':'y_axis', 'z':'z_axis'})
df.head(2)

Unnamed: 0,timestamp,x_axis,y_axis,z_axis,activity,user_id
0,235423306000,-0.172383,9.883265,-0.45011,STD,39
1,235450722000,-0.220267,9.921572,-0.440533,STD,39


In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5275371 entries, 0 to 5275370
Data columns (total 6 columns):
 #   Column     Dtype  
---  ------     -----  
 0   timestamp  int64  
 1   x_axis     float64
 2   y_axis     float64
 3   z_axis     float64
 4   activity   object 
 5   user_id    int64  
dtypes: float64(3), int64(2), object(1)
memory usage: 241.5+ MB


In [8]:
df.dropna(axis=0, how='any', inplace=True)

In [9]:
df.head()

Unnamed: 0,timestamp,x_axis,y_axis,z_axis,activity,user_id
0,235423306000,-0.172383,9.883265,-0.45011,STD,39
1,235450722000,-0.220267,9.921572,-0.440533,STD,39
2,235460429000,-0.258574,9.892841,-0.459687,STD,39
3,235470358000,-0.248997,9.873688,-0.45011,STD,39
4,235490379000,-0.229843,9.825804,-0.411803,STD,39


In [10]:
df.user_id.unique()

array([39, 41, 20, 65, 58, 22, 67, 43, 47,  9, 26, 63, 24, 61, 19, 45, 66,
       23, 42, 40, 38, 59, 64, 21, 18, 60, 25, 44, 46, 62, 27,  8, 16, 53,
        4, 32, 48, 29,  6, 51,  2, 10, 55, 34, 36, 12, 57, 49, 50,  7, 28,
        5, 52, 33, 37,  1, 56, 54, 11,  3, 35])

In [11]:
df.user_id.nunique()

61

In [12]:
df_train = df[df['user_id'] <= 50]
df_test = df[df['user_id'] > 50]

In [13]:
df_train.shape[0] / df.shape[0]

0.7188400588318812

In [14]:
df_test.shape

(1483223, 6)

In [15]:
from sklearn.preprocessing import RobustScaler

scale_columns = ['x_axis', 'y_axis', 'z_axis']

scaler = RobustScaler()

scaler = scaler.fit(df_train[scale_columns])

df_train.loc[:, scale_columns] = scaler.transform(df_train[scale_columns].to_numpy())
df_test.loc[:, scale_columns] = scaler.transform(df_test[scale_columns].to_numpy())

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[item] = s


In [16]:
from scipy import stats

def create_dataset(X, y, time_steps=1, step=1):
    Xs, ys = [], []
    for i in range(0, len(X) - time_steps, step):
        v = X.iloc[i:(i + time_steps)].values
        labels = y.iloc[i: i + time_steps]
        Xs.append(v)        
        ys.append(stats.mode(labels)[0][0])
    return np.array(Xs), np.array(ys).reshape(-1, 1)

TIME_STEPS = 200
STEP = 40

X_train, y_train = create_dataset(
    df_train[['x_axis', 'y_axis', 'z_axis']], 
    df_train.activity, 
    TIME_STEPS, 
    STEP
)

X_test, y_test = create_dataset(
    df_test[['x_axis', 'y_axis', 'z_axis']], 
    df_test.activity, 
    TIME_STEPS, 
    STEP
)

In [17]:
print(X_train.shape, y_train.shape)

(94799, 200, 3) (94799, 1)


In [18]:
from sklearn.preprocessing import OneHotEncoder

enc = OneHotEncoder(handle_unknown='ignore', sparse=False)

enc = enc.fit(y_train)

y_train = enc.transform(y_train)
y_test = enc.transform(y_test)

In [19]:
print(X_train.shape, y_train.shape)

(94799, 200, 3) (94799, 11)


In [20]:
momentum = [0, 0.2, 0.4, 0.6, 0.8, 0.9]
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]   
batch_size = [64, 128]
epochs = [20, 50]

In [21]:
model = keras.Sequential()
model.add(
    keras.layers.Bidirectional(
      keras.layers.LSTM(
          units=128, 
          input_shape=[X_train.shape[1], X_train.shape[2]]
      )
    )
)
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.Dense(units=128, activation='relu'))
model.add(keras.layers.Dense(y_train.shape[1], activation='softmax'))
#model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'], learning_rate=0.1)
#model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'], learn_rate =0.1)
model.compile(loss='categorical_crossentropy', optimizer=keras.optimizers.Adam(learning_rate=0.01), metrics=['acc'])
history = model.fit(X_train, y_train, epochs=10, batch_size=64, validation_split=0.1, shuffle=True)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
model.optimizer.get_config()

In [22]:
print(model.optimizer.lr)

<tf.Variable 'Adam/learning_rate:0' shape=() dtype=float32, numpy=0.01>


In [23]:
model.evaluate(X_test, y_test)



[5.596107006072998, 0.7160966396331787]

In [24]:
model.save('saved_models/lstm.h5')

In [None]:
model = tf.keras.models.load_model('saved_models/lstm.h5')

In [25]:
y_pred = model.predict(X_test)

In [26]:
input_shape = [X_train.shape[1], X_train.shape[2]]

In [None]:
# Save the model.
with open('../tflite_models/lstm.', 'wb') as f:
  f.write(tflite_model)

In [27]:
y_pred

array([[1.32804979e-38, 1.06071132e-35, 0.00000000e+00, ...,
        9.93608846e-04, 1.31371559e-03, 1.78992973e-10],
       [1.95587747e-37, 1.26437791e-34, 1.34905178e-37, ...,
        1.24109932e-03, 1.66124641e-03, 3.79007603e-10],
       [1.25960479e-20, 3.86151898e-19, 1.00807112e-20, ...,
        2.86046248e-02, 4.57158759e-02, 1.76000467e-05],
       ...,
       [1.19860753e-21, 1.61337119e-20, 1.44624907e-21, ...,
        4.06885371e-02, 3.50342616e-02, 9.22889113e-01],
       [1.36814862e-13, 7.20992927e-13, 1.84400780e-13, ...,
        1.01091444e-01, 1.42108724e-01, 7.37684309e-01],
       [2.28779594e-20, 2.55626572e-19, 2.65788156e-20, ...,
        4.50406596e-02, 4.03689891e-02, 9.12792146e-01]], dtype=float32)

In [28]:
enc.inverse_transform(y_pred)

array([['STD'],
       ['STD'],
       ['STD'],
       ...,
       ['WAL'],
       ['WAL'],
       ['WAL']], dtype='<U3')

## TFLITE CONVERTER

In [30]:
TFLITE_MODEL = '../tflite_models/mobiact/model_lstm_lite.tflite'
TFLITE_DYNAMIC_MODEL = '../tflite_models/mobiact/model_lstm_lite_dynamic.tflite'
TFLITE_FLOAT16_MODEL = '../tflite_models/mobiact/model_lstm_lite_float16.tflite'

In [31]:
# Convert the model 
converter = tf.lite.TFLiteConverter.from_keras_model(model) # path to the SavedModel directory
tflite_model = converter.convert()



INFO:tensorflow:Assets written to: /var/folders/7r/4tyskpd90772kcc14kpglgdrz0lbv6/T/tmpyinpq_af/assets


INFO:tensorflow:Assets written to: /var/folders/7r/4tyskpd90772kcc14kpglgdrz0lbv6/T/tmpyinpq_af/assets


In [32]:
# Save the model.
with open(TFLITE_MODEL, 'wb') as f:
  f.write(tflite_model)

In [33]:
# Convert the model 
# Dynamic
converter = tf.lite.TFLiteConverter.from_keras_model(model) # path to the SavedModel directory
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model_dynamic = converter.convert()



INFO:tensorflow:Assets written to: /var/folders/7r/4tyskpd90772kcc14kpglgdrz0lbv6/T/tmpt_rze001/assets


INFO:tensorflow:Assets written to: /var/folders/7r/4tyskpd90772kcc14kpglgdrz0lbv6/T/tmpt_rze001/assets


In [34]:
# Save the model.
with open(TFLITE_DYNAMIC_MODEL, 'wb') as f:
  f.write(tflite_quant_model_dynamic)

In [35]:
# Convert the model 
# Float16
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.target_spec.supported_types = [tf.float16]
tflite_quant_model_float16 = converter.convert()



INFO:tensorflow:Assets written to: /var/folders/7r/4tyskpd90772kcc14kpglgdrz0lbv6/T/tmp8axwi5ta/assets


INFO:tensorflow:Assets written to: /var/folders/7r/4tyskpd90772kcc14kpglgdrz0lbv6/T/tmp8axwi5ta/assets


In [36]:
# Save the model.
with open(TFLITE_FLOAT16_MODEL, 'wb') as f:
  f.write(tflite_quant_model_float16)

## TFLITE INTERPRETER

## TFLITE

In [64]:
# Load the TFLite model and allocate tensors.  
interpreter = tf.lite.Interpreter(model_path=TFLITE_MODEL, num_threads=2)    #(model_content=tflite_model) 
interpreter.allocate_tensors()

In [65]:
# Get input and output tensors.    
input_det = interpreter.get_input_details()[0]
output_det = interpreter.get_output_details()[0]

In [66]:
input_index = input_det["index"]
output_index = output_det["index"]
input_shape = input_det["shape"]
output_shape = output_det["shape"]
input_dtype = input_det["dtype"]
output_dtype = output_det["dtype"]

In [67]:
X_train.shape, y_train.shape, X_test.shape, y_test.shape

((94799, 200, 3), (94799, 11), (37076, 200, 3), (37076, 11))

In [68]:
def predict(inp, input_dtype, output_shape, output_dtype, interpreter, input_index, output_index):
    inp = inp.astype(input_dtype)
    count = inp.shape[0]
    out = np.zeros((count, output_shape[1]), dtype=output_dtype)
    for i in range(count):
        interpreter.set_tensor(input_index, inp[i:i+1])
        interpreter.invoke()
        out[i] = interpreter.get_tensor(output_index)[0]
    return out

In [None]:
%time y_pred_lite = predict(X_test, input_dtype, output_shape, output_dtype, interpreter, input_index, output_index)

In [None]:
y_pred_lite.shape

In [None]:
#y_pred_lite = np.argmax(y_pred_lite, axis=1) 
y_pred_lite

## TFLITE DYNAMIC

In [45]:
# Load the TFLite model and allocate tensors.  
interpreter = tf.lite.Interpreter(model_path=TFLITE_DYNAMIC_MODEL, num_threads=4)    #(model_content=tflite_model) 
interpreter.allocate_tensors()

# Get input and output tensors.    
input_det = interpreter.get_input_details()[0]
output_det = interpreter.get_output_details()[0]

input_index = input_det["index"]
output_index = output_det["index"]
input_shape = input_det["shape"]
output_shape = output_det["shape"]
input_dtype = input_det["dtype"]
output_dtype = output_det["dtype"]

In [46]:
%time y_pred_lite_dynamic = predict(X_test, input_dtype, output_shape, output_dtype, interpreter, input_index, output_index)

CPU times: user 7min 58s, sys: 1.51 s, total: 8min
Wall time: 8min 2s


In [47]:
y_pred_lite_dynamic = np.argmax(y_pred_lite_dynamic, axis=1) 
y_pred_lite_dynamic

array([ 7,  7,  7, ..., 10, 10, 10])

## TFLITE FLOAT16

In [48]:
# Load the TFLite model and allocate tensors.  
interpreter = tf.lite.Interpreter(model_path=TFLITE_FLOAT16_MODEL, num_threads=2)    #(model_content=tflite_model) 
interpreter.allocate_tensors()

# Get input and output tensors.    
input_det = interpreter.get_input_details()[0]
output_det = interpreter.get_output_details()[0]

input_index = input_det["index"]
output_index = output_det["index"]
input_shape = input_det["shape"]
output_shape = output_det["shape"]
input_dtype = input_det["dtype"]
output_dtype = output_det["dtype"]

In [49]:
%time y_pred_lite_float16 = predict(X_test, input_dtype, output_shape, output_dtype, interpreter, input_index, output_index)

CPU times: user 8min 5s, sys: 1.08 s, total: 8min 6s
Wall time: 8min 7s


In [50]:
y_pred_lite_float16 = np.argmax(y_pred_lite_float16, axis=1) 
y_pred_lite_float16

array([ 7,  7,  7, ..., 10, 10, 10])

In [51]:
## Ploting Confusion Matrix

In [52]:
# Accuracy
from sklearn.metrics import accuracy_score
print(accuracy_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred)))

0.7160966663070449


In [None]:
#from sklearn.metrics import accuracy_score
#print(accuracy_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred)))

print(accuracy_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred_lite)))

#print(accuracy_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred_lite_dynamic)))

In [63]:
enc.inverse_transform(y_pred)

array([['STD'],
       ['STD'],
       ['STD'],
       ...,
       ['WAL'],
       ['WAL'],
       ['WAL']], dtype='<U3')

In [None]:
y_test

In [None]:
enc.inverse_transform(y_test)

In [None]:
# Confusion Matrix
from sklearn.metrics import confusion_matrix
print(confusion_matrix(enc.inverse_transform(y_test), enc.inverse_transform(y_pred)))
# Accuracy
from sklearn.metrics import accuracy_score
print(accuracy_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred)))
# Recall
from sklearn.metrics import recall_score
print(recall_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred), average=None))
# Precision
from sklearn.metrics import precision_score
print(precision_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred), average=None))
# F1 score
from sklearn.metrics import f1_score
f1_score(enc.inverse_transform(y_test), enc.inverse_transform(y_pred), average=None)

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.metrics import f1_score

binarizer = MultiLabelBinarizer()

# This should be your original approach
#binarizer.fit(your actual true output consisting of all labels)

# In this case, I am considering only the given labels.
binarizer.fit(enc.inverse_transform(y_test))

f1_score(binarizer.transform(enc.inverse_transform(y_test)), 
         binarizer.transform(enc.inverse_transform(y_pred)), 
         average='weighted')

In [None]:
enc.inverse_transform(y_pred)

In [None]:
f1_score(binarizer.transform(enc.inverse_transform(y_test)), 
         binarizer.transform(enc.inverse_transform(y_pred)), 
         average=None)