In [9]:
from __future__ import absolute_import, division, print_function, unicode_literals
import tensorflow as tf
from tensorflow import keras
from os import listdir, path
import numpy as np
from collections import defaultdict
import datetime
import random

random.seed(42) # Keep the order stable everytime shuffling the files while creating training datasets

In [10]:
# !pip install tensorflow

## Global variables

In [12]:
seq_length  = 36 # This will be used to keep the fixed input size for the first CNN layer
dim         = 6  # Number of datapoints in a single reading accX,accY,accZ,gyrX,gyrY,gyrZ
num_classes = 10 # Number of output classes [0,9] 

## Sequence Padding
#### When collecting sequence data, individual samples have different lengths. Since the input data for a convolutional neural network  must be a single tensor, samples need to be padded. The sequence are padded at the beginning and at the end with neighboring values.

In [19]:
def padding(data):
    padded_data = []
    noise_level = [ 20, 20, 20, 0.2, 0.2, 0.2 ]
    
    tmp_data = (np.random.rand(seq_length, dim) - 0.5) * noise_level + data[0]
    tmp_data[(seq_length - min(len(data), seq_length)):] = data[:min(len(data), seq_length)]
    padded_data.append(tmp_data)

    tmp_data = (np.random.rand(seq_length, dim) - 0.5) * noise_level + data[-1]
    tmp_data[:min(len(data), seq_length)] = data[:min(len(data), seq_length)]
    
    padded_data.append(tmp_data)
    return padded_data

## Convert to TensorFlow dataset, keeps data and labels together


In [22]:
def build_dataset(data, label):
    # Add 2 padding, initialize data and label
    padded_num = 2
    length = len(data) * padded_num
    features = np.zeros((length, seq_length, dim))
    labels = np.zeros(length)
    # Get padding for train, valid and test
    for idx, (data, label) in enumerate(zip(data, label)):
        padded_data = padding(data)
        for num in range(padded_num):
            features[padded_num * idx + num] = padded_data[num]
            labels[padded_num * idx + num] = label
    # Turn into tf.data.Dataset
    dataset = tf.data.Dataset.from_tensor_slices((features, labels.astype("int32")))
    return length, dataset

## Time Warping

In [25]:
def time_warping(molecule, denominator, data):
  tmp_data = [[0 for i in range(len(data[0]))] for j in range((int(len(data) / molecule) - 1) * denominator)]
    
  for i in range(int(len(data) / molecule) - 1):
    for j in range(len(data[i])):
      for k in range(denominator):
        tmp_data[denominator * i + k][j] = (data[molecule * i + k][j] * (denominator - k) 
                                            + data[molecule * i + k + 1][j] * k) / denominator
  return tmp_data


## Data augmentation

In [28]:
def augment_data(original_data, original_label):
  new_data = []
  new_label = []
  for idx, (data, label) in enumerate(zip(original_data, original_label)):  # pylint: disable=unused-variable
    # Original data
    new_data.append(data)
    new_label.append(label)
    # Shift Sequence
    for num in range(5):  # pylint: disable=unused-variable
      new_data.append((np.array(data, dtype=np.float32) +
                       (random.random() - 0.5) * 200).tolist())
      new_label.append(label)
    # Add Random noise
    tmp_data = [[0 for i in range(len(data[0]))] for j in range(len(data))]
    for num in range(5):
      for i in range(len(tmp_data)):
        for j in range(len(tmp_data[i])):
          tmp_data[i][j] = data[i][j] + 5 * random.random()
      new_data.append(tmp_data)
      new_label.append(label)
    # Time warping
    fractions = [(3, 2), (5, 3), (2, 3), (3, 4), (9, 5), (6, 5), (4, 5)]
    for molecule, denominator in fractions:
      new_data.append(time_warping(molecule, denominator, data))
      new_label.append(label)
    # Movement amplification
    for molecule, denominator in fractions:
      new_data.append(
          (np.array(data, dtype=np.float32) * molecule / denominator).tolist())
      new_label.append(label)
  return new_data, new_label

## Load data from files

In [31]:
def load_data(data_type, files):
    data   = []
    labels = []
    random.shuffle(files)
   
    for file in files:
        with open(file) as f:
            label = path.splitext(file)[0][-1]
            labels.append(label)
            readings = []
            for line in f:
                reading = line.strip().split(',')
                readings.append([float(i) for i in reading[0:6]])

            data.append(readings)
            
    if data_type == 'train':
        data, labels = augment_data(data, labels)
    
    return build_dataset(data, labels)

## Prepare training, validation, and test datasets

In [34]:
files_path = defaultdict(list)
dir = './data'
for filename in listdir(dir):
    if filename.endswith('.csv'):
        digit = path.splitext(filename)[0][-1]
        files_path[digit].append(path.join(dir, filename))

train_files      = []
validation_files = []
test_files       = []

for digit in files_path:
    random.shuffle(files_path[digit])
    
    train_split = int(len(files_path[digit]) * 0.6) # 60%
    validation_split = train_split + int(len(files_path[digit]) * 0.2) # 20%

    train_files += files_path[digit][:train_split]
    validation_files += files_path[digit][train_split:validation_split]
    # remaining 20%
    test_files += files_path[digit][validation_split:]

train_length, train_data = load_data('train', train_files)
validation_length, validation_data = load_data('validation', validation_files)
test_length, test_data = load_data('test', test_files )

print('train_length={} validation_length={} test_length{}'.format(train_length, validation_length, test_length))

train_length=22200 validation_length=290 test_length312


In [54]:
files_path

defaultdict(list,
            {'9': ['./data\\readings_994_digit_9.csv',
              './data\\readings_1037_digit_9.csv',
              './data\\readings_1012_digit_9.csv',
              './data\\readings_950_digit_9.csv',
              './data\\readings_1045_digit_9.csv',
              './data\\readings_1050_digit_9.csv',
              './data\\readings_951_digit_9.csv',
              './data\\readings_973_digit_9.csv',
              './data\\readings_1040_digit_9.csv',
              './data\\readings_1051_digit_9.csv',
              './data\\readings_964_digit_9.csv',
              './data\\readings_947_digit_9.csv',
              './data\\readings_1046_digit_9.csv',
              './data\\readings_1010_digit_9.csv',
              './data\\readings_1049_digit_9.csv',
              './data\\readings_1036_digit_9.csv',
              './data\\readings_961_digit_9.csv',
              './data\\readings_1043_digit_9.csv',
              './data\\readings_1044_digit_9.csv',
              '

In [52]:
test_files

['./data\\readings_590_digit_4.csv',
 './data\\readings_30_digit_0.csv',
 './data\\readings_163_digit_5.csv',
 './data\\readings_724_digit_6.csv',
 './data\\readings_123_digit_5.csv',
 './data\\readings_85_digit_0.csv',
 './data\\readings_237_digit_2.csv',
 './data\\readings_388_digit_3.csv',
 './data\\readings_109_digit_5.csv',
 './data\\readings_260_digit_2.csv',
 './data\\readings_352_digit_3.csv',
 './data\\readings_171_digit_5.csv',
 './data\\readings_26_digit_0.csv',
 './data\\readings_616_digit_4.csv',
 './data\\readings_191_digit_5.csv',
 './data\\readings_5_digit_0.csv',
 './data\\readings_235_digit_2.csv',
 './data\\readings_334_digit_3.csv',
 './data\\readings_100_digit_0.csv',
 './data\\readings_526_digit_4.csv',
 './data\\readings_889_digit_8.csv',
 './data\\readings_661_digit_6.csv',
 './data\\readings_400_digit_3.csv',
 './data\\readings_648_digit_6.csv',
 './data\\readings_696_digit_6.csv',
 './data\\readings_180_digit_5.csv',
 './data\\readings_536_digit_4.csv',
 './da

## Build a sequential model

In [36]:
model = tf.keras.Sequential([
      tf.keras.layers.Conv2D(8, (3, 3), padding="same", activation="relu", input_shape=(seq_length, dim, 1)),
      tf.keras.layers.Conv2D(8, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.MaxPool2D((2, 2)),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Conv2D(8, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.MaxPool2D((2, 2), padding="same"),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Conv2D(16, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.MaxPool2D((2, 2), padding="same"),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Conv2D(16, (3, 3), padding="same", activation="relu"),
      tf.keras.layers.Flatten(),
      tf.keras.layers.Dense(64, activation="relu"),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(32, activation="relu"),
      tf.keras.layers.Dropout(0.2),
      tf.keras.layers.Dense(num_classes, activation="softmax")
  ])

model.summary()

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


## Compile and start training

In [38]:
epochs = 100
batch_size = 64
steps_per_epoch=1000

model.compile(optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"])

def reshape_function(data, label):
  reshaped_data = tf.reshape(data, [-1, dim, 1])
  return reshaped_data, label

train_data = train_data.map(reshape_function)
validation_data = validation_data.map(reshape_function)

train_data = train_data.batch(batch_size).repeat()
validation_data = validation_data.batch(batch_size)

logdir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logdir)

# Uncomment the ines below if you like to see how training proceeds
# %load_ext tensorboard
# %tensorboard --logdir logdir

model.fit(
  train_data,
  epochs=epochs,
  validation_data=validation_data,
  steps_per_epoch=steps_per_epoch,
  validation_steps=int((validation_length - 1) / batch_size + 1),
  callbacks=[tensorboard_callback])

Epoch 1/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 21ms/step - accuracy: 0.1293 - loss: 4.0672 - val_accuracy: 0.2621 - val_loss: 2.0991
Epoch 2/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - accuracy: 0.2134 - loss: 2.0777
Epoch 3/100
[1m   7/1000[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m19s[0m 20ms/step - accuracy: 0.2321 - loss: 1.9950

  self.gen.throw(value)


[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 21ms/step - accuracy: 0.3141 - loss: 1.8734 - val_accuracy: 0.5345 - val_loss: 1.4660
Epoch 4/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - accuracy: 0.4324 - loss: 1.6223
Epoch 5/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19ms/step - accuracy: 0.5378 - loss: 1.3686 - val_accuracy: 0.7414 - val_loss: 0.8957
Epoch 6/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19ms/step - accuracy: 0.6128 - loss: 1.1568
Epoch 7/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19ms/step - accuracy: 0.6695 - loss: 0.9949 - val_accuracy: 0.8517 - val_loss: 0.5177
Epoch 8/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 20ms/step - accuracy: 0.7090 - loss: 0.8777
Epoch 9/100
[1m1000/1000[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 19ms/step - accuracy: 0.7368 - loss: 0.7912 - val_accur


KeyboardInterrupt



## Evaluate the trained model on test dataset

In [41]:
test_data = test_data.map(reshape_function)
test_labels = np.zeros(test_length)

# There is no easy function to get the labels back from the tf.data.Dataset :(
# Need to iterate over dataset
idx = 0
for data, label in test_data:
    test_labels[idx] = label.numpy()
    idx += 1
    
test_data = test_data.batch(batch_size)

loss, acc = model.evaluate(test_data)
pred = np.argmax(model.predict(test_data), axis=1)

# Create a confusion matrix to see how model predicts
confusion = tf.math.confusion_matrix(labels=tf.constant(test_labels), predictions=tf.constant(pred), num_classes=num_classes)
print(confusion)

[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - accuracy: 0.8946 - loss: 0.4391 
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 41ms/step
tf.Tensor(
[[35  0  0  0  0  2  3  0  0  0]
 [ 0 36  0  0  0  0  0  0  0  0]
 [ 0  0 37  3  0  0  0  0  0  0]
 [ 0  0  0 40  0  0  0  0  0  0]
 [ 0  0  0  0 44  0  0  0  0  0]
 [ 0  0  0  0  0 40  0  0  0  0]
 [ 7  0  0  0  0  2 33  0  0  0]
 [ 0  0  0  1  0  0  0  9  0  0]
 [ 0  0  4  2  0  0  0  0  4  0]
 [ 0  0  0  0  6  0  0  0  0  4]], shape=(10, 10), dtype=int32)


In [64]:
test_length

312

In [66]:
len(pred)

312

## Convert model to TFLite format 
### Note: Currently quantized TFLite format does not work with TFLite Micro library

In [44]:
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()
open("model.tflite", "wb").write(tflite_model)

# Convert the model to the TensorFlow Lite format with quantization
converter = tf.lite.TFLiteConverter.from_keras_model(model)
converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
tflite_model = converter.convert()
open("model_quantized.tflite", "wb").write(tflite_model)


INFO:tensorflow:Assets written to: C:\Users\Thinkpad\AppData\Local\Temp\tmpg64_wcvj\assets


INFO:tensorflow:Assets written to: C:\Users\Thinkpad\AppData\Local\Temp\tmpg64_wcvj\assets


Saved artifact at 'C:\Users\Thinkpad\AppData\Local\Temp\tmpg64_wcvj'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 36, 6, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  1719291767376: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291768912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291768720: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291767568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291768336: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291770832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291769872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291770256: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291771792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291771408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719



INFO:tensorflow:Assets written to: C:\Users\Thinkpad\AppData\Local\Temp\tmpf0qpouoh\assets


INFO:tensorflow:Assets written to: C:\Users\Thinkpad\AppData\Local\Temp\tmpf0qpouoh\assets


Saved artifact at 'C:\Users\Thinkpad\AppData\Local\Temp\tmpf0qpouoh'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 36, 6, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  1719291767376: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291768912: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291768720: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291767568: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291768336: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291770832: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291769872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291770256: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291771792: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719291771408: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1719



24208

In [56]:
demo_length, demo_data = load_data('test', ['./data\\readings_323_digit_3.csv'] )

In [76]:
import numpy as np
import pandas as pd
import tensorflow as tf

# def reshape_function(data, label):
#   reshaped_data = tf.reshape(data, [-1, dim, 1])
#   return reshaped_data, label

# def padding(data):
#     padded_data = []
#     noise_level = [ 20, 20, 20, 0.2, 0.2, 0.2 ]
    
#     tmp_data = (np.random.rand(seq_length, dim) - 0.5) * noise_level + data[0]
#     tmp_data[(seq_length - min(len(data), seq_length)):] = data[:min(len(data), seq_length)]
#     padded_data.append(tmp_data)

#     tmp_data = (np.random.rand(seq_length, dim) - 0.5) * noise_level + data[-1]
#     tmp_data[:min(len(data), seq_length)] = data[:min(len(data), seq_length)]
    
#     padded_data.append(tmp_data)
#     return padded_data

# def load_data(data_type, files):
#     data   = []
#     labels = []
#     random.shuffle(files)
   
#     for file in files:
#         with open(file) as f:
#             label = path.splitext(file)[0][-1]
#             labels.append(label)
#             readings = []
#             for line in f:
#                 reading = line.strip().split(',')
#                 readings.append([float(i) for i in reading[0:6]])

#             data.append(readings)
            
#     if data_type == 'train':
#         data, labels = augment_data(data, labels)
    
#     return build_dataset(data, labels)

# def build_dataset(data, label):
#     # Add 2 padding, initialize data and label
#     padded_num = 2
#     length = len(data) * padded_num
#     features = np.zeros((length, seq_length, dim))
#     labels = np.zeros(length)
#     # Get padding for train, valid and test
#     for idx, (data, label) in enumerate(zip(data, label)):
#         padded_data = padding(data)
#         for num in range(padded_num):
#             features[padded_num * idx + num] = padded_data[num]
#             labels[padded_num * idx + num] = label
#     # Turn into tf.data.Dataset
#     dataset = tf.data.Dataset.from_tensor_slices((features, labels.astype("int32")))
#     return length, dataset

# def augment_data(original_data, original_label):
#   new_data = []
#   new_label = []
#   for idx, (data, label) in enumerate(zip(original_data, original_label)):  # pylint: disable=unused-variable
#     # Original data
#     new_data.append(data)
#     new_label.append(label)
#     # Shift Sequence
#     for num in range(5):  # pylint: disable=unused-variable
#       new_data.append((np.array(data, dtype=np.float32) +
#                        (random.random() - 0.5) * 200).tolist())
#       new_label.append(label)
#     # Add Random noise
#     tmp_data = [[0 for i in range(len(data[0]))] for j in range(len(data))]
#     for num in range(5):
#       for i in range(len(tmp_data)):
#         for j in range(len(tmp_data[i])):
#           tmp_data[i][j] = data[i][j] + 5 * random.random()
#       new_data.append(tmp_data)
#       new_label.append(label)
#     # Time warping
#     fractions = [(3, 2), (5, 3), (2, 3), (3, 4), (9, 5), (6, 5), (4, 5)]
#     for molecule, denominator in fractions:
#       new_data.append(time_warping(molecule, denominator, data))
#       new_label.append(label)
#     # Movement amplification
#     for molecule, denominator in fractions:
#       new_data.append(
#           (np.array(data, dtype=np.float32) * molecule / denominator).tolist())
#       new_label.append(label)
#   return new_data, new_label

# def time_warping(molecule, denominator, data):
#   tmp_data = [[0 for i in range(len(data[0]))] for j in range((int(len(data) / molecule) - 1) * denominator)]
    
#   for i in range(int(len(data) / molecule) - 1):
#     for j in range(len(data[i])):
#       for k in range(denominator):
#         tmp_data[denominator * i + k][j] = (data[molecule * i + k][j] * (denominator - k) 
#                                             + data[molecule * i + k + 1][j] * k) / denominator
#   return tmp_data


# Load and preprocess data from CSV
csv_file_path = 'data/readings_323_digit_3.csv'  # Replace with your actual CSV path

demo_length, demo_data = load_data('test', ['./data\\readings_1_digit_0.csv'] )

demo_data = demo_data.map(reshape_function)
# demo_labels = np.zeros(test_length)
demo_data = demo_data.batch(batch_size)

predictions = np.argmax(model.predict(demo_data), axis=1)

# Output predictions
print("Predictions:", predictions)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step
Predictions: [0 5]
