# PDIOT ML

In [50]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [51]:
import os
path = "/content/drive/MyDrive/Colab Notebooks/pdiot-ml"
os.chdir(path)

# task_index = 1 # action
# task_index = 2 # abnormal
task_index = 3 # other
# task_index = 4 # increase case 3 with gyro and accl
# task_index = 5 # one model for all

# sensor = 'thingy'
sensor = 'respeck'

# tainable = False
tainable = True

In [52]:
import pandas as pd
import numpy as np
import os

def process_file(file_path, file_name, task_index, label_action_dict, label_type_dict):
    if file_name.endswith('.csv'):
        df = pd.read_csv(os.path.join(file_path, file_name))

        label_action, label_type = file_name.replace('.csv', '').split("_")[2:4]
        label_action = label_action_dict[label_action] if label_action in label_action_dict.keys() else label_action
        label_type = label_type_dict[label_type] if label_type in label_type_dict.keys() else label_type

        match task_index:
            case 1:
                label = label_action
            case 2:
                if label_action in ["normal walking", "ascending stairs", "descending stairs", "shuffle walking", "running", "miscellaneous movements"]:
                    return None, None
                if label_type not in ["normal", "coughing", "hyperventilating"]:
                    return None, None
                label = " ".join([label_action, label_type])
            case 3:
                if label_action in ["normal walking", "ascending stairs", "descending stairs", "shuffle walking", "running", "miscellaneous movements"]:
                    return None, None
                if label_type in ["talking", "eating", "singing", "laughing"]:
                    label_type = "other"
                label = " ".join([label_action, label_type])
            case 4:
                # increase case 3
                if label_action in ["normal walking", "ascending stairs", "descending stairs", "shuffle walking", "running", "miscellaneous movements"]:
                    return None, None
                if label_type in ["talking", "eating", "singing", "laughing"]:
                    label_type = "other"
                label = " ".join([label_action, label_type])

            case 5:
                # if label_type in ["coughing", "hyperventilating"]:
                #     label_type = "abnormal"
                # if label_type in ["talking", "eating", "singing", "laughing"]:
                #     label_type = "other"
                label = " ".join([label_action, label_type])

        match task_index:
            case 4:
                data = df[['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']]
            case 5:
                data = df[['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']]
            case _:
                data = df[['accel_x', 'accel_y', 'accel_z']]

        return label, data

    return None, None

In [53]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
import concurrent.futures

# Read multiple CSV files and label them
# data_folder = "pdiot-data/updated_anonymized_dataset_2023/Thingy"
data_folder = "pdiot-data/updated_anonymized_dataset_2023/Respeck"
files = []
for folder_name in os.listdir(data_folder):
    if folder_name == ".gitkeep":
        continue
    files += [(os.path.join(data_folder, folder_name), file_name) for file_name in os.listdir(os.path.join(data_folder, folder_name))]

data_list = []
labels = []

label_action_dict = {
    "ascending": "ascending stairs",
    "descending": "descending stairs",
    "lyingRight": "lying down on right",
    "lyingLeft": "lying down on left",
    "lyingBack": "lying down on back",
    "lyingStomach": "lying down on stomach",
    "miscMovement": "miscellaneous movements",
    "shuffleWalking": "shuffle walking",
    "normalWalking": "normal walking",
    "sitting": "sitting/standing",
    "standing": "sitting/standing"
}

label_type_dict = {
    "breathingNormal": "normal",
}

# Using ThreadPoolExecutor for parallel processing
with concurrent.futures.ThreadPoolExecutor() as executor:
    future_to_file = {executor.submit(process_file, file_path, file_name, task_index, label_action_dict, label_type_dict): (file_path, file_name) for file_path, file_name in files}

    for future in concurrent.futures.as_completed(future_to_file):
        file_path, file_name = future_to_file[future]
        try:
            label, data = future.result()
            if label is not None and data is not None:
                labels.append(label)
                data_list.append(data)
        except Exception as exc:
            print(f'{file_path}/{file_name} generated an exception: {exc}')


# # Stand the accel data
# tag += "_stand"
# for i in range(len(data_list)):
#     for col in ['accel_x', 'accel_y', 'accel_z']:
#         data_list[i][col] = (data_list[i][col] - data_list[i][col].mean()) / data_list[i][col].std()


match task_index:
    case 4:
        # norm the gyro data
        tag = f'accl_gyro_norm'

        for i in range(len(data_list)):
            for col in ['gyro_x', 'gyro_y', 'gyro_z']:
                # https://pdf1.alldatasheet.com/datasheet-pdf/view/678850/AD/ADXRS300_15.html
                min_val = -300
                max_val = 300

                data_list[i][col] = (data_list[i][col] - min_val) / (max_val - min_val)
    case 5:
        # norm the gyro data
        tag = "accl_gyro_norm"
        for i in range(len(data_list)):
            for col in ['gyro_x', 'gyro_y', 'gyro_z']:
                # https://pdf1.alldatasheet.com/datasheet-pdf/view/678850/AD/ADXRS300_15.html
                min_val = -300
                max_val = 300

                data_list[i][col] = (data_list[i][col] - min_val) / (max_val - min_val)
    case _:
        tag = f'accl_only_no_norm'


In [54]:
from scipy.signal import spectrogram

window_size = 50  # Define the size of the window
stride = 5  # Define the stride of the window

X_spectrograms = []
y_spectrograms = []

for i in range(len(data_list)):
    sequence = data_list[i].values
    label = labels[i]
    for j in range(0, len(sequence) - window_size, stride):
        window = sequence[j:j + window_size]

        # Compute the spectrogram of the window
        # f, t, Sxx = spectrogram(window, axis=0)

        # X_spectrograms.append(Sxx)
        X_spectrograms.append(window)
        y_spectrograms.append(label)

# Convert to numpy array
X_spectrograms = np.array(X_spectrograms)

# Label encode and one-hot encode labels
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(y_spectrograms)
y_spectrograms = to_categorical(integer_encoded)


# Split data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_spectrograms, y_spectrograms, test_size=0.2, random_state=42)


In [55]:
integer_mapping = {i: l for i, l in enumerate(label_encoder.classes_)}
# print(integer_mapping)
match task_index:
    case 1:
        result = integer_mapping == {
            0: 'ascending stairs',
            1: 'descending stairs',
            2: 'lying down on back',
            3: 'lying down on left',
            4: 'lying down on right',
            5: 'lying down on stomach',
            6: 'miscellaneous movements',
            7: 'normal walking',
            8: 'running',
            9: 'shuffle walking',
            10: 'sitting/standing'
        }

    case 2:
        result = integer_mapping == {
            0: 'lying down on back coughing',
            1: 'lying down on back hyperventilating',
            2: 'lying down on back normal',
            3: 'lying down on left coughing',
            4: 'lying down on left hyperventilating',
            5: 'lying down on left normal',
            6: 'lying down on right coughing',
            7: 'lying down on right hyperventilating',
            8: 'lying down on right normal',
            9: 'lying down on stomach coughing',
            10: 'lying down on stomach hyperventilating',
            11: 'lying down on stomach normal',
            12: 'sitting/standing coughing',
            13: 'sitting/standing hyperventilating',
            14: 'sitting/standing normal'
        }

    case 3:
        result = integer_mapping == {
            0: 'lying down on back coughing',
            1: 'lying down on back hyperventilating',
            2: 'lying down on back normal',
            3: 'lying down on back other',
            4: 'lying down on left coughing',
            5: 'lying down on left hyperventilating',
            6: 'lying down on left normal',
            7: 'lying down on left other',
            8: 'lying down on right coughing',
            9: 'lying down on right hyperventilating',
            10: 'lying down on right normal',
            11: 'lying down on right other',
            12: 'lying down on stomach coughing',
            13: 'lying down on stomach hyperventilating',
            14: 'lying down on stomach normal',
            15: 'lying down on stomach other',
            16: 'sitting/standing coughing',
            17: 'sitting/standing hyperventilating',
            18: 'sitting/standing normal',
            19: 'sitting/standing other'
        }

    case 4:
        result = integer_mapping == {
            0: 'lying down on back coughing',
            1: 'lying down on back hyperventilating',
            2: 'lying down on back normal',
            3: 'lying down on back other',
            4: 'lying down on left coughing',
            5: 'lying down on left hyperventilating',
            6: 'lying down on left normal',
            7: 'lying down on left other',
            8: 'lying down on right coughing',
            9: 'lying down on right hyperventilating',
            10: 'lying down on right normal',
            11: 'lying down on right other',
            12: 'lying down on stomach coughing',
            13: 'lying down on stomach hyperventilating',
            14: 'lying down on stomach normal',
            15: 'lying down on stomach other',
            16: 'sitting/standing coughing',
            17: 'sitting/standing hyperventilating',
            18: 'sitting/standing normal',
            19: 'sitting/standing other'
        }

    case 5:
        result = integer_mapping == {
            0: 'ascending stairs normal',
            1: 'descending stairs normal',
            2: 'lying down on back coughing',
            3: 'lying down on back hyperventilating',
            4: 'lying down on back laughing',
            5: 'lying down on back normal',
            6: 'lying down on back singing',
            7: 'lying down on back talking',
            8: 'lying down on left coughing',
            9: 'lying down on left hyperventilating',
            10: 'lying down on left laughing',
            11: 'lying down on left normal',
            12: 'lying down on left singing',
            13: 'lying down on left talking',
            14: 'lying down on right coughing',
            15: 'lying down on right hyperventilating',
            16: 'lying down on right laughing',
            17: 'lying down on right normal',
            18: 'lying down on right singing',
            19: 'lying down on right talking',
            20: 'lying down on stomach coughing',
            21: 'lying down on stomach hyperventilating',
            22: 'lying down on stomach laughing',
            23: 'lying down on stomach normal',
            24: 'lying down on stomach singing',
            25: 'lying down on stomach talking',
            26: 'miscellaneous movements normal',
            27: 'normal walking normal',
            28: 'running normal',
            29: 'shuffle walking normal',
            30: 'sitting/standing coughing',
            31: 'sitting/standing eating',
            32: 'sitting/standing hyperventilating',
            33: 'sitting/standing laughing',
            34: 'sitting/standing normal',
            35: 'sitting/standing singing',
            36: 'sitting/standing talking'
        }
result

True

In [56]:
import tensorflow as tf
from tensorflow.python.client import device_lib

def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']
get_available_gpus()

['/device:GPU:0']

In [57]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Dropout, BatchNormalization
from tensorflow.keras.models import load_model

# Load the model
model_path = f'model/model_{sensor}_{tag}_task_{task_index}_{window_size}.h5'

if os.path.exists(model_path):
    model = load_model(model_path)
else:
    model = Sequential()

    # First LSTM layer with Dropout
    model.add(LSTM(128, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    # Second LSTM layer with Dropout
    model.add(LSTM(128, return_sequences=True))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    # Third LSTM layer with Dropout
    model.add(LSTM(128))
    model.add(Dropout(0.2))
    model.add(BatchNormalization())

    # Fully connected layers
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.2))

    model.add(Dense(y_train.shape[1], activation='softmax'))

    # Compile the model
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [58]:
# Evaluate the model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Test Loss: 2.998481035232544, Test Accuracy: 0.03143436461687088


In [60]:
if tainable:
    # Train the model
    model.fit(X_train, y_train, epochs=80, batch_size=2048)
    # model.fit(X_train, y_train, epochs=40, batch_size=2048)
    # model.fit(X_train, y_train, epochs=20, batch_size=512)

    # Evaluate the model
    loss, accuracy = model.evaluate(X_test, y_test)
    print(f"Test Loss: {loss}, Test Accuracy: {accuracy}")

Epoch 1/80
Epoch 2/80
Epoch 3/80
Epoch 4/80
Epoch 5/80
Epoch 6/80
Epoch 7/80
Epoch 8/80
Epoch 9/80
Epoch 10/80
Epoch 11/80
Epoch 12/80
Epoch 13/80
Epoch 14/80
Epoch 15/80
Epoch 16/80
Epoch 17/80
Epoch 18/80
Epoch 19/80
Epoch 20/80
Epoch 21/80
Epoch 22/80
Epoch 23/80
Epoch 24/80
Epoch 25/80
Epoch 26/80
Epoch 27/80
Epoch 28/80
Epoch 29/80
Epoch 30/80
Epoch 31/80
Epoch 32/80
Epoch 33/80
Epoch 34/80
Epoch 35/80
Epoch 36/80
Epoch 37/80
Epoch 38/80
Epoch 39/80
Epoch 40/80
Epoch 41/80
Epoch 42/80
Epoch 43/80
Epoch 44/80
Epoch 45/80
Epoch 46/80
Epoch 47/80
Epoch 48/80
Epoch 49/80
Epoch 50/80
Epoch 51/80
Epoch 52/80
Epoch 53/80
Epoch 54/80
Epoch 55/80
Epoch 56/80
Epoch 57/80
Epoch 58/80
Epoch 59/80
Epoch 60/80
Epoch 61/80
Epoch 62/80
Epoch 63/80
Epoch 64/80
Epoch 65/80
Epoch 66/80
Epoch 67/80
Epoch 68/80
Epoch 69/80
Epoch 70/80
Epoch 71/80
Epoch 72/80
Epoch 73/80
Epoch 74/80
Epoch 75/80
Epoch 76/80
Epoch 77/80
Epoch 78/80
Epoch 79/80
Epoch 80/80
Test Loss: 0.128868967294693, Test Accuracy: 0.95

In [61]:
if tainable:
    # Save the entire model to a HDF5 file
    run_model = tf.function(lambda x: model(x))
    # This is important, let's fix the input size.
    concrete_func = run_model.get_concrete_function(
        tf.TensorSpec((1, X_train.shape[1], X_train.shape[2]), model.inputs[0].dtype))

    # model directory.
    model.save(model_path, save_format="tf", signatures=concrete_func)

    model.save('temp', save_format="tf", signatures=concrete_func)
    converter = tf.lite.TFLiteConverter.from_saved_model('temp')

    model_tflite_path = '.'.join([model_path.split('.')[0], 'tflite'])
    tflite_model = converter.convert()
    open(model_tflite_path, "wb").write(tflite_model)

In [63]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.models import load_model
from scipy import stats
from sklearn.preprocessing import LabelEncoder
from scipy.signal import spectrogram

# Load the model
if os.path.exists(model_path):
    model = load_model(model_path)

# Read and preprocess the new CSV file
# new_csv_file = "/content/drive/MyDrive/Colab Notebooks/pdiot-ml/pdiot-data/test_data/Respeck_s1911593_Sitting_Normal_clean_27-09-2023_14-00-59.csv"
# new_csv_file = "/content/drive/MyDrive/Colab Notebooks/pdiot-ml/pdiot-data/updated_anonymized_dataset_2023/Respeck/s100/s100_respeck_ascending_breathingNormal.csv"
new_csv_file = "/content/drive/MyDrive/Colab Notebooks/pdiot-ml/pdiot-data/updated_anonymized_dataset_2023/Respeck/s100/s100_respeck_sitting_breathingNormal.csv"
# new_csv_file = "/content/drive/MyDrive/Colab Notebooks/pdiot-ml/pdiot-data/updated_anonymized_dataset_2023/Respeck/s100/s100_respeck_sitting_coughing.csv"
# # new_csv_file = ""


match task_index:
    case 4:
        new_df = pd.read_csv(new_csv_file)[['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']]
    case 5:
        new_df = pd.read_csv(new_csv_file)[['accel_x', 'accel_y', 'accel_z', 'gyro_x', 'gyro_y', 'gyro_z']]
    case _:
        new_df = pd.read_csv(new_csv_file)[['accel_x', 'accel_y', 'accel_z']]


match task_index:
    case 4:
        # norm the gyro data
        for col in ['gyro_x', 'gyro_y', 'gyro_z']:
            # https://pdf1.alldatasheet.com/datasheet-pdf/view/678850/AD/ADXRS300_15.html
            min_val = -300
            max_val = 300

            new_df[col] = (new_df[col] - min_val) / (max_val - min_val)
    case 5:
        # norm the gyro data
        for col in ['gyro_x', 'gyro_y', 'gyro_z']:
            # https://pdf1.alldatasheet.com/datasheet-pdf/view/678850/AD/ADXRS300_15.html
            min_val = -300
            max_val = 300

            new_df[col] = (new_df[col] - min_val) / (max_val - min_val)

# Window the new sequence
window_size = 50  # Define the size of the window
stride = 10  # Define the stride of the window

new_windows = []
new_sequence = new_df.values
for j in range(0, len(new_sequence) - window_size, stride):
    window = new_sequence[j:j + window_size]

    # Compute the spectrogram of the window
    # _, _, Sxx = spectrogram(window, axis=0)

    # new_windows.append(Sxx)
    new_windows.append(window)

new_windows = np.array(new_windows)

# Make predictions
predictions = model.predict(new_windows)
predicted_label_indices = np.argmax(predictions, axis=1)

# Assuming label_encoder is already fitted on the labels during training
predicted_labels = label_encoder.inverse_transform(predicted_label_indices)

# # Choose the most frequent label as the final prediction using NumPy
# unique_labels, counts = np.unique(predicted_labels, return_counts=True)
# final_prediction = unique_labels[np.argmax(counts)]

print(f"The predicted label for the new data is: {predicted_labels}")





The predicted label for the new data is: ['sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sitting/standing normal' 'sitting/standing normal'
 'sit