https://github.com/dbouchabou/Fully-Convolutional-Network-Smart-Homes/tree/master

In [None]:
# Mount Google Drive at /content/drive (only works inside Google Colab).
# NOTE(review): force_remount presumably forces a fresh mount even when the
# drive is already mounted — confirm against the Colab docs.
from google.colab import drive
drive.mount('/content/drive' , force_remount = True)

Mounted at /content/drive


In [None]:
import os
import numpy as np
import pandas as pd
import pickle
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

In [None]:
# Work from the dataset folder so the relative paths used further down resolve.
# os.chdir is plain Python: unlike the bare `cd` automagic it keeps working when
# the cell has more than one line, when automagic is off, or in an exported script.
os.chdir("/content/drive/MyDrive/PAMAP2_Dataset")

In [None]:
def generate_three_IMU(name):
    """Return the three per-axis column names (`_x`, `_y`, `_z`) for `name`."""
    return [name + axis for axis in ('_x', '_y', '_z')]

def generate_four_IMU(name):
    """Return the four component column names (`_x`, `_y`, `_z`, `_w`) for `name`."""
    return [name + component for component in ('_x', '_y', '_z', '_w')]

def generate_cols_IMU(name):
    """Return the ordered column names of one IMU whose prefix is `name`.

    Order: temperature, 16-acceleration (x, y, z), 6-acceleration (x, y, z),
    gyroscope (x, y, z), magnetometer (x, y, z), orientation (x, y, z, w).
    """
    columns = [name + '_temperature']

    # Three-axis sensors, in the order they are appended to the column list.
    three_axis_sensors = (
        '_3D_acceleration_16',
        '_3D_acceleration_6',
        '_3D_gyroscope',
        '_3D_magnetometer',
    )
    for sensor in three_axis_sensors:
        columns.extend(generate_three_IMU(name + sensor))

    # Orientation is the only four-component group.
    columns.extend(generate_four_IMU(name + '_4D_orientation'))
    return columns

def load_IMU():
    """Return every column name of a subject file, in file order.

    The list starts with `time_stamp`, `activity_id` and `heart_rate`,
    followed by the IMU columns of the hand, chest and ankle sensors.
    """
    columns = ['time_stamp', 'activity_id', 'heart_rate']
    for location in ('hand', 'chest', 'ankle'):
        columns.extend(generate_cols_IMU(location))
    return columns

def load_subjects(root='./Protocol/subject'):
    """Load the subject files 101..109 into a single DataFrame.

    Args:
        root: path prefix; each file is read from ``root + <subject id> + '.dat'``.

    Returns:
        One DataFrame holding all subjects stacked in id order, with the
        column names from `load_IMU()`, an extra `id` column carrying the
        subject number, and a fresh 0..n-1 index.
    """
    cols = load_IMU()
    frames = []

    for i in range(101, 110):
        path = root + str(i) + '.dat'
        # Raw string: '\s' inside a plain literal is an invalid escape sequence.
        subject = pd.read_table(path, header=None, sep=r'\s+')
        subject.columns = cols
        subject['id'] = i
        frames.append(subject)

    # Concatenate once: growing a DataFrame inside the loop copies all
    # previously loaded rows on every iteration.
    return pd.concat(frames, ignore_index=True)

data = load_subjects()

In [None]:
# Remove the four orientation components of each of the three IMUs.
data = data.drop(
    columns=[
        '{}_4D_orientation_{}'.format(location, component)
        for location in ('hand', 'chest', 'ankle')
        for component in ('x', 'y', 'z', 'w')
    ]
)

def fix_data(data):
    """Return a cleaned copy of `data`; the input frame is not modified.

    Steps:
      1. drop every row whose `activity_id` is 0;
      2. fill gaps with `DataFrame.interpolate()` (default settings);
      3. fill whatever is still missing with the mean of its column.

    The original row index is kept (it is not reset after dropping rows).
    """
    data = data.drop(data[data['activity_id'] == 0].index)
    data = data.interpolate()
    # Anything interpolation could not fill falls back to the column mean.
    for colName in data.columns:
        data[colName] = data[colName].fillna(data[colName].mean())
    return data

In [None]:
# Clean the loaded frame; `data` is rebound to the cleaned copy returned by fix_data.
data = fix_data(data)

In [None]:
# Quick sanity summary. nunique() counts the distinct ids directly instead of
# aggregating every column with groupby().mean() just to count the groups.
print('Df shape', data.shape)
print('No of subjects', data['id'].nunique())
print('No of total activities', data['activity_id'].nunique())

In [None]:
# Write the cleaned frame to disk in the current working directory.
data.to_pickle('pamap2_for_lstm.pkl')

In [None]:
# Reload the frame written by the previous cell.
# NOTE(review): presumably here so the notebook can resume from this point
# without re-running the loading/cleaning cells — confirm.
data = pd.read_pickle('pamap2_for_lstm.pkl')

In [None]:
# Distinct activity ids present in the data, in ascending order.
activities = np.sort(data.activity_id.unique())

filename = "pamap2_fnc_lstm.pickle"

# Map each activity id to its 0-based position in the sorted id list.
dictActivities = {activity: i for i, activity in enumerate(activities)}

# The context manager closes the file even if pickling raises.
with open(filename, "wb") as pickle_out:
    pickle.dump(dictActivities, pickle_out)

In [None]:
def segment_activities(df):
    """Split `df` into consecutive runs of rows sharing the same `activity_id`.

    A new segment starts at every row whose `activity_id` differs from the
    row before it. Segments are returned in row order and together cover the
    whole frame, including the final run up to the last row.

    Args:
        df: DataFrame with an `activity_id` column.

    Returns:
        List of DataFrame slices (positional slices of `df`); an empty list
        when `df` has no rows.
    """
    if len(df) == 0:
        return []

    # True on the first row of every run (the first row of the frame included).
    startsNewRun = df.activity_id.ne(df.activity_id.shift()).to_numpy()
    starts = np.flatnonzero(startsNewRun)

    # Close the last run with len(df); without this end boundary the final
    # segment would never be emitted.
    bounds = np.append(starts, len(df))

    # .iloc makes the positional slicing explicit (the row index may have
    # gaps after rows were dropped upstream).
    return [df.iloc[begin:end] for begin, end in zip(bounds[:-1], bounds[1:])]

In [None]:
# Cut the cleaned frame into runs of constant activity_id (a list of DataFrame slices).
activitySequences = segment_activities(data)

In [None]:
activitySequences

[       time_stamp  activity_id  heart_rate  hand_temperature  \
 2928        37.66            1  107.487925           30.3750   
 2929        37.67            1  107.487925           30.3750   
 2930        37.68            1  107.487925           30.3750   
 2931        37.69            1  107.487925           30.3750   
 2932        37.70            1  100.000000           30.3750   
 ...           ...          ...         ...               ...   
 30110      309.48            1   90.000000           31.9375   
 30111      309.49            1   90.000000           31.9375   
 30112      309.50            1   90.000000           31.9375   
 30113      309.51            1   90.000000           31.9375   
 30114      309.52            1   90.000000           31.9375   
 
        hand_3D_acceleration_16_x  hand_3D_acceleration_16_y  \
 2928                     2.21530                    8.27915   
 2929                     2.29196                    7.67288   
 2930                     

#### Original `generate_sentence` (expects `sensor` and `value` columns)

In [None]:
def generate_sentence(df2):
    """Turn every row of `df2` into a token and join the tokens with spaces.

    A token is the row's `sensor` entry immediately followed by its `value`
    entry. An input without rows yields the empty string.

    Note: a later cell defines `generate_sentence` again; once that cell has
    run, this version is shadowed.
    """
    tokens = [
        "{}{}".format(sensor, value)
        for sensor, value in zip(df2.sensor.values, df2.value.values)
    ]
    return " ".join(tokens)

#### `generate_sentence` adapted to the PAMAP2 columns

In [None]:
def generate_sentence(df2):
    """Build one space-separated sentence from the first row of `df2`.

    Each token is a column name immediately followed by that column's value
    in the first row. Columns used, in order: `heart_rate`, then for each of
    hand, chest and ankle: temperature, 16-acceleration x/y/z,
    6-acceleration x/y/z, gyroscope x/y/z, magnetometer x/y/z (40 in total).

    NOTE(review): only `.values[0]` is read, so every row after the first is
    ignored — confirm this is intended when `df2` is a multi-row segment.
    """
    # Same 40 column names, in the same order, as the hand-written list.
    columns_of_interest = ['heart_rate']
    for location in ('hand', 'chest', 'ankle'):
        columns_of_interest.append(location + '_temperature')
        for sensor in ('3D_acceleration_16', '3D_acceleration_6',
                       '3D_gyroscope', '3D_magnetometer'):
            for axis in ('x', 'y', 'z'):
                columns_of_interest.append('{}_{}_{}'.format(location, sensor, axis))

    tokens = [
        "{}{}".format(column, df2[column].values[0])
        for column in columns_of_interest
    ]
    return " ".join(tokens)

In [None]:
def sequencesToSentences(activitySequences):
    """Convert each activity segment into a sentence plus its label.

    Returns two parallel lists: the sentence produced by `generate_sentence`
    for every segment, and the `activity_id` of each segment's first row.
    """
    sentences = []
    label_sentences = []

    for sequence in activitySequences:
        sentences.append(generate_sentence(sequence))
        label_sentences.append(sequence.activity_id.values[0])

    return sentences, label_sentences

In [None]:
# One sentence and one activity label per segment (parallel lists).
sentences, label_sentences = sequencesToSentences(activitySequences)

In [None]:
len(sentences)

106

Sentence indexing (map each token to an integer id)

In [None]:
# Fit a Keras Tokenizer on the sentences and turn each sentence into a list of
# integer token ids.
# NOTE(review): the filter string contains '_' and '-' but no '.'; presumably
# column names are therefore split at underscores while decimal values stay
# in one token — confirm against the Tokenizer documentation.
tokenizer = Tokenizer(filters='!"#$%&()*+,-/:;<=>?@[\\]^_`{|}~\t\n')
tokenizer.fit_on_texts(sentences)
word_index = tokenizer.word_index
indexed_sentences = tokenizer.texts_to_sequences(sentences)

In [None]:
len(tokenizer.word_index)

4038

In [None]:
len(indexed_sentences)

106

sliding windows

In [None]:
def slidingWindow(sequence,winSize,step=1):
    """Yield successive windows of `winSize` items taken from `sequence`.

    Windows start at offsets 0, step, 2*step, ... and only full windows are
    produced, so trailing items that do not fill a window are skipped. When
    `winSize` is larger than the sequence, the whole sequence is yielded once.

    Args:
        sequence: sliceable object supporting `len()`.
        winSize: window length (int).
        step: offset between consecutive window starts (int, 1 <= step <= winSize).

    Yields:
        Slices of `sequence`.

    Raises:
        Exception: on a non-iterable sequence, non-int arguments, a step
            below 1, or a step larger than winSize. Because this is a
            generator, the checks run when iteration starts, not at call time.
    """
    try:
        iter(sequence)
    except TypeError:
        raise Exception("**ERROR** sequence must be iterable.")
    if not (type(winSize) is int and type(step) is int):
        raise Exception("**ERROR** type(winSize) and type(step) must be int.")
    # A zero step would divide by zero below; a negative one yields nothing useful.
    if step < 1:
        raise Exception("**ERROR** step must be a positive integer.")
    if step > winSize:
        raise Exception("**ERROR** step must not be larger than winSize.")

    length = len(sequence)

    # Sequence shorter than one window: hand it back unchanged, once.
    if winSize > length:
        yield sequence[0:length]
        return

    # Integer arithmetic: number of full windows that fit.
    numOfChunks = (length - winSize) // step + 1
    for start in range(0, numOfChunks * step, step):
        yield sequence[start:start + winSize]

In [None]:
winSize = 50
step = 1
X_windowed = []
Y_windowed = []

# Cut every indexed sentence into windows; each window gets the label of the
# sentence it came from.
for i, s in enumerate(indexed_sentences):
    windows = list(slidingWindow(s, winSize, step))
    X_windowed.extend(windows)
    Y_windowed.extend([label_sentences[i]] * len(windows))

In [None]:
# Bring all windows to one common length so they stack into a single 2-D array
# (pad_sequences is called with its default settings).
padded_windows = pad_sequences(X_windowed)

In [None]:
# Labels as an array, aligned row-for-row with padded_windows.
Y_windowed = np.array(Y_windowed)

## Save files ##
# Files are named lstm_<winSize>_padded_x.npy / lstm_<winSize>_padded_y.npy and
# written to the current working directory.
print("STEP 8: save sliding windows and labels")
np.save("{}_{}_padded_x.npy".format("lstm",winSize), padded_windows)
np.save("{}_{}_padded_y.npy".format("lstm",winSize), Y_windowed)

STEP 8: save sliding windows and labels


In [None]:
padded_windows.shape

(14593, 50)

In [None]:
padded_windows[0].shape

(50,)

### main

In [None]:
# Load padded x array (the file name depends on `winSize`, which must already
# be defined by an earlier cell)
padded_x = np.load("{}_{}_padded_x.npy".format("lstm", winSize))

# Load padded y array
padded_y = np.load("{}_{}_padded_y.npy".format("lstm", winSize))

In [None]:
# Hold out ~20% of the windows, then cut that hold-out in half: roughly
# 80% train / 10% test / 10% validation. random_state fixes the shuffling.
# NOTE(review): the windows were generated with a sliding step of 1, so
# neighbouring windows overlap almost entirely. Splitting individual windows
# at random can put near-duplicates on both sides of the split and inflate
# validation/test scores — consider splitting by subject or by sequence.
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(padded_x, padded_y, test_size=(1 - 0.8), random_state=42)

# Now split the test set further into test and validation sets
# (0.1 / (0.1 + 0.1) == 0.5, i.e. half of the hold-out each)
x_test, x_val, y_test, y_val = train_test_split(x_test, y_test, test_size=0.1 / (0.1 + 0.1), random_state=42)

In [None]:
y_train.shape

(11674,)

#### Models: LSTM classifier and FCN embedded

In [None]:
pip install tensorflow

In [None]:
class LSTMModel(tf.keras.Model):
    """Sequence classifier: Embedding -> LSTM -> softmax Dense.

    Args:
        vocab_size: number of rows of the embedding table.
        embedding_dim: size of each embedding vector.
        hidden_dim: number of LSTM units.
        output_dim: number of output classes (width of the softmax layer).
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        layers = tf.keras.layers
        self.embedding = layers.Embedding(vocab_size, embedding_dim)
        self.lstm = layers.LSTM(hidden_dim)
        self.fc = layers.Dense(output_dim, activation='softmax')

    def call(self, inputs):
        """Return class probabilities for a batch of token-id sequences.

        NOTE(review): assumes `inputs` is an integer array shaped
        (batch, timesteps) — confirm against the caller.
        """
        return self.fc(self.lstm(self.embedding(inputs)))

In [None]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dropout,Activation, Input, Embedding, LSTM, Dense, Conv1D, BatchNormalization, GlobalAveragePooling1D

def modelFCNEmbedded(trainx,trainy,vocab_size):
    """Build (without compiling) the fully convolutional classifier on embeddings.

    Layers: Embedding(64) -> Conv1D(128, k=8) -> Conv1D(256, k=5) ->
    Conv1D(128, k=3), each convolution followed by BatchNormalization and
    ReLU, then GlobalAveragePooling1D -> Dropout(0.5) -> softmax Dense.

    Args:
        trainx: training windows; the second axis is used as the sequence
            length (assumes shape (n_samples, n_timesteps) — TODO confirm).
        trainy: training labels. A 2-D array is treated as one-hot (class
            count = second axis); a 1-D array is treated as integer class ids
            (class count = largest id + 1).
        vocab_size: the embedding table is created with vocab_size + 1 rows.

    Returns:
        The uncompiled Keras `Model` named "FCN_Embedded".
    """
    # The class count must come from the label values, not the sample count.
    if len(trainy.shape) > 1:
        nb_classes = trainy.shape[1]
    else:
        nb_classes = int(np.max(trainy)) + 1

    # Sequence length is the second axis; the first axis is the sample count.
    n_timesteps = trainx.shape[1]

    input_layer = Input(shape=(n_timesteps,))

    # `input_length` is omitted: the Input layer already fixes the sequence
    # length and the argument is deprecated in recent Keras releases.
    # NOTE(review): mask_zero=True feeds a mask into Conv1D layers — confirm
    # that the mask is handled as intended by the installed Keras version.
    embedding = Embedding(input_dim=vocab_size + 1, output_dim=64, mask_zero=True)(input_layer)

    conv1 = Conv1D(filters=128, kernel_size=8, padding='same')(embedding)
    conv1 = BatchNormalization()(conv1)
    conv1 = Activation(activation='relu')(conv1)

    conv2 = Conv1D(filters=256, kernel_size=5, padding='same')(conv1)
    conv2 = BatchNormalization()(conv2)
    conv2 = Activation('relu')(conv2)

    conv3 = Conv1D(128, kernel_size=3, padding='same')(conv2)
    conv3 = BatchNormalization()(conv3)
    conv3 = Activation('relu')(conv3)

    gap_layer = GlobalAveragePooling1D()(conv3)

    x = Dropout(0.5)(gap_layer)

    output_layer = Dense(nb_classes, activation='softmax')(x)

    model = Model(inputs=input_layer, outputs=output_layer, name="FCN_Embedded")

    # Without this return the caller would receive None.
    return model


In [None]:
# vocab_size = number of distinct token ids that occur in x_train.
# NOTE(review): this is a count of ids seen in the training split, which can be
# smaller than (largest token id + 1); an embedding table sized from it may not
# cover every id in the validation/test windows — verify.
flat_x_train = [token for sublist in x_train for token in sublist]
vocab_size = len(set(flat_x_train))
embedding_dim = 100
hidden_dim = 128
# NOTE(review): presumably 25 because the labels are raw activity ids and the
# largest id is 24 — confirm against the label values.
output_dim = 25

In [None]:
# Size the embedding table from the largest token id in the full windowed
# dataset (+1 because ids start at 0), so every id in the train, validation
# and test windows has a row. This replaces the hand-tuned `vocab_size+22`.
model = LSTMModel(int(padded_x.max()) + 1, embedding_dim, hidden_dim, output_dim)
# Integer class labels -> sparse categorical cross-entropy.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=3, batch_size=20, validation_data=(x_val, y_val))

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.src.callbacks.History at 0x7c1ac5251e70>

#### predict

In [None]:
predictions = model.predict(x_test)



In [None]:
predictions.shape

(1459, 25)

In [None]:
import numpy as np

# Index of the highest-scoring class for every row of `predictions`.
class_predictions = np.argmax(predictions, axis=1)

In [None]:
class_predictions

array([16,  6,  3, ...,  2, 24,  6])

In [None]:
from sklearn.metrics import accuracy_score

# Fraction of test windows whose predicted class index equals the label.
# NOTE(review): comparing argmax indices with y_test assumes the labels are the
# same integer ids the model was trained on — confirm.
accuracy = accuracy_score(y_test, class_predictions)
print("Accuracy:", accuracy)

Accuracy: 0.9993145990404386
