In [20]:
# --- Experiment configuration ------------------------------------------------
# When True the CNN is built with 6 input channels instead of 3.
GYRO = False
# Window length handed to the sequence generator; the CNN expects
# 25 * SEQUENCE_LENGTH time steps per window.
# (presumably seconds at 25 samples per second -- TODO confirm)
SEQUENCE_LENGTH = 6
# Overlap handed to the sequence generator (same unit as SEQUENCE_LENGTH,
# presumably -- TODO confirm against sequence_genrator).
SEQUENCE_OVERLAP = 5
BATCH_SIZE = 10
EPOCHS = 50
# Used as part of the saved model's file name.
# NOTE(review): the name contains ':' which is not a legal character in
# Windows file names; left unchanged so existing saved models keep their names.
MODEL_NAME = f"physical_epochs:{EPOCHS}_batch:{BATCH_SIZE}_gyro:{GYRO}_window:{SEQUENCE_LENGTH}_overlap:{SEQUENCE_OVERLAP}"

# Dataset split fractions. Only DEV_SIZE and TEST_SIZE are consumed by the
# split itself; TRAINING_SIZE is whatever is left over.
TRAINING_SIZE = 0.8
DEV_SIZE = 0.1
TEST_SIZE = 0.1

# TRAINING_SIZE is never passed to the splitter, so make sure the three
# documented fractions cannot silently disagree with each other.
if abs(TRAINING_SIZE + DEV_SIZE + TEST_SIZE - 1.0) > 1e-9:
    raise ValueError("TRAINING_SIZE + DEV_SIZE + TEST_SIZE must add up to 1.0")

In [21]:
import os

import numpy as np
from keras import layers, Sequential
from sklearn.model_selection import train_test_split

import file_tagger
import sequence_genrator


In [22]:
# Directory handed to the data loader; recording CSV files are read from it.
DATA_DIRECTORY = "./all_respeck"

# Sub-activities that are tagged for every stationary posture.
_STATIONARY_ACTIVITIES = (
    "normal_breathing",
    "hyperventilating",
    "coughing",
    "laughing",
    "singing",
    "talking",
)

# Sitting and standing additionally have an "eating" recording.
_UPRIGHT_ACTIVITIES = _STATIONARY_ACTIVITIES + ("eating",)


def _tags_for(posture, activities=_STATIONARY_ACTIVITIES):
    """Return the set of "<posture>&<activity>" tags for one posture."""
    return {f"{posture}&{activity}" for activity in activities}


# Sitting and standing are merged into a single class.
SITTING_OR_STANDING = _tags_for("sitting", _UPRIGHT_ACTIVITIES) | _tags_for("standing", _UPRIGHT_ACTIVITIES)

# One class per lying orientation.
BACK = _tags_for("lying_down_back")
STOMACH = _tags_for("lying_down_stomach")
LEFT = _tags_for("lying_down_left")
RIGHT = _tags_for("lying_down_right")

# Movement classes are only tagged with normal breathing.
WALKING = _tags_for("walking", ("normal_breathing",))
RUNNING = _tags_for("running", ("normal_breathing",))
ASCENDING = _tags_for("ascending_stairs", ("normal_breathing",))
DESCENDING = _tags_for("descending_stairs", ("normal_breathing",))
SHUFFLE = _tags_for("shuffle_walking", ("normal_breathing",))
MISC = _tags_for("misc_movements", ("normal_breathing",))

In [23]:
# The position of a set in this list is the integer class id returned by
# get_label and used as the training target. Note that DESCENDING (7) is
# listed before ASCENDING (8); reordering changes the ids the model learns.
CLASS_SETS = [
    SITTING_OR_STANDING,  # 0
    BACK,                 # 1
    STOMACH,              # 2
    LEFT,                 # 3
    RIGHT,                # 4
    WALKING,              # 5
    RUNNING,              # 6
    DESCENDING,           # 7
    ASCENDING,            # 8
    SHUFFLE,              # 9
    MISC,                 # 10
]
# One integer id per class: [0, 1, ..., len(CLASS_SETS) - 1].
UNIQUE_LABELS = list(range(len(CLASS_SETS)))

def get_label(activity):
    """Map an activity tag to its integer class id.

    Args:
        activity: tag string such as "sitting&coughing".

    Returns:
        The index of the first set in CLASS_SETS that contains ``activity``,
        or None when no set contains it.
    """
    matching_ids = (
        class_id
        for class_id, members in enumerate(CLASS_SETS)
        if activity in members
    )
    return next(matching_ids, None)

In [24]:
def generateTrainingData(directory, sequenceLength, overlap):
    """Load every tagged recording in ``directory`` as labelled sequences.

    Args:
        directory: folder passed to ``file_tagger.tag_directory``; each CSV
            name it returns is read from ``directory + "/" + name``.
        sequenceLength: window length forwarded to the sequence generator.
        overlap: window overlap forwarded to the sequence generator.

    Returns:
        A list of ``(label, sequence)`` tuples, where ``label`` is the class
        id from ``get_label``. Recording types that ``get_label`` does not
        recognise are skipped (with a printed notice) rather than being added
        with a ``None`` label, which would break training later on.
    """
    tagged_data = []

    csv_dictionary = file_tagger.tag_directory(directory)

    for recordingType in csv_dictionary:
        # Resolve the label once per recording type instead of once per sequence.
        label = get_label(recordingType)
        if label is None:
            print(f"skipping unrecognised recording type: {recordingType!r}")
            continue

        for csv_file in csv_dictionary[recordingType]:
            # NOTE(review): the accelerometer-only loader is always used here,
            # regardless of the notebook-level GYRO flag.
            sequences = sequence_genrator.generate_sequences_from_file_without_gyroscope(directory + "/" + csv_file, sequenceLength, overlap)
            tagged_data.extend((label, sequence) for sequence in sequences)

    print ("there are " + str(len(tagged_data)) + " tagged sequences in the dataset")
    return tagged_data

In [25]:
def train_dev_test_split(data, labels, dev_size, test_size, random_state=42):
    """Split data and labels into training, dev and test sets.

    The split is done in two passes with ``train_test_split``: first the
    combined dev+test share is held out from the training data, then that
    held-out part is divided into dev and test.

    Args:
        data: samples to split.
        labels: labels aligned with ``data``.
        dev_size: fraction of the whole dataset to use for the dev set.
        test_size: fraction of the whole dataset to use for the test set.
        random_state: seed forwarded to both ``train_test_split`` calls.

    Returns:
        ``(train_data, dev_data, test_data, train_labels, dev_labels, test_labels)``
    """
    # Pass 1: everything that is not training data goes into the holdout.
    holdout_fraction = dev_size + test_size
    train_data, holdout_data, train_labels, holdout_labels = train_test_split(
        data,
        labels,
        test_size=holdout_fraction,
        random_state=random_state,
    )

    # Pass 2: carve the test set out of the holdout; the rest is the dev set.
    test_share_of_holdout = test_size / holdout_fraction
    dev_data, test_data, dev_labels, test_labels = train_test_split(
        holdout_data,
        holdout_labels,
        test_size=test_share_of_holdout,
        random_state=random_state,
    )

    return train_data, dev_data, test_data, train_labels, dev_labels, test_labels

In [26]:
def train_model_CNN(input_data, labels, unique_labels, epochs, batch_size, validation_data):
    """Build, compile and fit a 1-D convolutional classifier.

    Args:
        input_data: training samples passed to ``model.fit``. When it exposes
            a 3-D ``shape`` (samples, time steps, channels) the network input
            shape is taken from it.
        labels: integer class ids aligned with ``input_data`` (the loss is
            sparse categorical cross-entropy).
        unique_labels: collection of class ids; its length sets the size of
            the softmax output layer.
        epochs: number of training epochs.
        batch_size: mini-batch size.
        validation_data: forwarded unchanged to ``model.fit``.

    Returns:
        The fitted Keras ``Sequential`` model.
    """
    # Take the per-sample shape from the data itself so the network always
    # matches what is actually fed to it, instead of relying on the GYRO and
    # SEQUENCE_LENGTH globals staying in sync with the data loader.
    sample_dims = tuple(getattr(input_data, "shape", ()))[1:]
    if len(sample_dims) == 2 and all(dim is not None for dim in sample_dims):
        input_shape = tuple(int(dim) for dim in sample_dims)
    else:
        # Shape not available (e.g. a generator): fall back to the original
        # convention of 25 time steps per unit of SEQUENCE_LENGTH and
        # 6 channels with gyroscope data, 3 without.
        input_shape = (25 * SEQUENCE_LENGTH, 6 if GYRO else 3)

    # Three convolutional stages followed by a small dense classifier head.
    model = Sequential([
        layers.Conv1D(32, 3, activation='relu', input_shape=input_shape),
        layers.MaxPooling1D(2),
        layers.Conv1D(64, 3, activation='relu'),
        layers.Conv1D(64, 3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Conv1D(128, 3, activation='relu'),
        layers.MaxPooling1D(2),
        layers.Dropout(0.5),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(len(unique_labels), activation='softmax')
    ])

    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Train the CNN model
    model.fit(input_data, labels, epochs=epochs, batch_size=batch_size, validation_data=validation_data)

    return model

In [27]:
# Create the output folder up front: model.save() does not need to fail on a
# missing directory only after the whole training run has finished.
os.makedirs("models/Task1", exist_ok=True)

tagged_sequences = generateTrainingData(DATA_DIRECTORY, SEQUENCE_LENGTH, SEQUENCE_OVERLAP)
if not tagged_sequences:
    raise ValueError(f"no tagged sequences were found in {DATA_DIRECTORY!r}")

# Separate the (label, sequence) pairs into parallel lists
sequences = [sequence for _, sequence in tagged_sequences]
labels = [label for label, _ in tagged_sequences]

# Convert to numpy arrays
sequences = np.array(sequences, dtype=np.float32)
labels = np.array(labels)

# NOTE(review): the split shuffles individual windows. If SEQUENCE_OVERLAP means
# consecutive windows share samples, near-identical windows from one recording
# can land in both the training and the test set, which would make the reported
# test accuracy optimistic -- consider splitting by recording file instead.
train_data, dev_data, test_data, train_labels, dev_labels, test_labels = train_dev_test_split(
    sequences, labels, dev_size=DEV_SIZE, test_size=TEST_SIZE
)

model = train_model_CNN(
    train_data,
    train_labels,
    UNIQUE_LABELS,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    validation_data=(dev_data, dev_labels),
)

# Final score on the held-out test set
test_loss, test_accuracy = model.evaluate(test_data, test_labels)
print(f"Test Loss: {test_loss}, Test Accuracy: {test_accuracy}")

# Save the trained model
model.save(f"models/Task1/{MODEL_NAME}_{test_accuracy}.keras")

there are 77276 tagged sequences in the dataset
Epoch 1/50