In [1]:
from preprocessing import *
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
import tensorflow as tf
from sklearn.model_selection import train_test_split

2024-04-06 11:34:04.472793: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


# Load every recording under parent_dir, min-max scale its signal columns and
# stack all recordings row-wise into one 2-D array called `data`.
parent_dir = './Data/In-lab/'
paths = sorted(os.listdir(parent_dir))

data = []
interval_duration = 1
# Number of leading columns in each row that are treated as signal values.
# NOTE(review): 15_000 looks like samples per unit of interval_duration - confirm.
vec_size = int(15_000 * interval_duration)

for path in tqdm(paths):
    # os.path.join keeps the path valid whether or not parent_dir ends in '/'.
    temp = get_data(os.path.join(parent_dir, path), interval_duration=interval_duration)
    dataX = temp[:, :vec_size].astype(float)
    # Everything after the signal columns is carried through unchanged; the
    # last column is read as the label further down in this cell.
    remaining = temp[:, vec_size:]
    # 0-1 normalization over the whole recording (z-score scaling was tried
    # earlier and dropped). After subtracting the minimum, the maximum equals
    # the value range; skip the division for a constant recording so it stays
    # all zeros instead of turning into NaN.
    dataX -= dataX.min()
    value_range = dataX.max()
    if value_range > 0:
        dataX /= value_range
    temp = np.concatenate([dataX, remaining], axis=1)
    data.append(temp)


data = np.concatenate(data, axis=0)


def unison_shuffled_copies(a, b):
    """Return copies of `a` and `b` reordered by one shared random permutation.

    Both inputs must have the same length and support indexing with an
    integer array (e.g. NumPy arrays). Element i of the first result still
    corresponds to element i of the second result.

    Raises:
        AssertionError: if the two inputs differ in length.
    """
    n_items = len(a)
    assert n_items == len(b)
    order = np.random.permutation(n_items)
    return a[order], b[order]


# Signal columns become the features; the last column is used as the label.
dataX = data[:, :vec_size].astype(float)
dataY = data[:, -1].astype(float)
# Random row-level split with scikit-learn's default proportions.
# NOTE(review): no random_state is passed, so the split differs between runs;
# trainX/valX/trainY/valY are also reassigned by the next cell.
trainX, valX, trainY, valY = train_test_split(dataX, dataY)

In [2]:
# Split at the recording level: shuffle the file names and hold out the last
# four for validation, so no recording contributes to both sets.
parent_dir = './Data/In-lab/'
paths = np.array(sorted(os.listdir(parent_dir)))
np.random.shuffle(paths)  # in-place; unseeded, so the split changes per run
training_paths, validation_paths = paths[:-4], paths[-4:]
# NOTE(review): not referenced by the visible code below in this cell
# (return_dataset passes sampling=5 instead) - confirm whether it is needed.
interval_duration = 0.25


def unison_shuffled_copies(a, b):
    """Shuffle two equal-length indexable arrays with the same random order.

    NOTE: an identical definition already appears in the first cell of this
    notebook; this one simply rebinds the name.

    Returns:
        A pair (shuffled_a, shuffled_b) whose elements stay aligned.

    Raises:
        AssertionError: if `a` and `b` have different lengths.
    """
    assert len(a) == len(b)
    shuffled_idx = np.random.permutation(len(a))
    shuffled_a = a[shuffled_idx]
    shuffled_b = b[shuffled_idx]
    return shuffled_a, shuffled_b


def return_dataset(paths, sampling=5, data_dir=None):
    """Load activity chunks for several recordings and merge them.

    Args:
        paths: iterable of file names located inside `data_dir`.
        sampling: forwarded to `get_data_activity_chunks`; defaults to 5,
            the value that was previously hard-coded.
        data_dir: directory containing the recordings. Defaults to the
            notebook-level `parent_dir`, which keeps existing calls unchanged.

    Returns:
        A 4-tuple `(data, ema, labels, activities)`:
        - `data`: plain Python list with the chunks of every recording, in
          order. It is not stacked into one array (in this notebook's output
          the chunks have differing lengths).
        - `ema`, `labels`, `activities`: the per-recording arrays concatenated
          along axis 0.

    Raises:
        ValueError: from `np.concatenate` when `paths` is empty.
    """
    if data_dir is None:
        # Resolved at call time so a later reassignment of parent_dir is honoured.
        data_dir = parent_dir

    data = []
    activities_list = []
    labels_list = []
    ema_list = []
    for path in tqdm(paths):
        # os.path.join works whether or not data_dir has a trailing separator.
        dataX, ema, labels, activities = get_data_activity_chunks(
            os.path.join(data_dir, path), sampling=sampling)
        data.extend(dataX)
        ema_list.append(ema)
        labels_list.append(labels)
        activities_list.append(activities)

    return (
        data,
        np.concatenate(ema_list, axis=0),
        np.concatenate(labels_list, axis=0),
        np.concatenate(activities_list, axis=0),
    )


# Build the training and validation sets from the two disjoint groups of
# recording paths. This rebinds trainX/valX/trainY/valY from the first cell:
# the X values are now Python lists of per-chunk arrays rather than 2-D arrays.
trainX, trainEMA, trainY, trainActivities = return_dataset(training_paths)
valX, valEMA, valY, valActivities = return_dataset(validation_paths)

100%|██████████| 12/12 [00:04<00:00,  2.76it/s]
100%|██████████| 4/4 [00:01<00:00,  2.81it/s]


In [3]:
# Number of training chunks, followed by the lengths of a few individual
# chunks to show that they are not all the same size.
print(len(trainX))
for chunk_idx in (0, 1, 10):
    print(len(trainX[chunk_idx]))


180
14976
14975
11978


In [4]:
# Sanity check: one label, one EMA row and one activity entry per training chunk.
len(trainX), trainX[0].shape, trainY.shape, trainEMA.shape, trainActivities.shape

(180, (14976,), (180,), (180, 14), (180,))

# Plot the first 1000 samples of the first training chunk.
# trainX is a Python list of per-chunk arrays, so the chunk has to be picked
# first and sliced second; tuple indexing (trainX[0, :1000]) raises TypeError
# on a list.
first_chunk = trainX[0][:1000]
plt.figure(figsize=(20, 6))
plt.plot(np.arange(first_chunk.size), first_chunk)
plt.show()

In [5]:
from model import get_model
import tensorflow.keras as keras


In [6]:
# input_size=None leaves the sequence length unspecified; the summary below
# reports the input as (None, None, 1).
model = get_model(input_size=None)

# Binary classification setup: Adam optimiser, binary cross-entropy loss,
# accuracy as the reported metric.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-2),
    loss=keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

model.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, None, 1)]    0           []                               
                                                                                                  
 conv1d (Conv1D)                (None, None, 4)      16          ['input_1[0][0]']                
                                                                                                  
 conv1d_1 (Conv1D)              (None, None, 8)      104         ['conv1d[0][0]']                 
                                                                                                  
 conv1d_2 (Conv1D)              (None, None, 16)     400         ['conv1d_1[0][0]']               
                                                                                              

2024-04-06 11:34:11.607437: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


                                                                                                  
 dropout (Dropout)              (None, 24)           0           ['dense[0][0]']                  
                                                                                                  
 dense_1 (Dense)                (None, 4)            100         ['dropout[0][0]']                
                                                                                                  
 dense_2 (Dense)                (None, 1)            5           ['dense_1[0][0]']                
                                                                                                  
Total params: 39,273
Trainable params: 39,273
Non-trainable params: 0
__________________________________________________________________________________________________


In [7]:
from sklearn.utils import class_weight

# 'balanced' class weights for the training labels, keyed by integer class id
# as expected by the class_weight argument of Keras training calls.
class_labels = np.unique(trainY)
balanced_weights = class_weight.compute_class_weight('balanced',
                                                     classes=class_labels,
                                                     y=trainY)
# Pair each weight with the label it was computed for instead of assuming the
# labels are exactly 0 and 1: this also works when the (randomly chosen)
# training recordings happen to contain a single class, where indexing
# weights[1] would raise IndexError.
weights = {
    int(label): float(weight)
    for label, weight in zip(class_labels, balanced_weights)
}


In [8]:
# Halve the learning rate when val_loss has not improved by at least 1e-4 for
# 5 epochs, never going below 1e-7.
# NOTE(review): in the visible notebook this callback is only referenced by
# the commented-out model.fit call below; the manual train_on_batch loop does
# not use it.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    min_delta=1e-4,
)

# Disabled model.fit variant, kept for reference:
# history = model.fit(trainX,
#                     trainY,
#                     validation_data=(valX, valY),
#                     verbose=1,
#                     epochs=100,
#                     class_weight=weights,
#                     callbacks=[reduce_lr],
#                     batch_size=1)

In [9]:
# Shape of the second training chunk, for comparison with the first one.
trainX[1].shape

(14975,)

In [10]:
# Label sum, label array shape, and the sum as a fraction of all training labels.
label_total = trainY.sum()
print(label_total, trainY.shape, label_total / trainY.size)

108 (180,) 0.6


In [None]:
num_epochs = 100


def _as_single_batch(signal):
    """Wrap one array of shape S into a batch of shape (1,) + S + (1,).

    For the 1-D chunks used here this gives (1, length, 1): a batch holding a
    single sample with one trailing channel axis.
    """
    return signal.reshape((1,) + signal.shape + (1,))


# Manual training loop: the chunks have different lengths, so each one is fed
# to the model as its own batch of size one.
for epoch in tqdm(range(num_epochs)):
    print(f'Epoch: {epoch+1}')
    overall_loss, overall_acc = [], []
    # Visit the chunks in a fresh random order every epoch. With one sample
    # per update, a fixed order (grouped by recording) would bias training;
    # model.fit shuffles by default, so the manual loop has to do it itself.
    for i in np.random.permutation(len(trainX)):
        dX, dY = _as_single_batch(trainX[i]), trainY[i:i+1]
        loss, acc = model.train_on_batch(dX, dY, class_weight=weights)
        overall_acc.append(acc)
        overall_loss.append(loss)
    print(f'accuracy: {np.mean(overall_acc)}, loss: {np.mean(overall_loss)}')

    # Validation pass: evaluation only, no weight updates, so order is irrelevant.
    val_overall_loss, val_overall_acc = [], []
    for i in range(len(valX)):
        dX, dY = _as_single_batch(valX[i]), valY[i:i+1]
        loss, acc = model.test_on_batch(dX, dY)
        val_overall_acc.append(acc)
        val_overall_loss.append(loss)
    print(f'val accuracy: {np.mean(val_overall_acc)}, val loss: {np.mean(val_overall_loss)}')
    

  0%|          | 0/100 [00:00<?, ?it/s]

Epoch: 1
