In [22]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten
from sklearn.preprocessing import LabelEncoder, StandardScaler

from scipy import stats
from sklearn.metrics import classification_report

In [2]:
# read datasets
train_df = pd.read_csv('../train_data.csv')
validation_df = pd.read_csv('../validation_data.csv')
test_df = pd.read_csv('../test_data.csv')

In [3]:
# BUILD SLIDING WINDOW
# df - dataframe used
# window_size - size of the sliding window, by default 11s if not mentioned otherwise
# step_size - starting point for the current window given the previous, by default 5
# feature_cols - features to be used in the sliding window
def create_windows(dataset, window_size=11, step_size=5, feature_cols=['ACC_X', 'ACC_Y', 'ACC_Z']):
    X = []
    y = []
    window = []

    for person_id in dataset['PERSON_ID'].unique():
        person_data = dataset[dataset['PERSON_ID'] == person_id]
        feature_values = person_data[feature_cols].values
        activity = person_data['ACTIVITY']

        max_window_end = len(person_data)

        for i in range(0, max_window_end - window_size, step_size):
            window = feature_values[i:i+window_size]
            window_label = activity[i:i+window_size].mode(dropna=False).iloc[0]

            # Ensure the window is of the correct size
            if len(window) != window_size:
                continue  # Skip this window if it's the wrong shape

            X.append(window)
            y.append(window_label)

    print(len(X))

    return np.array(X), np.array(y)

In [7]:
# train_dataset = pd.read_csv('../train_data.csv')
# validation_dataset = pd.read_csv('../validation_data.csv')
# test_dataset = pd.read_csv('../test_data.csv')

# min_count_train = min(train_dataset['ACTIVITY'].value_counts())
# min_count_validation = min(validation_dataset['ACTIVITY'].value_counts())
# min_count_test = min(test_dataset['ACTIVITY'].value_counts())

# balanced_train_data = train_dataset.groupby('ACTIVITY').head(min_count_train).reset_index(drop=True)
# balanced_validation_data = validation_dataset.groupby('ACTIVITY').head(min_count_validation).reset_index(drop=True)
# balanced_test_data = test_dataset.groupby('ACTIVITY').head(min_count_test).reset_index(drop=True)

In [4]:
window_size = 60
step_size = 15

In [5]:
X_train, y_train = create_windows(train_df, window_size=window_size, step_size=step_size)
X_val, y_val = create_windows(validation_df, window_size=window_size, step_size=step_size)
X_test, y_test = create_windows(test_df, window_size=window_size, step_size=step_size)

591698
166621
86205


In [6]:
print(X_train)

[[[ 3.1000000e+01  8.0000000e+00  5.5000000e+01]
  [ 1.6000000e+01 -3.0000000e+00  5.6000000e+01]
  [-5.8000000e+01  1.1000000e+01  2.0000000e+01]
  ...
  [-6.4000000e+01  3.0000000e+00 -8.0000000e+00]
  [-6.4000000e+01  2.0000000e+00 -1.0000000e+01]
  [-6.4000000e+01  2.0000000e+00 -9.0000000e+00]]

 [[-6.3000000e+01  3.0000000e+00 -1.6000000e+01]
  [-6.1000000e+01  4.0000000e+00 -2.0000000e+01]
  [-6.3000000e+01  1.0000000e+00 -1.7000000e+01]
  ...
  [-6.6000000e+01  3.0000000e+00 -6.0000000e+00]
  [-6.5000000e+01  2.0000000e+00 -7.0000000e+00]
  [-6.4000000e+01  3.0000000e+00 -7.0000000e+00]]

 [[-6.3000000e+01  3.0000000e+00 -2.0000000e+01]
  [-6.2000000e+01  0.0000000e+00 -1.9000000e+01]
  [-6.2000000e+01  4.0000000e+00 -2.1000000e+01]
  ...
  [-6.8000000e+01  7.0000000e+00 -2.0000000e+01]
  [ 0.0000000e+00  2.0000000e+00  6.2000000e+01]
  [-5.0000000e+00  8.0000000e+00  6.5000000e+01]]

 ...

 [[ 5.7969257e-02 -1.0859760e-01  1.0003531e+00]
  [ 5.8094885e-02 -9.2813770e-02  1.000

In [7]:
unique_values, counts = np.unique(y_train, return_counts=True)
count_dict = {str(k): int(v) for k, v in zip(unique_values, counts)}
count_dict_sorted = {k: v for k, v in sorted(count_dict.items(), key=lambda item: item[1], reverse=True)}
print(count_dict_sorted)

{'sleep': 311264, 'sitting': 161927, 'household-chores': 28316, 'walking': 27028, 'vehicle': 16050, 'mixed-activity': 15950, 'standing': 14603, 'bicycling': 4730, 'manual-work': 3544, 'sports': 1793, 'writing': 453, 'jogging': 449, 'drinking': 448, 'eating pasta': 443, 'dribbling (basket ball)': 441, 'eating chips': 437, 'eating sandwich': 436, 'brushing teeth': 434, 'clapping': 433, 'kicking (soccer ball)': 433, 'eating soup': 431, 'playing catch (tennis ball)': 431, 'typing': 430, 'stairs': 424, 'folding clothes': 370}


In [8]:
weights = {}

for class_name, class_weight in count_dict.items():
    # weight_for_class_i = (total_number_of_rows_in_dataset) / (number_of_classes * total_number_of_rows_with_class_i)
    weight_value = (np.size(y_train) / (len(count_dict) * class_weight))
    # initialize weights dictionary in the format class_new_weight: weight_value 
    weights[class_name] = weight_value

print(weights)

{'bicycling': 5.003788583509514, 'brushing teeth': 54.53437788018433, 'clapping': 54.660323325635105, 'dribbling (basket ball)': 53.66875283446712, 'drinking': 52.83017857142857, 'eating chips': 54.16, 'eating pasta': 53.42645598194131, 'eating sandwich': 54.28422018348624, 'eating soup': 54.91396751740139, 'folding clothes': 63.967351351351354, 'household-chores': 0.835849696284786, 'jogging': 52.71251670378619, 'kicking (soccer ball)': 54.660323325635105, 'manual-work': 6.678306997742664, 'mixed-activity': 1.4838821316614421, 'playing catch (tennis ball)': 54.91396751740139, 'sitting': 0.14616413569077424, 'sleep': 0.07603808985298653, 'sports': 13.200178471834914, 'stairs': 55.82056603773585, 'standing': 1.6207573786208314, 'typing': 55.04167441860465, 'vehicle': 1.4746367601246106, 'walking': 0.8756815154654433, 'writing': 52.247064017660044}


In [9]:
# NORMALIZE DATA FOR THIS MODEL
scaler = StandardScaler()

n_samples = X_train.shape[0]
n_timesteps = X_train.shape[1]
n_features = X_train.shape[2]

In [10]:
X_train_flat = X_train.reshape(-1, X_train.shape[-1])  # Flatten each window into a 1D array
X_val_flat = X_val.reshape(-1, X_val.shape[-1])
X_test_flat = X_test.reshape(-1, X_test.shape[-1])

In [11]:
print(X_train_flat)

[[ 3.1000000e+01  8.0000000e+00  5.5000000e+01]
 [ 1.6000000e+01 -3.0000000e+00  5.6000000e+01]
 [-5.8000000e+01  1.1000000e+01  2.0000000e+01]
 ...
 [ 5.7969257e-02 -9.2892240e-02  1.0003531e+00]
 [ 5.7969257e-02 -9.2892240e-02  9.8446584e-01]
 [ 5.7969257e-02 -9.2892240e-02  1.0003531e+00]]


In [12]:
X_train_scaled = scaler.fit_transform(X_train_flat).reshape(n_samples, n_timesteps, n_features)
X_val_scaled = scaler.transform(X_val_flat).reshape(X_val.shape[0], n_timesteps, n_features)
X_test_scaled = scaler.transform(X_test_flat).reshape(X_test.shape[0], n_timesteps, n_features)

In [13]:
print(X_test_scaled)

[[[-3.141615    1.21467173 -0.73317939]
  [-2.34705198  1.21467173 -0.47060262]
  [-2.98270239  0.97440868 -1.83600183]
  ...
  [-2.61190632  0.55394835 -0.20802585]
  [-3.24755673  0.67407987 -0.57563333]
  [-3.3005276   0.61401411 -0.99575616]]

 [[-2.66487718  1.63513206 -1.67845577]
  [-2.87676066  1.51500054 -1.83600183]
  [-2.55893545  1.5750663  -1.99354789]
  ...
  [-3.77726541  0.67407987 -2.0985786 ]
  [-2.61190632 -0.88762993  2.05013439]
  [-2.98270239 -0.4671696  -0.57563333]]

 [[-3.56538194  1.87539511 -0.57563333]
  [-2.71784805  2.11565815 -0.99575616]
  [-3.141615    1.69519782 -1.10078687]
  ...
  [-2.77081892 -0.10677503  0.94731195]
  [-3.19458586  0.43381683  1.05234266]
  [-3.35349847  0.01335649 -0.2605412 ]]

 ...

 [[ 0.21018153  0.04467495 -0.26907997]
  [ 0.21018153  0.04467495 -0.26907997]
  [ 0.21018153  0.04467495 -0.26907997]
  ...
  [ 0.21103031  0.04559894 -0.26903131]
  [ 0.21103031  0.04465458 -0.26903131]
  [ 0.21103031  0.04465458 -0.26903131]]

 [

In [14]:
# ENCODE LABELS
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc = le.transform(y_val)
y_test_enc = le.transform(y_test)

# Convert labels to one-hot encoding
y_train_cat = to_categorical(y_train_enc)
y_val_cat = to_categorical(y_val_enc)
y_test_cat = to_categorical(y_test_enc)

num_classes = y_train_cat.shape[1]  # Number of unique classes

In [15]:
print(y_test_cat)

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [None]:
# # build model
# # Build the LSTM model
# model = Sequential([
#     LSTM(128, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2]), return_sequences=False),
#     Dropout(0.5),
#     Dense(64, activation='relu'),
#     Dense(num_classes, activation='softmax')
# ])

I0000 00:00:1748546320.123043   15947 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2253 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5
  super().__init__(**kwargs)


In [None]:
# model = Sequential([
#     LSTM(128, input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2]), return_sequences=True),
#     Dropout(0.3),
#     LSTM(64, return_sequences=False), # Returns only the last output
#     Dropout(0.5),
#     Dense(64, activation='relu'),
#     Dense(num_classes, activation='softmax')
# ])

I0000 00:00:1749149151.771259   59728 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 2222 MB memory:  -> device: 0, name: NVIDIA GeForce GTX 1650 Ti, pci bus id: 0000:01:00.0, compute capability: 7.5
  super().__init__(**kwargs)


In [23]:
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Dropout(0.3),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [24]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

In [25]:
# compile model
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [26]:
# train model
history = model.fit(
    X_train_scaled, y_train_cat,
    validation_data=(X_val_scaled, y_val_cat),
    epochs=100,
    batch_size=64,
    verbose=1,
    callbacks=[early_stopping]
    # class_weight=weights
)

Epoch 1/100
[1m9246/9246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 14ms/step - accuracy: 0.6523 - loss: 0.9492 - val_accuracy: 0.7340 - val_loss: 0.8044
Epoch 2/100
[1m9246/9246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 14ms/step - accuracy: 0.7712 - loss: 0.6686 - val_accuracy: 0.7610 - val_loss: 0.7372
Epoch 3/100
[1m9246/9246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 14ms/step - accuracy: 0.7900 - loss: 0.6190 - val_accuracy: 0.7621 - val_loss: 0.7477
Epoch 4/100
[1m9246/9246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m130s[0m 14ms/step - accuracy: 0.7986 - loss: 0.5976 - val_accuracy: 0.7690 - val_loss: 0.7305
Epoch 5/100
[1m9246/9246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 14ms/step - accuracy: 0.8036 - loss: 0.5841 - val_accuracy: 0.7676 - val_loss: 0.7499
Epoch 6/100
[1m9246/9246[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m131s[0m 14ms/step - accuracy: 0.8057 - loss: 0.5752 - val_accuracy: 0.7709 - val_loss:

In [27]:
# evaluate
test_loss, test_acc = model.evaluate(X_test_scaled, y_test_cat, verbose=0)
print(f"Test accuracy: {test_acc:.4f} | Test loss: {test_loss:.4f}")

Test accuracy: 0.7607 | Test loss: 0.7300


In [28]:
y_pred = model.predict(X_test_scaled)
y_pred_labels = le.inverse_transform(np.argmax(y_pred, axis=1))
y_true_labels = le.inverse_transform(np.argmax(y_test_cat, axis=1))

print(classification_report(y_true_labels, y_pred_labels))

[1m2694/2694[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 3ms/step
                             precision    recall  f1-score   support

                  bicycling       0.26      0.38      0.31       713
             brushing teeth       0.61      0.81      0.69       434
                   clapping       0.85      0.75      0.80       433
    dribbling (basket ball)       0.97      0.20      0.33       441
                   drinking       0.44      0.54      0.49       448
               eating chips       0.72      0.20      0.32       437
               eating pasta       0.35      0.53      0.43       443
            eating sandwich       0.31      0.25      0.28       436
                eating soup       0.46      0.49      0.48       431
            folding clothes       0.57      0.89      0.70       370
           household-chores       0.51      0.59      0.54      5596
                    jogging       0.95      0.89      0.92       449
      kicking (soccer bal