In [1]:
import pandas as pd
import numpy as np

In [2]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.callbacks import EarlyStopping

from scipy import stats

2025-05-29 17:56:46.478041: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748530606.495295   28321 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748530606.500965   28321 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1748530606.514869   28321 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748530606.514891   28321 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1748530606.514892   28321 computation_placer.cc:177] computation placer alr

In [3]:
from collections import Counter

In [4]:
# Read the three pre-split CSV files (train / validation / test) from the working directory.
train_df, val_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        './train_data_without_dreamt.csv',
        './validation_data_without_dreamt.csv',
        './test_data_without_dreamt.csv',
    )
)

In [5]:
# BUILD SLIDING WINDOW
# dataset - dataframe used; needs 'PERSON_ID', 'ACTIVITY' and every column in feature_cols
# window_size - number of consecutive rows in each window, by default 11
# step_size - row offset between the starts of two consecutive windows, by default 5
# feature_cols - features to be used in the sliding window
def create_windows(dataset, window_size=11, step_size=5, feature_cols=('ACC_X', 'ACC_Y', 'ACC_Z')):
    """Cut each person's rows into fixed-length, possibly overlapping windows.

    Rows are grouped by 'PERSON_ID' so a window never spans two people. Each
    window is labelled with the most frequent 'ACTIVITY' value among its rows
    (the first value returned by ``Series.mode`` when there is a tie).

    Args:
        dataset: DataFrame holding 'PERSON_ID', 'ACTIVITY' and the feature columns.
        window_size: Rows per window; must be positive.
        step_size: Rows between consecutive window starts; must be positive.
        feature_cols: Column names copied into every window.

    Returns:
        ``(X, y)`` where ``X`` has shape (n_windows, window_size, n_features)
        and ``y`` has shape (n_windows,). When no window fits, ``X`` is an
        empty array that still carries the trailing two dimensions.

    Raises:
        ValueError: If ``window_size`` or ``step_size`` is not positive.

    Side effects:
        Prints the total number of windows produced.
    """
    if window_size <= 0 or step_size <= 0:
        raise ValueError("window_size and step_size must be positive")

    # A tuple default avoids a shared mutable default; pandas needs a list to select columns.
    feature_cols = list(feature_cols)

    X = []
    y = []

    for person_id in dataset['PERSON_ID'].unique():
        person_data = dataset[dataset['PERSON_ID'] == person_id]
        feature_values = person_data[feature_cols].values
        activity = person_data['ACTIVITY']

        # "+ 1" keeps the last full window (start == len - window_size); without it a
        # person with exactly window_size rows would produce no window at all.
        last_start_exclusive = len(person_data) - window_size + 1

        for start in range(0, last_start_exclusive, step_size):
            stop = start + window_size
            X.append(feature_values[start:stop])
            # .iloc makes the positional slice explicit regardless of the frame's index.
            y.append(activity.iloc[start:stop].mode(dropna=False).iloc[0])

    print(len(X))

    if not X:
        # Keep a 3-D shape so downstream code can still read .shape[1] / .shape[2].
        return np.empty((0, window_size, len(feature_cols))), np.array([])

    return np.array(X), np.array(y)

In [6]:
# Window length and stride (in rows) handed to create_windows for every split.
window_size, step_size = 60, 15

In [7]:
# Window all three splits with the same window_size / step_size so their shapes agree.
(X_train, y_train), (X_val, y_val), (X_test, y_test) = (
    create_windows(split_df, window_size, step_size)
    for split_df in (train_df, val_df, test_df)
)

442815
124699
65346


In [8]:
def print_window_distribution(y_labels):
    """Print how many windows carry each label, most frequent label first.

    Args:
        y_labels: Iterable of hashable labels, one per window.

    The label column is padded to the longest label (never narrower than 20
    characters) so the counts line up even when a label exceeds 20 characters.
    Labels with equal counts keep their first-seen order.
    """
    class_counts = Counter(y_labels)

    # default=0 keeps max() from failing when there are no labels at all.
    longest_label = max((len(str(label)) for label in class_counts), default=0)
    width = max(20, longest_label)

    print("Window count per class (descending):")
    for label, count in class_counts.most_common():
        print(f"{label:{width}} {count}")

In [9]:
# Inspect the class balance of the training windows before any resampling or weighting.
print_window_distribution(y_train)

Window count per class (descending):
sleep                162381
sitting              161927
household-chores     28316
walking              27028
vehicle              16050
mixed-activity       15950
standing             14603
bicycling            4730
manual-work          3544
sports               1793
writing              453
jogging              449
drinking             448
eating pasta         443
dribbling (basket ball) 441
eating chips         437
eating sandwich      436
brushing teeth       434
kicking (soccer ball) 433
clapping             433
eating soup          431
playing catch (tennis ball) 431
typing               430
stairs               424
folding clothes      370


In [29]:
# One integer weight per training window, all starting at 1.
sample_weights = np.ones(len(X_train), dtype=int)

In [30]:
def downsample_upweight_majority_class(X_train, y_train, downsample_factor, majority_class, sample_weights):
    """Keep a random 1/downsample_factor share of one class and upweight what is kept.

    Rows whose label differs from ``majority_class`` are returned unchanged.
    Of the rows labelled ``majority_class``, ``count // downsample_factor`` are
    chosen at random without replacement; their weights are multiplied by
    ``downsample_factor`` and the remaining majority rows are dropped. The
    original row order is preserved in the output.

    Args:
        X_train: Array of samples, indexed along its first axis.
        y_train: Array of labels, one per sample.
        downsample_factor: Reduction factor for the majority class; must be >= 1.
        majority_class: Label value of the class to downsample.
        sample_weights: Array of current per-sample weights.

    Returns:
        ``(new_X_train, new_y_train, new_sample_weights)`` as NumPy arrays.

    Raises:
        ValueError: If ``downsample_factor`` is smaller than 1.

    Side effects:
        Prints the number of majority-class rows found (once for X, once for y).
    """
    if downsample_factor < 1:
        raise ValueError("downsample_factor must be >= 1")

    X_train = np.asarray(X_train)
    y_train = np.asarray(y_train)
    sample_weights = np.asarray(sample_weights)

    # Private generator with the same fixed seed: it draws the same samples as
    # np.random.seed(42) followed by np.random.choice, but leaves the global
    # NumPy random state untouched for the rest of the notebook.
    rng = np.random.RandomState(42)

    majority_class_indices = np.where(y_train == majority_class)[0]
    number_of_majority_samples = len(majority_class_indices)

    print(f"total number of rows on X for majority class {majority_class}: {number_of_majority_samples}")
    print(f"total number of rows on y for majority class {majority_class}: {number_of_majority_samples}")

    # int() keeps a float factor (e.g. 2.0) from producing a float sample count.
    number_of_samples_to_extract = int(number_of_majority_samples // downsample_factor)

    random_chosen_indices = rng.choice(number_of_majority_samples, number_of_samples_to_extract, replace=False)
    selected_majority_indices = majority_class_indices[random_chosen_indices]

    # Boolean masks replace the per-row "index in array" scans, which cost
    # O(len(X_train) * len(majority)) in the loop-based version.
    is_selected = np.zeros(len(y_train), dtype=bool)
    is_selected[selected_majority_indices] = True

    keep = np.ones(len(y_train), dtype=bool)
    keep[majority_class_indices] = False
    keep[selected_majority_indices] = True

    # Only the retained majority rows are upweighted; everything else keeps its weight.
    new_sample_weights = np.where(is_selected, sample_weights * downsample_factor, sample_weights)

    return X_train[keep], y_train[keep], new_sample_weights[keep]

In [10]:
# NORMALIZE DATA FOR THIS MODEL
# Remember the (windows, timesteps, features) layout so scaled data can be reshaped back.
n_samples, n_timesteps, n_features = X_train.shape

scaler = StandardScaler()

In [11]:
# Collapse the window and timestep axes into one: every timestep becomes a row and
# the last axis (features) is kept as columns, giving a 2-D matrix for the scaler.
X_train_flat = X_train.reshape(-1, X_train.shape[-1])
X_val_flat = X_val.reshape(-1, X_val.shape[-1])
X_test_flat = X_test.reshape(-1, X_test.shape[-1])

In [12]:
# Fit the scaler on the training rows only, then reuse it unchanged for the validation
# and test rows. Each result is reshaped back to (windows, n_timesteps, n_features).
X_train_scaled = scaler.fit_transform(X_train_flat).reshape(n_samples, n_timesteps, n_features)
X_val_scaled = scaler.transform(X_val_flat).reshape(X_val.shape[0], n_timesteps, n_features)
X_test_scaled = scaler.transform(X_test_flat).reshape(X_test.shape[0], n_timesteps, n_features)

In [13]:
# ENCODE LABELS
# The encoder is fitted on the training labels only; the validation and test labels
# are mapped with the same label -> integer table.
le = LabelEncoder()
y_train_enc = le.fit_transform(y_train)
y_val_enc = le.transform(y_val)
y_test_enc = le.transform(y_test)

num_classes = len(le.classes_)  # Number of unique classes seen in training

# Convert labels to one-hot encoding.
# num_classes is passed explicitly: without it to_categorical sizes each matrix from
# that split's own largest label id, so a split that lacks the highest class would
# get fewer columns than the model has outputs.
y_train_cat = to_categorical(y_train_enc, num_classes=num_classes)
y_val_cat = to_categorical(y_val_enc, num_classes=num_classes)
y_test_cat = to_categorical(y_test_enc, num_classes=num_classes)

In [19]:
# Early-stopping callback. NOTE(review): it is defined here but not passed to
# model.fit in this notebook (the callbacks argument there is commented out).
early_stopping = EarlyStopping(
    monitor='val_loss',    # Monitor validation loss
    patience=5,            # Stop after 5 epochs of no improvement in val_loss
    restore_best_weights=True, # Restore weights from the epoch with the best val_loss
    verbose=1,
)

In [14]:
# Build the LSTM model: a single 128-unit LSTM that reads a whole window and returns
# only its final state, then dropout and a small dense head ending in a softmax over
# the activity classes.
model = Sequential([
    # Declare the (timesteps, features) input as its own Input object rather than via
    # input_shape= on the first layer, which recent Keras releases warn against.
    tf.keras.Input(shape=(X_train_scaled.shape[1], X_train_scaled.shape[2])),
    LSTM(128, return_sequences=False),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

2025-05-29 17:57:42.693506: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: CUDA_ERROR_UNKNOWN: unknown error
2025-05-29 17:57:42.693534: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:178] verbose logging is disabled. Rerun with verbose logging (usually --v=1 or --vmodule=cuda_diagnostics=1) to get more diagnostic output from this module
2025-05-29 17:57:42.693538: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:183] retrieving CUDA diagnostic information for host: frenovo
2025-05-29 17:57:42.693541: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:190] hostname: frenovo
2025-05-29 17:57:42.693622: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:197] libcuda reported version is: 570.133.7
2025-05-29 17:57:42.693639: I external/local_xla/xla/stream_executor/cuda/cuda_diagnostics.cc:201] kernel reported version is: 570.133.7
2025-

In [15]:
# compile model
# categorical_crossentropy matches the one-hot targets and the softmax output layer.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy']
)

In [16]:
# train model
# Runs the full 20 epochs on the scaled training windows and reports validation
# metrics after each epoch. Early stopping is disabled: the callbacks line below is
# commented out.
history = model.fit(
    X_train_scaled, y_train_cat,
    validation_data=(X_val_scaled, y_val_cat),
    epochs=20,
    batch_size=64,
    verbose=1,
    # callbacks=[early_stopping]
)

Epoch 1/20
[1m6919/6919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m304s[0m 44ms/step - accuracy: 0.4566 - loss: 1.5032 - val_accuracy: 0.5912 - val_loss: 1.2151
Epoch 2/20
[1m6919/6919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m302s[0m 44ms/step - accuracy: 0.6548 - loss: 1.0201 - val_accuracy: 0.6688 - val_loss: 1.0182
Epoch 3/20
[1m6919/6919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m308s[0m 44ms/step - accuracy: 0.7182 - loss: 0.8420 - val_accuracy: 0.6817 - val_loss: 0.9843
Epoch 4/20
[1m6919/6919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m314s[0m 45ms/step - accuracy: 0.7442 - loss: 0.7699 - val_accuracy: 0.6949 - val_loss: 0.9694
Epoch 5/20
[1m6919/6919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m312s[0m 45ms/step - accuracy: 0.7575 - loss: 0.7294 - val_accuracy: 0.7012 - val_loss: 0.9519
Epoch 6/20
[1m6919/6919[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m311s[0m 45ms/step - accuracy: 0.7622 - loss: 0.7135 - val_accuracy: 0.6985 - val_loss: 0.967

In [17]:
# evaluate
# evaluate returns the loss followed by the compiled metrics (here: accuracy),
# computed on the held-out test windows.
test_loss, test_acc = model.evaluate(X_test_scaled, y_test_cat, verbose=0)
print(f"Test accuracy: {test_acc:.4f} | Test loss: {test_loss:.4f}")

Test accuracy: 0.7026 | Test loss: 0.9359


In [18]:
from sklearn.metrics import classification_report

# Class probabilities for every test window.
y_pred = model.predict(X_test_scaled)

# Reduce probabilities / one-hot rows to integer class ids, then map them back to
# the original activity names with the fitted label encoder.
predicted_class_ids = np.argmax(y_pred, axis=1)
true_class_ids = np.argmax(y_test_cat, axis=1)
y_pred_labels = le.inverse_transform(predicted_class_ids)
y_true_labels = le.inverse_transform(true_class_ids)

# Per-class precision / recall / F1 on the test set.
print(classification_report(y_true_labels, y_pred_labels))

[1m2043/2043[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 16ms/step
                             precision    recall  f1-score   support

                  bicycling       0.33      0.36      0.34       713
             brushing teeth       0.79      0.71      0.75       434
                   clapping       0.96      0.89      0.92       433
    dribbling (basket ball)       0.78      0.96      0.86       441
                   drinking       0.71      0.71      0.71       448
               eating chips       0.55      0.71      0.62       437
               eating pasta       0.69      0.73      0.71       443
            eating sandwich       0.48      0.37      0.42       436
                eating soup       0.78      0.82      0.80       431
            folding clothes       0.87      0.90      0.88       370
           household-chores       0.53      0.55      0.54      5596
                    jogging       0.88      0.99      0.93       449
      kicking (soccer b