# TDOST-Based HAR Model using Aruba Dataset
This notebook implements the layout-agnostic Human Activity Recognition model using the TDOST methodology as described in the provided document.

In [2]:
%pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.18.0-cp312-cp312-macosx_12_0_arm64.whl.metadata (4.0 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.1.0-py3-none-any.whl.metadata (2.3 kB)
Collecting astunparse>=1.6.0 (from tensorflow)
  Downloading astunparse-1.6.3-py2.py3-none-any.whl.metadata (4.4 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-24.3.25-py2.py3-none-any.whl.metadata (850 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.6.0-py3-none-any.whl.metadata (1.3 kB)
Collecting google-pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-1-py2.py3-none-macosx_11_0_arm64.whl.metadata (5.2 kB)
Collecting opt-einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting termcolor>=1.1.0 (from tensorflow)
  Downloa

In [3]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from sentence_transformers import SentenceTransformer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional
from tensorflow.keras.utils import to_categorical

## Step 1: Load and Preprocess Data

In [11]:
"""
def load_aruba_data(filepath):
    columns = ['Date', 'Time', 'Sensor', 'Value', 'Activity', 'Begin_End']
    data = pd.read_csv(filepath, sep=" ", header=None, names=columns, engine='python',
                       parse_dates=[[0, 1]], keep_date_col=False)
    data['Activity'] = data['Activity'].fillna(method='ffill')
    data['Begin_End'] = data['Begin_End'].fillna('')
    return data

aruba_data = load_aruba_data('/Users/harrisonkirstein/Desktop/CSCI-4380-Honors-Option-Repo/CSCI 4380 Honors Option Project/Datasets/aruba/data')
aruba_data.head()

"""


# Step 1: Load and Preprocess Dataset

# Define file path
aruba_data_path = '/Users/harrisonkirstein/Desktop/CSCI-4380-Honors-Option-Repo/CSCI 4380 Honors Option Project/Datasets/aruba/data'

# Load dataset with variable columns
aruba_data = pd.read_csv(
    aruba_data_path, 
    header=None, 
    names=['Date', 'Time', 'Sensor', 'Value', 'Activity', 'Begin_End'], 
    delim_whitespace=True,
    engine='python'
)

# Combine Date and Time into a single timestamp column
aruba_data['Date_Time'] = pd.to_datetime(aruba_data['Date'] + ' ' + aruba_data['Time'], errors='coerce')
aruba_data.drop(columns=['Date', 'Time'], inplace=True)

# Fill missing columns with NaN for rows without activity labels
aruba_data.fillna('', inplace=True)

# Preview the dataset
aruba_data.head()

  aruba_data = pd.read_csv(


Unnamed: 0,Sensor,Value,Activity,Begin_End,Date_Time
0,M003,ON,Sleeping,begin,2010-11-04 00:03:50.209589
1,M003,OFF,,,2010-11-04 00:03:57.399391
2,T002,21.5,,,2010-11-04 00:15:08.984841
3,T003,21,,,2010-11-04 00:30:19.185547
4,T004,21,,,2010-11-04 00:30:19.385336


## Step 2: Generate TDOST Descriptions

In [16]:
# Generate TDOST descriptions
def generate_tdost(data):
    descriptions = []
    for i, row in data.iterrows():
        if row['Activity']:
            desc = f"{row['Sensor']} sensor in {row['Value']} triggered at {row['Date_Time'].strftime('%H:%M:%S')}."
            descriptions.append(desc)
    return descriptions

aruba_data['TDOST_Basic'] = generate_tdost(aruba_data)
aruba_data.head()

ValueError: Length of values (12954) does not match length of index (1719558)

## Step 3: Encode TDOST Descriptions and Labels

In [None]:
# Load pre-trained Sentence Transformer
model = SentenceTransformer('all-distilroberta-v1')

# Encode TDOST descriptions
aruba_data['Encoded_TDOST'] = aruba_data['TDOST'].apply(lambda x: model.encode(x) if isinstance(x, str) else None)

# Encode labels
label_encoder = LabelEncoder()
aruba_data['Encoded_Activity'] = label_encoder.fit_transform(aruba_data['Activity'])
num_classes = len(label_encoder.classes_)
aruba_data.head()

## Step 4: Train-Test Split

In [None]:
# Split data
X = np.stack(aruba_data['Encoded_TDOST'].dropna())
y = to_categorical(aruba_data['Encoded_Activity'].dropna(), num_classes=num_classes)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Step 5: Build and Train the Model

In [None]:
# Build LSTM model
def build_model(input_shape, num_classes):
    model = Sequential([
        Bidirectional(LSTM(64, return_sequences=False), input_shape=input_shape),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
    return model

input_shape = (X_train.shape[1],)
model = build_model(input_shape, num_classes)

# Train model
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=32)

## Step 6: Evaluate and Test the Model

In [None]:
# Evaluate model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

# Test on new data (replace with actual test data)
new_sample = "motion sensor in bedroom triggered at 08:15:23."
new_embedding = model.encode(new_sample)
prediction = model.predict(np.array([new_embedding]))
predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
print(f"Predicted Activity: {predicted_label[0]}")