In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import io
import tensorflow as tf

2025-04-25 12:31:58.695611: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-25 12:31:59.555164: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-25 12:32:07.252433: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-25 12:32:12.794763: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745584335.886612     469 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745584336.35

In [4]:
def Load_WISDM_dataset(file_path):
    cleaned_rows = []
    with open(file_path, 'r') as f:
        for line in f:
            parts = line.strip().split(',')
            # Line ends with ;, so we may get it as part of the last value
            if len(parts) == 6 or len(parts) == 7:  # Acceptable lines
                try:
                    user = int(parts[0])
                    activity = parts[1]
                    timestamp = int(parts[2])
                    x = float(parts[3])
                    y = float(parts[4])
                    z = float(parts[5].replace(';', ''))  # Clean the ;
                    cleaned_rows.append([user, activity, timestamp, x, y, z])
                except ValueError:
                    continue  # skip malformed lines

    columns = ['user', 'activity', 'timestamp', 'x', 'y', 'z']
    df = pd.DataFrame(cleaned_rows, columns=columns)
    return df

In [5]:
df = Load_WISDM_dataset('datasets/WISDM/WISDM_raw.txt')

In [6]:
df = df.iloc[:10000].copy()
print(df['activity'].value_counts())
print(df.shape)
df.head()

activity
Jogging       2946
Walking       2654
Upstairs      2214
Downstairs    2186
Name: count, dtype: int64
(10000, 6)


Unnamed: 0,user,activity,timestamp,x,y,z
0,33,Jogging,49105962326000,-0.694638,12.680544,0.503953
1,33,Jogging,49106062271000,5.012288,11.264028,0.953424
2,33,Jogging,49106112167000,4.903325,10.882658,-0.081722
3,33,Jogging,49106222305000,-0.612916,18.496431,3.023717
4,33,Jogging,49106332290000,-1.18497,12.108489,7.205164


In [7]:
def extract_features_from_window(window):
    features = []

    for axis in ['x', 'y', 'z']:
        data = window[axis]
        features += [
            data.mean(),
            data.std(),
            data.min(),
            data.max(),
            data.median(),
            np.sqrt(np.sum(data**2)),  # Energy
        ]
    return features


In [8]:
def create_feature_dataset(df, window_size=50, step_size=25):
    X_features = []
    y_labels = []

    for user_id in df['user'].unique():
        user_data = df[df['user'] == user_id]

        for i in range(0, len(user_data) - window_size, step_size):
            window = user_data.iloc[i:i+window_size]
            if len(window['activity'].unique()) == 1:
                features = extract_features_from_window(window)
                X_features.append(features)
                y_labels.append(window['activity'].iloc[0])

    return np.array(X_features), np.array(y_labels)


In [10]:
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report

In [9]:

# Generate features
X, y = create_feature_dataset(df, window_size=50, step_size=25)
print(X.shape, y.shape)


(373, 18) (373,)


In [11]:
from sklearn.preprocessing import LabelEncoder

# Encode activity labels to integers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [12]:
# Train/Test split with encoded labels
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

In [13]:
# Normalize
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVM
svm = SVC(kernel='rbf', C=1)
svm.fit(X_train, y_train)

# Evaluate
y_pred = svm.predict(X_test)
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        17
           1       1.00      1.00      1.00        20
           2       1.00      0.94      0.97        16
           3       1.00      1.00      1.00        22

    accuracy                           0.99        75
   macro avg       0.99      0.98      0.98        75
weighted avg       0.99      0.99      0.99        75

