In [1]:
import os
import glob
import numpy as np
import pandas as pd

# Root folder of the dataset; it holds one sub-folder per activity with CSV recordings.
# Set the HAR_DATASET_PATH environment variable to point elsewhere without editing
# the notebook; the fallback is the original local path.
dataset_path = os.environ.get('HAR_DATASET_PATH', 'D:/Projects/Module 3/m3_h1/M3_H5/data')

# Activity labels; each one must match a sub-folder name under dataset_path
activities = ['idle', 'running', 'stairs', 'walking']

# Function to load and label data from each activity type
def load_activity_data(activity, path):
    """Load every CSV recording of one activity into a single labelled frame.

    Parameters
    ----------
    activity : str
        Name of the sub-folder of ``path`` to read; the same value is written
        to the ``activity`` column of every returned row.
    path : str
        Dataset root folder.

    Returns
    -------
    pandas.DataFrame
        Rows of all ``*.csv`` files found in ``path/activity``, concatenated in
        sorted file-path order with a fresh 0..n-1 index.

    Raises
    ------
    ValueError
        If the folder contains no CSV files. This is the exception type
        ``pd.concat`` raises for an empty list, with a message that names
        the folder.
    """
    folder = os.path.join(path, activity)
    # glob returns matches in arbitrary, OS-dependent order; sorting keeps the
    # row order (and any windowing built on it) reproducible between runs.
    activity_files = sorted(glob.glob(os.path.join(folder, '*.csv')))
    if not activity_files:
        raise ValueError(f"No CSV files found for activity {activity!r} in {folder!r}")

    # Read each file and attach the activity label as a column
    df_list = [pd.read_csv(file).assign(activity=activity) for file in activity_files]
    return pd.concat(df_list, ignore_index=True)

# Load every activity in turn and stack the labelled frames into one table
all_data = pd.concat(
    [load_activity_data(name, dataset_path) for name in activities],
    ignore_index=True,
)

# Show the combined frame (pandas abbreviates long frames when rendering)
all_data

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,1.000776,4.616021,8.576031,idle
1,0.718261,4.209007,8.446744,idle
2,-0.909797,-0.282516,9.203311,idle
3,5.099650,0.148441,8.418014,idle
4,1.762132,-0.162806,9.251195,idle
...,...,...,...,...
193855,5.109226,-15.452178,-1.470040,walking
193856,6.373365,-11.741165,-8.226476,walking
193857,3.289633,-9.993398,-0.383072,walking
193858,-2.978387,-3.050213,1.273715,walking


In [2]:
from sklearn.preprocessing import MinMaxScaler
# The three accelerometer axes of the combined 'all_data' frame are rescaled here
columns_to_normalize = ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']

# MinMaxScaler with default settings
scaler = MinMaxScaler()

# Learn the per-column min/max, then overwrite the raw columns with the scaled values
# NOTE(review): the scaler is fitted on all rows, before the train/test split made
# in a later cell, so the test data influences the scaling — confirm this is acceptable.
scaler.fit(all_data[columns_to_normalize])
all_data[columns_to_normalize] = scaler.transform(all_data[columns_to_normalize])
all_data

Unnamed: 0,accelerometer_X,accelerometer_Y,accelerometer_Z,activity
0,0.512769,0.558895,0.609421,idle
1,0.509164,0.553702,0.607771,idle
2,0.488392,0.496395,0.617424,idle
3,0.565066,0.501894,0.607405,idle
4,0.522483,0.497923,0.618035,idle
...,...,...,...,...
193855,0.565188,0.302847,0.481244,walking
193856,0.581317,0.350196,0.395039,walking
193857,0.541972,0.372495,0.495112,walking
193858,0.461999,0.461083,0.516251,walking


In [3]:
from scipy import stats
from sklearn.model_selection import train_test_split

# Sliding-window settings for segmenting the normalized 'all_data' frame,
# whose 'activity' column is the prediction target
window_size = 128  # rows per window
overlap = 0.5  # fraction of a window shared with the next one (50%)

# Function to create windows of data
def create_windows(data, window_size, overlap):
    """Slice ``data`` into fixed-length, overlapping windows with one label each.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``accelerometer_X``, ``accelerometer_Y``,
        ``accelerometer_Z`` and ``activity``. Rows are taken in positional order.
    window_size : int
        Number of consecutive rows per window (at least 1).
    overlap : float
        Fraction of a window shared with the next one; must be below 1.

    Returns
    -------
    tuple (list of pandas.DataFrame, list)
        The accelerometer columns of every window and, for each window, the
        most frequent ``activity`` value inside it. Both lists are empty when
        ``data`` has fewer rows than ``window_size``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1 or ``overlap`` is 1 or more.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be at least 1, got {window_size}")
    if overlap >= 1:
        raise ValueError(f"overlap must be below 1, got {overlap}")

    axis_columns = ['accelerometer_X', 'accelerometer_Y', 'accelerometer_Z']
    # Clamp to 1: float rounding (e.g. 5 * (1 - 0.9) == 0.4999...) can truncate
    # the step to 0, which range() rejects.
    step = max(1, int(window_size * (1 - overlap)))

    windows = []
    labels = []
    # "+ 1" keeps the window that ends exactly on the last row; without it a
    # frame of exactly window_size rows would produce no window at all.
    for start in range(0, len(data) - window_size + 1, step):
        window_data = data.iloc[start:start + window_size]
        windows.append(window_data[axis_columns])
        # Majority label of the window; mode() returns its values sorted, so on a
        # tie the first one in sort order is used.
        labels.append(window_data['activity'].mode()[0])

    return windows, labels

# Segment the full dataset with the window settings defined earlier in this cell
windows, labels = create_windows(data=all_data, window_size=window_size, overlap=overlap)

# Function to calculate features for a window
def extract_features(window):
    """Summarise one window as a flat list of per-column statistics.

    For every column of ``window``, in column order, four values are emitted:
    mean, standard deviation, skewness and kurtosis (pandas defaults). The
    result therefore has ``4 * number_of_columns`` entries.
    """
    features = []
    for axis in window.columns:
        signal = window[axis]
        # Order matters: downstream feature columns are positional
        features.extend((signal.mean(), signal.std(), signal.skew(), signal.kurtosis()))
    return features

# Extract features for all windows
feature_list = list(map(extract_features, windows))

# One row per window (4 statistics for each of the 3 axes), plus the window label
feature_data = pd.DataFrame(feature_list)
feature_data['activity'] = labels

# Separate the target from the predictors
y = feature_data['activity']  # Labels
X = feature_data.drop(columns='activity')  # Features

# Hold out 30% of the windows for testing; the fixed seed makes the split repeatable
# NOTE(review): windows overlap by 50%, so neighbouring windows that share rows can
# end up on opposite sides of this random split and make test scores optimistic.
# The split is also not stratified by class — confirm both are acceptable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
from sklearn.svm import SVC

# Support-vector classifier with default hyperparameters, trained on the training split
svm_model = SVC()
svm_model.fit(X=X_train, y=y_train)

In [5]:
from sklearn.ensemble import RandomForestClassifier

# Random Forest trained on all CPU cores (n_jobs=-1).
# The seed is fixed because forest construction is randomized; without it the
# reported accuracy changes from run to run. 42 matches the seed of the split.
rf_model = RandomForestClassifier(n_jobs=-1, random_state=42)
rf_model.fit(X_train, y_train)

In [6]:
# Mean accuracy of each fitted model on the held-out test split
score_svm, score_rf = (
    fitted.score(X_test, y_test) for fitted in (svm_model, rf_model)
)

print(f"Accuracy of the SVM model: {score_svm}")
print(f"Accuracy of the RF model: {score_rf}")


Accuracy of the SVM model: 0.8327832783278328
Accuracy of the RF model: 0.9933993399339934


In [7]:
from sklearn.metrics import classification_report

# SVM: predict on the test split and print per-class precision / recall / F1
# zero_division=1 reports a metric as 1.0 when it is undefined (e.g. precision for
# a class the model never predicts), so read such rows together with their recall.
svm_predictions = svm_model.predict(X_test)
print("SVM Classification Report:")
print(classification_report(y_test, svm_predictions, zero_division=1))

# Random Forest: same evaluation on the same test split
rf_predictions = rf_model.predict(X_test)
print("Random Forest Classification Report:")
print(classification_report(y_test, rf_predictions, zero_division=1))

SVM Classification Report:
              precision    recall  f1-score   support

        idle       0.88      0.53      0.66       160
     running       0.83      0.97      0.89       454
      stairs       1.00      0.00      0.00        23
     walking       0.82      0.85      0.84       272

    accuracy                           0.83       909
   macro avg       0.88      0.59      0.60       909
weighted avg       0.84      0.83      0.81       909

Random Forest Classification Report:
              precision    recall  f1-score   support

        idle       1.00      0.99      1.00       160
     running       1.00      1.00      1.00       454
      stairs       0.95      0.83      0.88        23
     walking       0.99      1.00      0.99       272

    accuracy                           0.99       909
   macro avg       0.98      0.95      0.97       909
weighted avg       0.99      0.99      0.99       909

