In [1]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from scipy.stats import skew, kurtosis
import numpy as np
from sklearn.metrics import accuracy_score

# Root folder of the dataset (relative to the working directory); process_data
# looks for one sub-folder per activity name directly beneath it.
dataset_path = 'data'

# Function for data processing and computing time domain features
def process_data(dataset_path, activities=None):
    """Build a time-domain feature matrix from per-activity accelerometer CSVs.

    Every ``.csv`` file found in ``<dataset_path>/<activity>/`` becomes one
    sample. For each of the columns ``accelerometer_X``, ``accelerometer_Y``
    and ``accelerometer_Z`` seven statistics are appended, in this order:
    mean, standard deviation, median, maximum, minimum, skewness, kurtosis
    (so each sample has 3 * 7 = 21 features).

    Parameters
    ----------
    dataset_path : str
        Folder that contains one sub-folder per activity.
    activities : sequence of str, optional
        Sub-folder names to read. The position of a name in this sequence is
        the integer label assigned to its samples. Defaults to
        ``['idle', 'running', 'stairs', 'walking']``.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(data, labels)``: one row of features per CSV file, and the
        matching integer activity index for each row.

    Raises
    ------
    OSError
        Propagated from ``os.listdir`` when an activity folder cannot be read.
    KeyError
        Propagated from pandas when a CSV lacks one of the accelerometer
        columns.
    """
    if activities is None:
        activities = ['idle', 'running', 'stairs', 'walking']

    data = []
    labels = []

    for activity_idx, activity in enumerate(activities):
        activity_folder = os.path.join(dataset_path, activity)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded split made from it)
        # identical across machines and filesystems.
        for file in sorted(os.listdir(activity_folder)):
            if not file.endswith(".csv"):
                continue

            df = pd.read_csv(os.path.join(activity_folder, file))

            # Seven time-domain statistics per axis, concatenated X, Y, Z
            time_features = []
            for axis in ['X', 'Y', 'Z']:
                series = df[f'accelerometer_{axis}']
                time_features.extend([
                    series.mean(),
                    series.std(),
                    series.median(),
                    series.max(),
                    series.min(),
                    skew(series),
                    kurtosis(series),
                ])

            data.append(time_features)
            labels.append(activity_idx)

    return np.array(data), np.array(labels)


# Feature matrix and integer labels computed from the CSV files on disk
X, y = process_data(dataset_path)

# Class names; a label value is an index into this list
activities = ['idle', 'running', 'stairs', 'walking']

# Hold out 20% of the samples for evaluation (fixed seed for a repeatable split)
# NOTE(review): the split is not stratified; the recorded report shows only a
# few 'stairs' samples, so passing stratify=y may be worth considering.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features with statistics learned from the training portion only
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Linear-kernel SVM: fit on the training split, score on the held-out split
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train_scaled, y_train)
svm_pred = svm_model.predict(X_test_scaled)
print(
    "Classification Report for SVM:",
    classification_report(y_test, svm_pred, target_names=activities),
    sep="\n",
)

# Random forest with 100 trees, trained and scored on the same scaled splits
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train)
rf_pred = rf_model.predict(X_test_scaled)
print(
    "\nClassification Report for Random Forest:",
    classification_report(y_test, rf_pred, target_names=activities),
    sep="\n",
)

# Predictions for (at most) the first 100 rows of the scaled test set
svm_predicted_activity = svm_model.predict(X_test_scaled[:100])
rf_predicted_activity = rf_model.predict(X_test_scaled[:100])

# Only the prediction for the very first test row is printed here
print(f"Predicted activity by SVM: {activities[svm_predicted_activity[0]]}")
print(f"Predicted activity by Random Forest: {activities[rf_predicted_activity[0]]}")

Classification Report for SVM:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       220
     running       1.00      1.00      1.00       689
      stairs       0.84      0.70      0.76        30
     walking       0.97      0.99      0.98       354

    accuracy                           0.99      1293
   macro avg       0.95      0.92      0.94      1293
weighted avg       0.99      0.99      0.99      1293


Classification Report for Random Forest:
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       220
     running       1.00      1.00      1.00       689
      stairs       0.97      0.97      0.97        30
     walking       1.00      1.00      1.00       354

    accuracy                           1.00      1293
   macro avg       0.99      0.99      0.99      1293
weighted avg       1.00      1.00      1.00      1293

Predicted activity by SVM: running
Predicted activity by 

In [None]:
# Accuracy on the same leading slice of the test set that was predicted in the
# previous cell (both slices hold at most 100 samples).
svm_accuracy = accuracy_score(y_test[0:100], svm_predicted_activity)
rf_accuracy = accuracy_score(y_test[0:100], rf_predicted_activity)
print(f"Accuracy of SVM on first 100 samples: {svm_accuracy:.2f}")
print(f"Accuracy of Random Forest on first 100 samples: {rf_accuracy:.2f}")

# Class names; a predicted label is an index into this list
activities = ['idle', 'running', 'stairs', 'walking']

# List the predicted activity for each sample. Iterating over the predictions
# themselves (capped at 100) instead of range(100) avoids an IndexError when
# the test split holds fewer than 100 samples.
print("First 100 predicted activities by SVM:")
for sample_no, label in enumerate(svm_predicted_activity[:100], start=1):
    print(f"Sample {sample_no}: {activities[label]}")

print("\nFirst 100 predicted activities by Random Forest:")
for sample_no, label in enumerate(rf_predicted_activity[:100], start=1):
    print(f"Sample {sample_no}: {activities[label]}")