In [34]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from pathlib import Path
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
from sklearn.ensemble import RandomForestClassifier
from scipy.stats import kurtosis, skew

# Klasyfikacja danych przy użyciu algorytmów SVM oraz lasu losowego
## Wersja z użyciem odczytów akcelerometru jako charakterystyki
Wczytanie danych:

In [23]:
def load_data(folder_path, label):
    data = []
    for file_path in Path(folder_path).iterdir():
        if file_path.name.endswith(".csv"):
            df = pd.read_csv(file_path)
            features = df.values.flatten()
            data.append(np.append(features, label))
    return np.array(data)

folders = {
    0: "data/idle",
    1: "data/running",
    2: "data/stairs",
    3: "data/walking",
}

data = []
for label, path in folders.items():
    folder_data = load_data(path, label)
    if folder_data.size > 0:
        data.append(folder_data)

data = np.vstack(data)


Przygotowywanie danych:

In [24]:
X = data[:, :-1]
y = data[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=92)

Trenowanie modeli i ich ewaluacja:

In [35]:
svm_model = SVC()
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
print("SVM Classifier Report:")
print(classification_report(y_test, y_pred_svm))
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))

rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("\n\nRandom Forest Classifier Report:")
print(classification_report(y_test, y_pred_rf))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

SVM Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       1.00      1.00      1.00       678
         2.0       0.86      0.53      0.65        34
         3.0       0.96      0.99      0.98       396

    accuracy                           0.99      1293
   macro avg       0.95      0.88      0.91      1293
weighted avg       0.98      0.99      0.98      1293

SVM Accuracy: 0.9853054911059551


Random Forest Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       1.00      1.00      1.00       678
         2.0       0.00      0.00      0.00        34
         3.0       0.92      1.00      0.96       396

    accuracy                           0.97      1293
   macro avg       0.73      0.75      0.74      1293
weighted avg       0.95      0.97      0.96      1293

Random Forest Accuracy: 0.9737045630317

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Wersja z cechami domeny czasowej
Wczytanie danych:

In [36]:
def extract_features(file_path):
    df = pd.read_csv(file_path)
    features = []
    for axis in ["accelerometer_X", "accelerometer_Y", "accelerometer_Z"]:
        data = df[axis]
        features.extend([
            data.mean(),
            data.median(),
            data.std(),
            data.var(),
            kurtosis(data),
            skew(data),
            data.min(),
            data.max(),
            np.sqrt(np.mean(data**2)),
            np.sum(data**2)
        ])
    return np.array(features)

def load_data(folder_path, label):
    data = []
    for file_path in Path(folder_path).iterdir():
        if file_path.name.endswith(".csv"):
            features = extract_features(file_path)
            data.append(np.append(features, label))
    return np.array(data)

folders = {
    0: "data/idle",
    1: "data/running",
    2: "data/stairs",
    3: "data/walking",
}

data = []
for label, path in folders.items():
    folder_data = load_data(path, label)
    if folder_data.size > 0:
        data.append(folder_data)

data = np.vstack(data)

Przygotowywanie danych:

In [37]:
X = data[:, :-1]
y = data[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=92)

Trenowanie modeli i ich ewaluacja:

In [38]:
svm_model = SVC()
svm_model.fit(X_train, y_train)
y_pred_svm = svm_model.predict(X_test)
print("SVM Classifier Report:")
print(classification_report(y_test, y_pred_svm))
print("SVM Accuracy:", accuracy_score(y_test, y_pred_svm))

rf_model = RandomForestClassifier(n_estimators=100)
rf_model.fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
print("\n\nRandom Forest Classifier Report:")
print(classification_report(y_test, y_pred_rf))
print("Random Forest Accuracy:", accuracy_score(y_test, y_pred_rf))

SVM Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       0.99      0.97      0.98       678
         2.0       0.00      0.00      0.00        34
         3.0       0.88      0.98      0.93       396

    accuracy                           0.95      1293
   macro avg       0.72      0.74      0.73      1293
weighted avg       0.93      0.95      0.94      1293

SVM Accuracy: 0.9528228924980665


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))




Random Forest Classifier Report:
              precision    recall  f1-score   support

         0.0       1.00      1.00      1.00       185
         1.0       1.00      1.00      1.00       678
         2.0       1.00      0.94      0.97        34
         3.0       0.99      1.00      1.00       396

    accuracy                           1.00      1293
   macro avg       1.00      0.99      0.99      1293
weighted avg       1.00      1.00      1.00      1293

Random Forest Accuracy: 0.9984532095901005
