# Import Libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report

# Preparing data

In [2]:
MAIN_DATA_DIR = Path("Data\\idle_run_stairs_walk")
if MAIN_DATA_DIR.is_dir():
        class_list = list(d.name for d in MAIN_DATA_DIR.iterdir())
        print(f"We has the next list of classes: {class_list}\nTotal number of clases is: {len(class_list)}")

We has the next list of classes: ['idle', 'running', 'stairs', 'walking']
Total number of clases is: 4


In [12]:
df_time_domain_features = pd.DataFrame()
df_flatten = pd.DataFrame()
df_flatten_time_domain_features = pd.DataFrame()

In [13]:
def add_time_domain_feature(frame):
    features = []
    for axis in ['X', 'Y', 'Z']:
        axis_data = frame[f'accelerometer_{axis}']
        features.append(pd.DataFrame([axis_data.max()], columns=[f'{axis}_max']))
        features.append(pd.DataFrame([axis_data.min()], columns=[f'{axis}_min']))
        features.append(pd.DataFrame([axis_data.mean()], columns=[f'{axis}_mean']))
        features.append(pd.DataFrame([axis_data.median()], columns=[f'{axis}_median']))
        features.append(pd.DataFrame([axis_data.std()], columns=[f'{axis}_std']))
        features.append(pd.DataFrame([(axis_data - axis_data.mean()).abs().mean()], columns=[f'{axis}_mean_of_absolute_deviation']))
        features.append(pd.DataFrame([(np.sqrt(np.mean(axis_data**2)))], columns=[f'{axis}_root_mean_square_error']))
    return pd.concat(features, axis=1)

In [18]:
for act in class_list:
    csv_files = sorted((MAIN_DATA_DIR / act).glob("*.csv"))
    print(f"Reading data from Directory {MAIN_DATA_DIR / act}")

    for file in csv_files:
        df_from_file = pd.read_csv(file)

        df2 = add_time_domain_feature(df_from_file)
        df2['activity'] = act
        df_time_domain_features = pd.concat([df_time_domain_features, df2], ignore_index=True)


        colunms = [f"{c}_{i+1}" for c in df_from_file.columns for i in range(df_from_file.shape[0])]
        df1 = pd.DataFrame(df_from_file.values.reshape(1,-1), columns=colunms)
        df1['activity'] = act
        df_flatten = pd.concat([df_flatten, df1], ignore_index=True)




Reading data from Directory Data\idle_run_stairs_walk\idle
Reading data from Directory Data\idle_run_stairs_walk\running
Reading data from Directory Data\idle_run_stairs_walk\stairs
Reading data from Directory Data\idle_run_stairs_walk\walking


In [32]:
df_flatten_time_domain_features = pd.concat([df_flatten.drop(columns=['activity']), df_time_domain_features], axis=1)

In [15]:
df.head()

Unnamed: 0,accelerometer_X_1,accelerometer_X_2,accelerometer_X_3,accelerometer_X_4,accelerometer_X_5,accelerometer_X_6,accelerometer_X_7,accelerometer_X_8,accelerometer_X_9,accelerometer_X_10,...,accelerometer_Z_22,accelerometer_Z_23,accelerometer_Z_24,accelerometer_Z_25,accelerometer_Z_26,accelerometer_Z_27,accelerometer_Z_28,accelerometer_Z_29,accelerometer_Z_30,activity
0,1.000776,4.616021,8.576031,0.718261,4.209007,8.446744,-0.909797,-0.282516,9.203311,5.09965,...,-0.124498,-0.114922,9.758766,-0.167594,-0.124498,9.667787,-0.11971,-0.124498,9.792285,idle
1,-0.100556,-0.124498,9.763555,-0.081403,-0.110133,9.744401,-0.124498,-0.153229,9.773131,-0.320823,...,-0.129287,-0.143652,9.77792,-0.191536,-0.057461,9.768343,-0.033519,-0.134075,9.758766,idle
2,-0.095768,0.234632,9.763555,-0.086191,0.234632,9.753978,-0.105345,0.225055,9.753978,-0.086191,...,-0.105345,0.21069,9.763555,-0.110133,0.234632,9.782708,-0.105345,0.23942,9.739613,idle
3,0.392649,-0.071826,9.768343,0.387861,-0.062249,9.753978,0.440533,0.033519,9.734824,0.42138,...,0.378284,-0.023942,9.730036,0.560243,-0.205901,9.797073,0.268151,0.086191,9.725247,idle
4,0.440533,0.033519,9.734824,0.42138,-0.052672,9.749189,0.426168,-0.019154,9.734824,0.45011,...,0.268151,0.086191,9.725247,0.368707,-0.004788,9.77792,0.411803,-0.057461,9.77792,idle


In [16]:
df_time_domain_features.head()

Unnamed: 0,X_max,X_min,X_mean,X_median,X_std,X_mean_of_absolute_deviation,X_root_mean_square_error,Y_max,Y_min,Y_mean,...,Y_mean_of_absolute_deviation,Y_root_mean_square_error,Z_max,Z_min,Z_mean,Z_median,Z_std,Z_mean_of_absolute_deviation,Z_root_mean_square_error,activity
0,5.09965,-0.909797,0.178448,-0.10295,1.036361,0.539706,1.03445,4.616021,-0.282516,0.167435,...,0.566011,1.150396,9.80665,8.418014,9.605697,9.770737,0.406903,0.275546,9.614024,idle
1,0.407014,-0.320823,-0.098641,-0.114922,0.125848,0.068251,0.15824,0.023942,-0.244209,-0.131202,...,0.028411,0.139451,9.80665,9.667787,9.771216,9.77792,0.025419,0.016664,9.771248,idle
2,-0.062249,-0.124498,-0.099918,-0.100556,0.015642,0.011982,0.101095,0.244209,0.177171,0.220905,...,0.012045,0.221492,9.792285,9.739613,9.768503,9.768343,0.014528,0.011673,9.768513,idle
3,0.560243,0.268151,0.400949,0.392649,0.052889,0.037903,0.404307,0.086191,-0.205901,-0.029528,...,0.037882,0.061203,9.797073,9.725247,9.752541,9.751583,0.017967,0.013886,9.752557,idle
4,0.560243,0.268151,0.400949,0.402226,0.053187,0.038627,0.404345,0.086191,-0.205901,-0.027134,...,0.036818,0.059628,9.797073,9.725247,9.753659,9.751583,0.018902,0.015004,9.753676,idle


In [33]:
df_flatten_time_domain_features.head()

Unnamed: 0,accelerometer_X_1,accelerometer_X_2,accelerometer_X_3,accelerometer_X_4,accelerometer_X_5,accelerometer_X_6,accelerometer_X_7,accelerometer_X_8,accelerometer_X_9,accelerometer_X_10,...,Y_mean_of_absolute_deviation,Y_root_mean_square_error,Z_max,Z_min,Z_mean,Z_median,Z_std,Z_mean_of_absolute_deviation,Z_root_mean_square_error,activity
0,1.000776,4.616021,8.576031,0.718261,4.209007,8.446744,-0.909797,-0.282516,9.203311,5.09965,...,0.566011,1.150396,9.80665,8.418014,9.605697,9.770737,0.406903,0.275546,9.614024,idle
1,-0.100556,-0.124498,9.763555,-0.081403,-0.110133,9.744401,-0.124498,-0.153229,9.773131,-0.320823,...,0.028411,0.139451,9.80665,9.667787,9.771216,9.77792,0.025419,0.016664,9.771248,idle
2,-0.095768,0.234632,9.763555,-0.086191,0.234632,9.753978,-0.105345,0.225055,9.753978,-0.086191,...,0.012045,0.221492,9.792285,9.739613,9.768503,9.768343,0.014528,0.011673,9.768513,idle
3,0.392649,-0.071826,9.768343,0.387861,-0.062249,9.753978,0.440533,0.033519,9.734824,0.42138,...,0.037882,0.061203,9.797073,9.725247,9.752541,9.751583,0.017967,0.013886,9.752557,idle
4,0.440533,0.033519,9.734824,0.42138,-0.052672,9.749189,0.426168,-0.019154,9.734824,0.45011,...,0.036818,0.059628,9.797073,9.725247,9.753659,9.751583,0.018902,0.015004,9.753676,idle


## Save new data

In [38]:
df_flatten.to_csv('Data\\activity_flatten.csv', index=False)
df_time_domain_features.to_csv('Data\\activity_time_domain_features.csv', index=False)
df_flatten_time_domain_features.to_csv('Data\\activity_flatten_time_domain_features.csv', index=False)

# Train

In [50]:
data = {
    'flatten': df_flatten,
    'time_domain_features': df_time_domain_features,
    'flatten_time_domain_features': df_flatten_time_domain_features
}

In [51]:
models = {
    'SVC': lambda: SVC(),
    'SVC_Linear': lambda: SVC(kernel='linear'),
    'RandomForestClassifier': lambda: RandomForestClassifier()
}
reports = {}

In [57]:
for key, df in data.items():
    X = df.drop(columns=['activity'])
    Y = df['activity']
    x_train, x_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=42)
    scaler = StandardScaler()
    x_train = scaler.fit_transform(x_train)
    x_test = scaler.transform(x_test)

    if key not in reports:
        reports[key] = {}
    
    for name, classification in models.items():
        model = classification()
        print(f'Star train {name} on {key}\n')
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        reports[key][name] = classification_report(y_true=y_test, y_pred=y_pred)

Star train SVC on flatten

Star train SVC_Linear on flatten

Star train RandomForestClassifier on flatten

Star train SVC on time_domain_features

Star train SVC_Linear on time_domain_features

Star train RandomForestClassifier on time_domain_features

Star train SVC on flatten_time_domain_features

Star train SVC_Linear on flatten_time_domain_features

Star train RandomForestClassifier on flatten_time_domain_features



# Report

## Flatten

In [62]:
for name, results in reports['flatten'].items():
    print(f'Model: {name} on flatten data')
    print(results)

Model: SVC on flatten data
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       607
     running       1.00      1.00      1.00      2085
      stairs       0.88      0.57      0.69        88
     walking       0.97      0.99      0.98      1098

    accuracy                           0.99      3878
   macro avg       0.96      0.89      0.92      3878
weighted avg       0.99      0.99      0.99      3878

Model: SVC_Linear on flatten data
              precision    recall  f1-score   support

        idle       0.99      1.00      0.99       607
     running       1.00      1.00      1.00      2085
      stairs       0.66      0.44      0.53        88
     walking       0.96      0.98      0.97      1098

    accuracy                           0.98      3878
   macro avg       0.90      0.86      0.87      3878
weighted avg       0.98      0.98      0.98      3878

Model: RandomForestClassifier on flatten data
              precision

## Time domain features

In [63]:
for name, results in reports['time_domain_features'].items():
    print(f'Model: {name} on time domain features data')
    print(results)

Model: SVC on time domain features data
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       607
     running       1.00      1.00      1.00      2085
      stairs       0.90      0.80      0.84        88
     walking       0.98      0.99      0.99      1098

    accuracy                           0.99      3878
   macro avg       0.97      0.95      0.96      3878
weighted avg       0.99      0.99      0.99      3878

Model: SVC_Linear on time domain features data
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       607
     running       1.00      1.00      1.00      2085
      stairs       0.82      0.75      0.79        88
     walking       0.98      0.99      0.98      1098

    accuracy                           0.99      3878
   macro avg       0.95      0.93      0.94      3878
weighted avg       0.99      0.99      0.99      3878

Model: RandomForestClassifier on time domai

## Flatten and time domain features

In [64]:
for name, results in reports['flatten_time_domain_features'].items():
    print(f'Model: {name} on flatten and time domain features data')
    print(results)

Model: SVC on flatten and time domain features data
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       607
     running       1.00      1.00      1.00      2085
      stairs       0.89      0.67      0.77        88
     walking       0.97      0.99      0.98      1098

    accuracy                           0.99      3878
   macro avg       0.97      0.92      0.94      3878
weighted avg       0.99      0.99      0.99      3878

Model: SVC_Linear on flatten and time domain features data
              precision    recall  f1-score   support

        idle       1.00      1.00      1.00       607
     running       1.00      1.00      1.00      2085
      stairs       0.79      0.69      0.74        88
     walking       0.98      0.99      0.98      1098

    accuracy                           0.99      3878
   macro avg       0.94      0.92      0.93      3878
weighted avg       0.99      0.99      0.99      3878

Model: RandomForest