# Import Dependencies

In [3]:
import pandas as pd
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from scipy.signal import find_peaks
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.model_selection import GridSearchCV
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

# Read Dataframe

In [4]:
boning_df = pd.read_csv('Boning.csv')
slicing_df = pd.read_csv('Slicing.csv')

In [5]:
boning_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54180 entries, 0 to 54179
Data columns (total 67 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              54180 non-null  int64  
 1   L5 x               54180 non-null  float64
 2   L5 y               54180 non-null  float64
 3   L5 z               54180 non-null  float64
 4   L3 x               54180 non-null  float64
 5   L3 y               54180 non-null  float64
 6   L3 z               54180 non-null  float64
 7   T12 x              54180 non-null  float64
 8   T12 y              54180 non-null  float64
 9   T12 z              54180 non-null  float64
 10  T8 x               54180 non-null  float64
 11  T8 y               54180 non-null  float64
 12  T8 z               54180 non-null  float64
 13  Neck x             54180 non-null  float64
 14  Neck y             54180 non-null  float64
 15  Neck z             54180 non-null  float64
 16  Head x             541

In [6]:
slicing_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17880 entries, 0 to 17879
Data columns (total 67 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              17880 non-null  int64  
 1   L5 x               17880 non-null  float64
 2   L5 y               17880 non-null  float64
 3   L5 z               17880 non-null  float64
 4   L3 x               17880 non-null  float64
 5   L3 y               17880 non-null  float64
 6   L3 z               17880 non-null  float64
 7   T12 x              17880 non-null  float64
 8   T12 y              17880 non-null  float64
 9   T12 z              17880 non-null  float64
 10  T8 x               17880 non-null  float64
 11  T8 y               17880 non-null  float64
 12  T8 z               17880 non-null  float64
 13  Neck x             17880 non-null  float64
 14  Neck y             17880 non-null  float64
 15  Neck z             17880 non-null  float64
 16  Head x             178

## Step 1: Data Collection

In [7]:
necessary_columns = ['Frame',
                     'Right Lower Leg x',
                     'Right Lower Leg y',
                     'Right Lower Leg z',
                     'Left Lower Leg x',
                     'Left Lower Leg y',
                     'Left Lower Leg z']

boning_df = boning_df[necessary_columns]
slicing_df = slicing_df[necessary_columns]

In [8]:
boning_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54180 entries, 0 to 54179
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              54180 non-null  int64  
 1   Right Lower Leg x  54180 non-null  float64
 2   Right Lower Leg y  54180 non-null  float64
 3   Right Lower Leg z  54180 non-null  float64
 4   Left Lower Leg x   54180 non-null  float64
 5   Left Lower Leg y   54180 non-null  float64
 6   Left Lower Leg z   54180 non-null  float64
dtypes: float64(6), int64(1)
memory usage: 2.9 MB


In [9]:
slicing_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 17880 entries, 0 to 17879
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              17880 non-null  int64  
 1   Right Lower Leg x  17880 non-null  float64
 2   Right Lower Leg y  17880 non-null  float64
 3   Right Lower Leg z  17880 non-null  float64
 4   Left Lower Leg x   17880 non-null  float64
 5   Left Lower Leg y   17880 non-null  float64
 6   Left Lower Leg z   17880 non-null  float64
dtypes: float64(6), int64(1)
memory usage: 977.9 KB


In [10]:
dataset_df = pd.concat([boning_df, slicing_df], ignore_index=True)

In [11]:
dataset_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              72060 non-null  int64  
 1   Right Lower Leg x  72060 non-null  float64
 2   Right Lower Leg y  72060 non-null  float64
 3   Right Lower Leg z  72060 non-null  float64
 4   Left Lower Leg x   72060 non-null  float64
 5   Left Lower Leg y   72060 non-null  float64
 6   Left Lower Leg z   72060 non-null  float64
dtypes: float64(6), int64(1)
memory usage: 3.8 MB


## Step 2: Composite Columns

### Computing for Set 1

In [12]:
dataset_df['Set1_rms_x_y'] = ((dataset_df['Right Lower Leg x']**2 + dataset_df['Right Lower Leg y']**2) / 2)**0.5
dataset_df['Set1_rms_y_z'] = ((dataset_df['Right Lower Leg y']**2 + dataset_df['Right Lower Leg z']**2) / 2)**0.5
dataset_df['Set1_rms_z_x'] = ((dataset_df['Right Lower Leg x']**2 + dataset_df['Right Lower Leg z']**2) / 2)**0.5
dataset_df['Set1_rms_x_y_z'] = ((dataset_df['Right Lower Leg x']**2 + dataset_df['Right Lower Leg y']**2 + dataset_df['Right Lower Leg z']**2) / 3)**0.5
dataset_df['Set1_roll'] = np.roll(
    180 * np.arctan2(dataset_df['Right Lower Leg y'],
    np.sqrt(dataset_df['Right Lower Leg x']**2 + dataset_df['Right Lower Leg z']**2)) / np.pi, 1
)
dataset_df['Set1_pitch'] = 180 * np.arctan2(
    dataset_df['Right Lower Leg x'],
    np.sqrt(dataset_df['Right Lower Leg y']**2 + dataset_df['Right Lower Leg z']**2)
    ) / np.pi

In [13]:
dataset_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 13 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              72060 non-null  int64  
 1   Right Lower Leg x  72060 non-null  float64
 2   Right Lower Leg y  72060 non-null  float64
 3   Right Lower Leg z  72060 non-null  float64
 4   Left Lower Leg x   72060 non-null  float64
 5   Left Lower Leg y   72060 non-null  float64
 6   Left Lower Leg z   72060 non-null  float64
 7   Set1_rms_x_y       72060 non-null  float64
 8   Set1_rms_y_z       72060 non-null  float64
 9   Set1_rms_z_x       72060 non-null  float64
 10  Set1_rms_x_y_z     72060 non-null  float64
 11  Set1_roll          72060 non-null  float64
 12  Set1_pitch         72060 non-null  float64
dtypes: float64(12), int64(1)
memory usage: 7.1 MB


### Computing for Set 2

In [14]:
dataset_df['Set2_rms_x_y'] = ((dataset_df['Left Lower Leg x']**2 + dataset_df['Left Lower Leg y']**2) / 2)**0.5
dataset_df['Set2_rms_y_z'] = ((dataset_df['Left Lower Leg y']**2 + dataset_df['Left Lower Leg z']**2) / 2)**0.5
dataset_df['Set2_rms_z_x'] = ((dataset_df['Left Lower Leg x']**2 + dataset_df['Left Lower Leg z']**2) / 2)**0.5
dataset_df['Set2_rms_x_y_z'] = ((dataset_df['Left Lower Leg x']**2 + dataset_df['Left Lower Leg y']**2 + dataset_df['Left Lower Leg z']**2) / 3)**0.5
dataset_df['Set2_roll'] = np.roll(
    180 * np.arctan2(dataset_df['Left Lower Leg y'],
    np.sqrt(dataset_df['Left Lower Leg x']**2 + dataset_df['Left Lower Leg z']**2)) / np.pi, 1
)
dataset_df['Set2_pitch'] = 180 * np.arctan2(
    dataset_df['Left Lower Leg x'],
    np.sqrt(dataset_df['Left Lower Leg y']**2 + dataset_df['Left Lower Leg z']**2)
    ) / np.pi

In [15]:
dataset_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 19 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              72060 non-null  int64  
 1   Right Lower Leg x  72060 non-null  float64
 2   Right Lower Leg y  72060 non-null  float64
 3   Right Lower Leg z  72060 non-null  float64
 4   Left Lower Leg x   72060 non-null  float64
 5   Left Lower Leg y   72060 non-null  float64
 6   Left Lower Leg z   72060 non-null  float64
 7   Set1_rms_x_y       72060 non-null  float64
 8   Set1_rms_y_z       72060 non-null  float64
 9   Set1_rms_z_x       72060 non-null  float64
 10  Set1_rms_x_y_z     72060 non-null  float64
 11  Set1_roll          72060 non-null  float64
 12  Set1_pitch         72060 non-null  float64
 13  Set2_rms_x_y       72060 non-null  float64
 14  Set2_rms_y_z       72060 non-null  float64
 15  Set2_rms_z_x       72060 non-null  float64
 16  Set2_rms_x_y_z     720

In [16]:
boning_df['label'] = 0
slicing_df['label'] = 1
labelDf = pd.concat([boning_df['label'], slicing_df['label']], ignore_index=True)
dataset_df['label'] = labelDf
dataset_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              72060 non-null  int64  
 1   Right Lower Leg x  72060 non-null  float64
 2   Right Lower Leg y  72060 non-null  float64
 3   Right Lower Leg z  72060 non-null  float64
 4   Left Lower Leg x   72060 non-null  float64
 5   Left Lower Leg y   72060 non-null  float64
 6   Left Lower Leg z   72060 non-null  float64
 7   Set1_rms_x_y       72060 non-null  float64
 8   Set1_rms_y_z       72060 non-null  float64
 9   Set1_rms_z_x       72060 non-null  float64
 10  Set1_rms_x_y_z     72060 non-null  float64
 11  Set1_roll          72060 non-null  float64
 12  Set1_pitch         72060 non-null  float64
 13  Set2_rms_x_y       72060 non-null  float64
 14  Set2_rms_y_z       72060 non-null  float64
 15  Set2_rms_z_x       72060 non-null  float64
 16  Set2_rms_x_y_z     720

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  boning_df['label'] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  slicing_df['label'] = 1


## Step 3: Data pre-processing and Feature computation

In [17]:
dataset_df.head()

Unnamed: 0,Frame,Right Lower Leg x,Right Lower Leg y,Right Lower Leg z,Left Lower Leg x,Left Lower Leg y,Left Lower Leg z,Set1_rms_x_y,Set1_rms_y_z,Set1_rms_z_x,Set1_rms_x_y_z,Set1_roll,Set1_pitch,Set2_rms_x_y,Set2_rms_y_z,Set2_rms_z_x,Set2_rms_x_y_z,Set2_roll,Set2_pitch,label
0,0,0.219806,0.236238,0.03205,0.121231,-0.079631,0.02069,0.22817,0.168576,0.15707,0.187217,-5.331211,42.675967,0.102563,0.058177,0.086963,0.08459,-47.443073,55.836663,0
1,1,0.152939,0.516317,0.034023,0.094984,0.134901,0.073241,0.380771,0.365883,0.110788,0.311518,46.762893,16.466129,0.116662,0.108541,0.084812,0.104218,-32.922799,31.748534,0
2,2,0.009686,0.089548,-0.045127,0.028965,0.138562,0.047474,0.063689,0.070906,0.032636,0.058164,73.119439,5.516979,0.100096,0.103569,0.039324,0.086202,48.359265,11.186103,0
3,3,-0.052419,-0.036543,0.108497,-0.063305,0.215892,0.038873,0.045184,0.080954,0.085204,0.072698,62.732545,-24.601347,0.159086,0.155114,0.052529,0.131818,68.131727,-16.097374,0
4,4,0.065916,-0.276617,0.053206,0.077462,0.142185,0.04401,0.201075,0.199183,0.059899,0.167026,-16.871233,13.170542,0.114492,0.105246,0.062997,0.096874,71.011809,27.494177,0


In [18]:
# Function to compute the number of peaks in a group
def count_peaks(group, feature):
    peaks, _ = find_peaks(group[feature])
    return len(peaks)

columns = [ 'Right Lower Leg x', 'Right Lower Leg y', 'Right Lower Leg z',
       'Left Lower Leg x', 'Left Lower Leg y', 'Left Lower Leg z',
       'Set1_rms_x_y', 'Set1_rms_y_z', 'Set1_rms_z_x', 'Set1_rms_x_y_z',
       'Set1_roll', 'Set1_pitch', 'Set2_rms_x_y', 'Set2_rms_y_z',
       'Set2_rms_z_x', 'Set2_rms_x_y_z', 'Set2_roll', 'Set2_pitch' ]

myData = pd.DataFrame()

for feature in columns:
    # Calculate various statistics for every 60 records
    myData[f'Mean {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('mean')
    myData[f'Std {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('std')
    myData[f'Min {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('min')
    myData[f'Max {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('max')
    # Calculate number of peaks for each group
    peaks = dataset_df.groupby(dataset_df.index // 60).apply(lambda g: count_peaks(g, feature))
    # Align peaks with the original DataFrame size
    myData[f'Num Peaks {feature}'] = dataset_df.index.to_series().map(lambda x: peaks[x // 60])

    myData[f'AUC {feature}'] = np.cumsum(dataset_df[feature]) - np.concatenate(([0], np.cumsum(np.diff(dataset_df[feature])/2)), axis=0)



  myData[f'Num Peaks {feature}'] = dataset_df.index.to_series().map(lambda x: peaks[x // 60])
  myData[f'AUC {feature}'] = np.cumsum(dataset_df[feature]) - np.concatenate(([0], np.cumsum(np.diff(dataset_df[feature])/2)), axis=0)
  myData[f'Mean {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('mean')
  myData[f'Std {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('std')
  myData[f'Min {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('min')
  myData[f'Max {feature}'] = dataset_df.groupby(dataset_df.index // 60)[feature].transform('max')
  myData[f'Num Peaks {feature}'] = dataset_df.index.to_series().map(lambda x: peaks[x // 60])
  myData[f'AUC {feature}'] = np.cumsum(dataset_df[feature]) - np.concatenate(([0], np.cumsum(np.diff(dataset_df[feature])/2)), axis=0)


In [19]:
myData.head()

Unnamed: 0,Mean Right Lower Leg x,Std Right Lower Leg x,Min Right Lower Leg x,Max Right Lower Leg x,Num Peaks Right Lower Leg x,AUC Right Lower Leg x,Mean Right Lower Leg y,Std Right Lower Leg y,Min Right Lower Leg y,Max Right Lower Leg y,...,Min Set2_roll,Max Set2_roll,Num Peaks Set2_roll,AUC Set2_roll,Mean Set2_pitch,Std Set2_pitch,Min Set2_pitch,Max Set2_pitch,Num Peaks Set2_pitch,AUC Set2_pitch
0,0.025233,0.196336,-0.337957,0.685027,14,0.219806,0.00846,0.368985,-0.738636,0.801465,...,-83.112235,85.055449,15,-47.443073,-1.971529,40.533459,-85.119584,69.454562,16,55.836663
1,0.025233,0.196336,-0.337957,0.685027,14,0.406178,0.00846,0.368985,-0.738636,0.801465,...,-83.112235,85.055449,15,-87.626009,-1.971529,40.533459,-85.119584,69.454562,16,99.629262
2,0.025233,0.196336,-0.337957,0.685027,14,0.48749,0.00846,0.368985,-0.738636,0.801465,...,-83.112235,85.055449,15,-79.907777,-1.971529,40.533459,-85.119584,69.454562,16,121.09658
3,0.025233,0.196336,-0.337957,0.685027,14,0.466124,0.00846,0.368985,-0.738636,0.801465,...,-83.112235,85.055449,15,-21.662281,-1.971529,40.533459,-85.119584,69.454562,16,118.640944
4,0.025233,0.196336,-0.337957,0.685027,14,0.472872,0.00846,0.368985,-0.738636,0.801465,...,-83.112235,85.055449,15,47.909487,-1.971529,40.533459,-85.119584,69.454562,16,124.339345


In [20]:
myData.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Columns: 108 entries, Mean Right Lower Leg x to AUC Set2_pitch
dtypes: float64(90), int64(18)
memory usage: 59.4 MB


## Step 4: Training

### 1. Train-Test split (70/30)

In [21]:
X_train, X_test, y_train, y_test = train_test_split(
    myData,
    dataset_df['label'],
    test_size=0.3,
    random_state=1
)

### 2. SVM With 10-fold cross validation

In [22]:
clf = svm.SVC()
clf.fit(X_train, y_train)
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)

y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)

[0.99861249 0.99762141 0.99881047 0.99801745 0.99841396 0.99722443
 0.99841396 0.99881047 0.9982157  0.99762094]


0.9978258858358775

### 3. 1 and 2 with hyper parameter tuning

In [25]:
hyper_parameters = {'C':[1, 10, 20]}
svc_model = svm.SVC()
clf = GridSearchCV(svc_model, hyper_parameters)
clf.fit(X_train, y_train)
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)

y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)

[0.99940535 0.99861249 0.99960349 0.99861221 0.99960349 0.99781919
 0.99881047 0.99920698 0.99881047 0.99841396]


0.9988435562956796

### 4. 1 and 2 with hyper parameter tuning and 10 best features

In [27]:
X_new = SelectKBest(f_regression, k=10).fit_transform(myData, dataset_df['label'])
X_train, X_test, y_train, y_test = train_test_split(X_new, dataset_df['label'], test_size=0.3, random_state=1)
print(X_train.shape)
print(X_test.shape)

(50442, 10)
(21618, 10)


In [28]:
hyper_parameters = {'C':[1, 10, 20]}
svc_model = svm.SVC()
clf = GridSearchCV(svc_model, hyper_parameters)
clf.fit(X_train, y_train)
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)

y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)

[0.99920714 0.99900892 0.99960349 0.99940523 0.99940523 0.99841396
 0.99940523 0.99960349 0.99940523 0.99920698]


0.9995374225182718

### 5. 1 and 2 with hyper parameter tuning and 10 principal components

In [29]:
pca = PCA(n_components=10).fit(myData)
X_train_pca = pca.fit_transform(myData)
print(X_train_pca.shape)

(72060, 10)


In [31]:
X_train, X_test, y_train, y_test = train_test_split(X_train_pca, dataset_df['label'], test_size=0.3, random_state=1)
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

(50442, 10)
(50442,)
(21618, 10)
(21618,)


In [32]:
hyper_parameters = {'C':[1, 10, 20]}
svc_model = svm.SVC()
clf = GridSearchCV(svc_model, hyper_parameters)
clf.fit(X_train, y_train)
scores = cross_val_score(clf, X_train, y_train, cv=10)
print(scores)

y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)

[0.99980178 0.99940535 1.         0.99960349 0.99980174 0.99881047
 0.99980174 0.99980174 0.99960349 0.99940523]


0.9996299380146174

## SGD, Random Forest, MLP

In [33]:
X_train, X_test, y_train, y_test = train_test_split(
    myData,
    dataset_df['label'],
    test_size=0.3,
    random_state=1
)

#### SGD

In [34]:
clf = SGDClassifier(loss="hinge", max_iter=5)
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)



0.9423165880284948

#### Random Forest

In [35]:
clf = RandomForestClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)

0.9999074845036544

#### MLP

In [36]:
clf = MLPClassifier()
clf.fit(X_train, y_train)
y_pred = clf.predict(X_test)
accuracy_score(y_test,y_pred)

0.9998612267554815