---
## **Test the High Dimensional SVM Classifier**
---

<font size="2">Eudald Sangenis  
Creation: 7/13/2022  
Last Update: 7/13/2022  

The objective of this file is to test the High Dimensional SVM classifier:  

Parameters of the trained High Dimensional Dataset:  
- 6 magnitude signals  
- time/sample window

Once this classifier is tested the next step is to verify the navigation solution.</font>

---
## **<font size="5">1. Libraries</font>**
---

In [39]:
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import decomposition
from sklearn.metrics import r2_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

from sklearn.preprocessing import StandardScaler

---
## **<font size="5">2. Parameters to modify</font>**
---

<font size="2">Boolean variable:
* True: Predict the test datasets
* False: Import an already completed prediction</font>

In [40]:
# Run mode: True loads the classifier and predicts the test dataset; False reads a previously saved prediction file.
prediction = True

<font size="2">Path to the High Dimensional datasets used to test the classifier:</font>

In [41]:
# Settings that select which High Dimensional (HD) dataset file is read and which saved artifacts are loaded.
# NOTE(review): in the cells visible here, type_dataset, bias_dataset and num_df_used_to_train are only
# referenced by commented-out code — confirm whether they are still needed.
type_dataset = 'test'       # dataset split used in file names: 'train' or 'test'
data_dataset = '2023_08_30' # dataset date folder; other values listed by the author: 2022_03_17, 2022_04_25, 2022_07_27, 2022_08_01
bias_dataset = True        # True selects the '_no_Bias' files (no bias), False the files without that suffix (bias)
num_df_used_to_train = 4    # 2 (2022_03_17 & 2022_04_25), 3 (2022_07_27 or 2022_08_01); 4 presumably means all four dates — TODO confirm
num_window = 400             # window size embedded in the file names: 50, 100, 200, 400
exp = 1000                   # embedded in the dataset and prediction file names ('exp<N>'); presumably an experiment id — TODO confirm

In [42]:
# Directory with the labeled and HD datasets of the selected `data_dataset` folder.
hd_datasets_root = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\HD_datasets\\'
dataset_path = ''.join([hd_datasets_root, data_dataset, '\\'])

<font size="2">Number of PCA components</font>

In [43]:
# Number of components produced by each PCA model; also embedded in the classifier, scaler and PCA file names.
num_components = 6

<font size="2">Path to the High Dimensional Classifier:</font>

In [44]:
# Folder classifier:
folder_clf = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\'
# Classifier Name:
name_clf = str(num_window)+'HD_6sig_pca'+str(num_components)+'_no_bias_balanced_clf.sav'

# The classifier is only needed when the predictions are computed in this run.
if prediction:
    # SECURITY: pickle can execute arbitrary code while loading; only open trusted files.
    # The context manager closes the file handle once the model has been read.
    with open(folder_clf + name_clf, 'rb') as clf_file:
        support = pickle.load(clf_file)    # read clf model
    print('Classifier Loaded!')

Classifier Loaded!


<font size="2">Path to the predicted labels dataset:</font>

In [45]:
# Where the predicted labels are written to / read from.
folder_label_predicted = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\'
# File name encodes the dataset date, window size, PCA components and experiment number.
l_predicted_name = ''.join([
    data_dataset,
    '_HD', str(num_window),
    '_6sig_pca', str(num_components),
    '_exp', str(exp),
    '_Y_predicted',
])

---
## **<font size="5">3. Load Trained Model Dataset:</font>**
---

In [46]:
# type_dataset_model = 'train'      # train, test
# data_dataset_1 = '2022_03_17'
# data_dataset_2 = '2022_04_25'
# data_dataset_3 = '2022_07_27'
# data_dataset_4 = '2022_08_01'

# dataset_path_1 = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\HD_datasets\\'+data_dataset_1+'\\'
# dataset_path_2 = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\HD_datasets\\'+data_dataset_2+'\\'
# dataset_path_3 = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\HD_datasets\\'+data_dataset_3+'\\'
# dataset_path_4 = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\HD_datasets\\'+data_dataset_4+'\\'
# print('Reading datasets...')
# # Load the HD dataset
# if bias_dataset == False:
#     if num_df_used_to_train == 2:
#         df_1 = pd.read_csv(dataset_path_1 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')
#         df_2 = pd.read_csv(dataset_path_2 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')

#     if num_df_used_to_train == 3: 
#         df_1 = pd.read_csv(dataset_path_1 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')
#         df_2 = pd.read_csv(dataset_path_2 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')
#         df_3 = pd.read_csv(dataset_path_3 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')
    
#     if num_df_used_to_train == 4: 
#         df_1 = pd.read_csv(dataset_path_1 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')
#         df_2 = pd.read_csv(dataset_path_2 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')
#         df_3 = pd.read_csv(dataset_path_3 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')
#         df_4 = pd.read_csv(dataset_path_4 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'.csv')

# if bias_dataset == True:
#     if num_df_used_to_train == 2:
#         df_1 = pd.read_csv(dataset_path_1 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')
#         df_2 = pd.read_csv(dataset_path_2 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')

#     if num_df_used_to_train == 3: 
#         df_1 = pd.read_csv(dataset_path_1 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')
#         df_2 = pd.read_csv(dataset_path_2 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')
#         df_3 = pd.read_csv(dataset_path_3 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')
    
#     if num_df_used_to_train == 4: 
#         df_1 = pd.read_csv(dataset_path_1 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')
#         df_2 = pd.read_csv(dataset_path_2 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')
#         df_3 = pd.read_csv(dataset_path_3 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')
#         df_4 = pd.read_csv(dataset_path_4 + 'HD_'+str(num_window)+'_6sig_'+type_dataset_model+'_no_Bias.csv')

# print('Reading done!')

# df_1 = df_1.drop(['Unnamed: 0'], axis=1)
# df_2 = df_2.drop(['Unnamed: 0'], axis=1)
# if num_df_used_to_train == 3: 
#     df_3 = df_3.drop(['Unnamed: 0'], axis=1)
# if num_df_used_to_train == 4: 
#     df_3 = df_3.drop(['Unnamed: 0'], axis=1)
#     df_4 = df_4.drop(['Unnamed: 0'], axis=1)
    
# if num_df_used_to_train == 2: df = pd.concat([df_1, df_2], axis=0)
# if num_df_used_to_train == 3: df = pd.concat([df_1, df_2, df_3], axis=0)
# if num_df_used_to_train == 4: df = pd.concat([df_1, df_2, df_3, df_4], axis=0)

# del df_1, df_2
# if num_df_used_to_train == 3: del df_3
# if num_df_used_to_train == 4: del df_3, df_4

# df = df.reset_index().drop('index', axis=1)

# print('Shape concat dfs: ')
# print(df.shape)
# df

In [47]:
# X_model = df.iloc[:,:-1]
# del df                      # delete variable to reduce memory
# X_model.head(2)

In [48]:
## STANDARDIZE DATA
# scaler_model = StandardScaler()
# scaler_model.fit(X_model)                       # fit with dataset that was used to train the model
# X_norm_model = scaler_model.transform(X_model)  # transform the new dataset
# X_norm_model = pd.DataFrame(X_norm_model, columns=X_model.columns)
# del X_model                         # delete variable to reduce memory
# X_norm_model

---
## **<font size="5">3. Load Test Dataset:</font>**
---

In [49]:
print('Reading dataset ' + data_dataset + ' ...')

# Loader for the labeled datasets, kept for reference (disabled):
# if bias_dataset == False: df = pd.read_csv(dataset_path + 'HD_'+str(num_window)+'_6sig_'+type_dataset+'.csv')
# if bias_dataset == True: df = pd.read_csv(dataset_path + 'HD_'+str(num_window)+'_6sig_'+type_dataset+'_no_Bias.csv')

# Active loader: unlabeled dataset used for the navigation solution.
dataset_path = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\HD_datasets\\' + data_dataset + '\\'
hd_csv_name = ''.join(['HD_', str(num_window), '_6sig_exp', str(exp), '_no_Bias.csv'])
df = pd.read_csv(dataset_path + hd_csv_name)
print('Reading done!')

# Discard the unnamed first column of the CSV (presumably the index written when the file was saved).
df = df.drop(columns=['Unnamed: 0'])

print('Shapes '+data_dataset+': ')
print(df.shape)

Reading dataset 2023_08_30 ...
Reading done!
Shapes 2023_08_30: 
(61309, 2406)


In [50]:
# Preview the first two rows of the loaded test dataset.
print('Test dataset:')
df.head(2)

Test dataset:


Unnamed: 0,acc1_t400,acc2_t400,acc3_t400,gyro1_t400,gyro2_t400,gyro3_t400,acc1_t399,acc2_t399,acc3_t399,gyro1_t399,...,acc3_t1,gyro1_t1,gyro2_t1,gyro3_t1,acc1_t0,acc2_t0,acc3_t0,gyro1_t0,gyro2_t0,gyro3_t0
0,0.006228,0.008727,-0.004955,-0.004336,0.000138,0.000926,0.006411,0.009501,4.6e-05,-0.005277,...,-0.003661,-0.008725,0.005248,-0.000339,-0.00024,-0.002519,-0.002704,-0.008894,0.005561,0.001362
1,0.006411,0.009501,4.6e-05,-0.005277,0.000188,0.000555,4.5e-05,0.004815,3.6e-05,-0.003813,...,-0.002704,-0.008894,0.005561,0.001362,-0.00353,0.002462,0.000393,-0.008207,0.004986,0.00088


In [51]:
# Feature matrix passed to the scaler: every column of df is used (X is an alias of df, not a copy).
X = df

---
## **<font size="5">4. Normalize data:</font>**
---

In [52]:
## STANDARDIZE DATA
# The scaler is loaded from disk instead of being re-fitted here, so the test data is
# transformed with the stored parameters (presumably those fitted on the training data).
# SECURITY: pickle can execute arbitrary code while loading; only open trusted files.
scaler_path = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\'+str(num_window)+'HD_6sig_pca'+str(num_components)+'_scaler.pkl'
with open(scaler_path, 'rb') as scaler_file:   # context manager closes the file handle
    scaler_model = pickle.load(scaler_file)

# Transform the new dataset and restore the column labels (transform returns a bare array).
X_norm = pd.DataFrame(scaler_model.transform(X), columns = X.columns)
X_norm.head(2)

Unnamed: 0,acc1_t400,acc2_t400,acc3_t400,gyro1_t400,gyro2_t400,gyro3_t400,acc1_t399,acc2_t399,acc3_t399,gyro1_t399,...,acc3_t1,gyro1_t1,gyro2_t1,gyro3_t1,acc1_t0,acc2_t0,acc3_t0,gyro1_t0,gyro2_t0,gyro3_t0
0,0.174429,0.052085,0.127753,0.004046,0.010636,0.043845,0.174565,0.052819,0.13195,0.003484,...,0.12884,0.001428,0.01226,0.04289,0.169616,0.041374,0.129643,0.001327,0.01236,0.044176
1,0.174565,0.052819,0.13195,0.003484,0.010652,0.043565,0.169826,0.048373,0.131942,0.004358,...,0.129643,0.001327,0.01236,0.044176,0.167167,0.0461,0.132242,0.001737,0.012177,0.043811


---
## **<font size="5">5. PCA:</font>**
---

In [53]:
# The six per-signal PCA models are loaded from disk instead of being re-fitted here.
# (The disabled fit code trained one PCA per signal on every 6th column of the normalized
# model dataset, using column offsets 0..5.)
# SECURITY: pickle can execute arbitrary code while loading; only open trusted files.
pca_file_prefix = 'G:\\Shared drives\\NIST-Pedestrian Activity Classification\\motion classification\\6_HD_6sig_SVM_Paper_Results\\'+str(num_window)+'HD_6sig_pca'+str(num_components)+'_PCA_'


def _load_pca(signal_axis):
    """Return the pickled PCA model stored for one signal axis (e.g. 'acc_x').

    The file is opened in a context manager so its handle is closed after loading.
    """
    with open(pca_file_prefix + signal_axis + '.pkl', 'rb') as pca_file:
        return pickle.load(pca_file)


pca_acc_x = _load_pca('acc_x')
pca_acc_y = _load_pca('acc_y')
pca_acc_z = _load_pca('acc_z')
pca_gyr_x = _load_pca('gyr_x')
pca_gyr_y = _load_pca('gyr_y')
pca_gyr_z = _load_pca('gyr_z')

In [54]:
# Project data to its dimensional reduction space.
# The columns are interleaved per time step (acc1, acc2, acc3, gyro1, gyro2, gyro3), so the
# slice k::6 selects all samples of signal k for the matching PCA model.
# NOTE(review): the messages below say "training data" although this notebook projects the
# test dataset; the strings are left unchanged so they still match the recorded output.
X_projected_acc_x = pca_acc_x.transform(X_norm.iloc[:,0::6])
print('The shape of the Linear Acceleration X projected training data is:' + str(X_projected_acc_x.shape))
X_projected_acc_y = pca_acc_y.transform(X_norm.iloc[:,1::6])
print('The shape of the Linear Acceleration Y projected training data is:' + str(X_projected_acc_y.shape))
X_projected_acc_z = pca_acc_z.transform(X_norm.iloc[:,2::6])
print('The shape of the Linear Acceleration Z projected training data is:' + str(X_projected_acc_z.shape))

X_projected_gyr_x = pca_gyr_x.transform(X_norm.iloc[:,3::6])
print('The shape of the Angular Velocity X projected training data is:' + str(X_projected_gyr_x.shape))
X_projected_gyr_y = pca_gyr_y.transform(X_norm.iloc[:,4::6])
print('The shape of the Angular Velocity Y projected training data is:' + str(X_projected_gyr_y.shape))
X_projected_gyr_z = pca_gyr_z.transform(X_norm.iloc[:,5::6])
print('The shape of the Angular Velocity Z projected training data is:' + str(X_projected_gyr_z.shape))

# Name of the columns of the pca dataset: comp_1 ... comp_{6*num_components}.
# Built in one pass (kept as an ndarray, as before) instead of concatenating inside a loop.
colnames = np.array([f'comp_{i}' for i in range(1, 6*num_components+1)])

# Place the six projections side by side: acc x/y/z first, then gyr x/y/z.
X_projected = pd.concat([pd.DataFrame(X_projected_acc_x),pd.DataFrame(X_projected_acc_y),pd.DataFrame(X_projected_acc_z), pd.DataFrame(X_projected_gyr_x), pd.DataFrame(X_projected_gyr_y), pd.DataFrame(X_projected_gyr_z)], axis = 1)
X_projected.columns = colnames

The shape of the Linear Acceleration X projected training data is:(61309, 6)
The shape of the Linear Acceleration Y projected training data is:(61309, 6)
The shape of the Linear Acceleration Z projected training data is:(61309, 6)
The shape of the Angular Velocity X projected training data is:(61309, 6)
The shape of the Angular Velocity Y projected training data is:(61309, 6)
The shape of the Angular Velocity Z projected training data is:(61309, 6)


In [55]:
# Attaching a label column is disabled: no Y is defined for this dataset in the cells shown here.
#X_projected['label'] = Y
print('The projected test data in the number of components is:')
# Bare last expression: the notebook renders the (truncated) projected DataFrame.
X_projected

The projected test data in the number of components is:


Unnamed: 0,comp_1,comp_2,comp_3,comp_4,comp_5,comp_6,comp_7,comp_8,comp_9,comp_10,...,comp_27,comp_28,comp_29,comp_30,comp_31,comp_32,comp_33,comp_34,comp_35,comp_36
0,-3.041144,0.010439,-1.429861,-0.006469,-0.431791,-0.005011,0.022890,0.215301,0.001803,-0.257503,...,-0.161913,0.008275,-0.087532,-0.003130,0.077658,-0.017486,0.002601,0.320713,-0.706081,-0.006590
1,-3.041044,0.010327,-1.429521,-0.006521,-0.431562,-0.005039,0.022487,0.215336,0.001956,-0.257789,...,-0.161777,0.008196,-0.087814,-0.003327,0.077292,-0.017487,0.002681,0.320536,-0.706264,-0.006723
2,-3.040952,0.010106,-1.429302,-0.006451,-0.431458,-0.004987,0.022092,0.215438,0.002124,-0.258099,...,-0.161634,0.008148,-0.088057,-0.003479,0.076919,-0.017517,0.002711,0.320402,-0.706442,-0.006763
3,-3.040881,0.009985,-1.429433,-0.006497,-0.431690,-0.005005,0.021943,0.215483,0.002607,-0.258324,...,-0.161498,0.008050,-0.088326,-0.003673,0.076562,-0.017554,0.002722,0.320158,-0.706640,-0.006757
4,-3.040810,0.010124,-1.429556,-0.006840,-0.431919,-0.005208,0.021906,0.215224,0.003236,-0.258245,...,-0.161360,0.007901,-0.088578,-0.003910,0.076214,-0.017599,0.002716,0.319843,-0.706848,-0.006706
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61304,-2.940278,0.000547,-1.372793,0.002635,-0.416206,-0.006250,-0.002781,0.249860,0.003420,-0.272266,...,-0.162524,0.008671,-0.082509,0.006055,0.077325,0.001462,0.000461,0.324660,-0.710441,-0.000484
61305,-2.940262,0.000763,-1.372653,0.002402,-0.415931,-0.006426,-0.002956,0.249730,0.003367,-0.272079,...,-0.162351,0.008700,-0.082447,0.005847,0.077375,0.001447,0.000491,0.324564,-0.710473,-0.000592
61306,-2.940245,0.001063,-1.372506,0.002061,-0.415648,-0.006668,-0.003158,0.249855,0.003277,-0.272110,...,-0.162186,0.008715,-0.082430,0.005621,0.077439,0.001385,0.000458,0.324361,-0.710524,-0.000598
61307,-2.940235,0.001317,-1.372517,0.001771,-0.415508,-0.006867,-0.003094,0.250175,0.003517,-0.272310,...,-0.162015,0.008732,-0.082394,0.005396,0.077510,0.001388,0.000519,0.324109,-0.710585,-0.000746


---
## **<font size="5">6. Prediction Model</font>**
---

In [56]:
# Rebind X to the PCA-projected features; from here on X no longer refers to the raw dataset.
X = X_projected

In [57]:
# Single location of the predicted-labels file, used for both writing and reading.
predicted_labels_file = folder_label_predicted + l_predicted_name

if prediction:
    # Compute the labels with the loaded classifier and persist them.
    print('Predicting...')
    predicted_labels = support.predict(X)
    print('Prediction done!')
    Y_predicted_df = pd.DataFrame(predicted_labels, columns = ['predicted_l'])
    Y_predicted_df.to_csv(predicted_labels_file)
    # Read the file back so Y_predicted has the same layout as in the load-only branch.
    Y_predicted = pd.read_csv(predicted_labels_file)
else:
    # Reuse the labels saved by a previous run.
    Y_predicted = pd.read_csv(predicted_labels_file)
    print('L Predicted Loaded!')

Predicting...
Prediction done!
