## Get independent csv files for the linear and angular kinematics. 
    
- Each csv stacks the trials one after another, in no particular order of trial or participant. Only the first row of each trial carries the subject_number, trial_number and state ("on" or "off").
- Input: output_kinematics_dataframe.csv
- Outputs:
  - linear_kinematics_csv.csv
  - angular_kinematics_csv.csv

In [2]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# https://github.com/nrg-projects/sara-on-off-boari/blob/main/classification_kinematics.ipynb

In [None]:
import pandas as pd
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import zscore
import numpy as np
from sklearn.svm import SVC
from sklearn.experimental import enable_halving_search_cv
from sklearn.model_selection import LeaveOneGroupOut, HalvingGridSearchCV
from sklearn.metrics import accuracy_score, roc_curve, roc_auc_score, confusion_matrix, ConfusionMatrixDisplay
from sklearn.inspection import permutation_importance
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import GradientBoostingClassifier
import xgboost as xgb
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from IPython.display import display
from numpy import ravel

In [None]:
# Load the kinematics table, using the first CSV column as the index, then let
# pandas pick the best-fitting dtypes for every column.
df = pd.read_csv(
    'output_kinematics_dataframe.csv',
    index_col=0,
)
df = df.convert_dtypes()

# Preview the first 20 rows.
df.head(n=20)

### Missing values

- Identify the indexes with missing values
- Print the subject number, state and trial of this missing data
- Drop these rows and reset indexes



In [None]:
# Count the missing cells in every row, keep only the rows that have at least
# one, and print the index labels of those rows.
missing_rows = df.isna().sum(axis='columns')
missing_rows_filtered = missing_rows.loc[missing_rows.ne(0)]
print(missing_rows_filtered.index)

In [None]:
import pandas as pd
import pathlib as path
import numpy as np
import warnings
import re
import xlrd

In [None]:
# Silence every FutureWarning for the rest of the session.
warnings.simplefilter('ignore', FutureWarning)

In [None]:
# Directory (relative to the working directory) that is scanned for subject folders.
root_folder = path.Path() / 'C3Dfiles'

In [None]:
# Column names for the joint-angle data: three components for the trunk,
# followed by three components for the left and then the right side of every
# bilateral joint.
_TRUNK_MOTIONS = ('Lateral_Flexion', 'Rotation', 'Flx/Extension')
_BILATERAL_MOTIONS = (
    ('Shoulder', ('Add/Abduction', 'Int/External_Rotation', 'Flx/Extension')),
    ('Elbow', ('Add/Abduction', 'Pron/Supination', 'Flx/Extension')),
    ('Pelvic', ('Obliquity', 'Rotation', 'Tilt')),
    ('Hip', ('Add/Abduction', 'Int/External_Rotation', 'Flexion/Extension')),
    ('Knee', ('Add/Abduction', 'Int/External_Rotation', 'Flx/Extension')),
    ('Ankle', ('Inv/Eversion', 'Add/Abduction', 'Dorsi/Plantarflexion')),
    ('Foot', ('Inv/Eversion', 'Int/External_Rotation', 'DF/Plantarflexion')),
)

angles_header = ['Trunk_' + motion for motion in _TRUNK_MOTIONS] + [
    '_'.join((side, joint, motion))
    for joint, motions in _BILATERAL_MOTIONS
    for side in ('Left', 'Right')
    for motion in motions
]

# Spaces and slashes both become underscores so the names are safe column labels.
_TO_UNDERSCORE = str.maketrans({' ': '_', '/': '_'})
angles_header = [name.translate(_TO_UNDERSCORE) for name in angles_header]

In [None]:
# Column names for the marker trajectories: every marker contributes three
# consecutive columns, suffixed _AP, _Vertical and _ML in that order.
_MARKER_NAMES = (
    'CoM', 'CLAV', 'STRN', 'C7', 'T10', 'RSHO', 'LSHO',
    'RUPA', 'REL', 'REM', 'RFRA', 'RWL', 'RWM',
    'LUPA', 'LEL', 'LEM', 'LFRA', 'LWL', 'LWM',
    'R.ASIS', 'L.ASIS', 'R.PSIS', 'L.PSIS',
    'R.GTR', 'R.Knee', 'R.HF', 'R.TT', 'R.Ankle', 'R.Heel', 'R.MT1', 'R.MT5',
    'L.GTR', 'L.Knee', 'L.HF', 'L.TT', 'L.Ankle', 'L.Heel', 'L.MT1', 'L.MT5',
    'R.Knee.Medial', 'R.Ankle.Medial', 'R.MT2',
    'L.Knee.Medial', 'L.Ankle.Medial', 'L.MT2',
)
_AXIS_SUFFIXES = ('AP', 'Vertical', 'ML')

trajectories_header = [
    marker + '_' + axis
    for marker in _MARKER_NAMES
    for axis in _AXIS_SUFFIXES
]

# Spaces and slashes both become underscores (dots in marker names are kept).
_SEPARATOR_TABLE = str.maketrans({' ': '_', '/': '_'})
trajectories_header = [name.translate(_SEPARATOR_TABLE) for name in trajectories_header]

In [None]:
# Three independent, empty frames that the loading cells below append to.
df_angles, df_spatiotemporal, df_trajectories = (pd.DataFrame() for _ in range(3))

# Linear kinematics: Read the data and turn it into a csv

In [None]:
# Build df_trajectories: the raw linear kinematics (marker trajectories) of
# every trial found under root_folder, stacked one trial after another.
# Within each trial block, subject_number, state and trial_number are filled
# in on the first row only; the remaining rows hold just the samples.
#
# The spatiotemporal (*_temporal_distance.txt) pass that used to sit in this
# cell was a verbatim copy of the one in the angular-kinematics cell, so
# running the notebook appended every trial to df_spatiotemporal twice. That
# pass is left to the angular-kinematics cell.
trial_frames = []

for subject in root_folder.glob('**/*'):

    subject_str = str(subject)

    # only directories whose path contains "SUB" are treated as subject folders
    if not (subject.is_dir() and "SUB" in subject_str):
        continue

    # The two characters right after "SUB" are parsed as the subject number;
    # the text starting one character further on is stored as the state.
    sub_pos = subject_str.find("SUB")
    subject_number = int(subject_str[sub_pos + 3 : sub_pos + 5])
    condition = [subject_str[sub_pos + 6 :]]

    # one-row helper frame, only used to attach the metadata to each trial
    df_num_state = pd.DataFrame({'subject_number': subject_number, 'state': condition})

    for csv_trajectories in subject.glob('**/*_linear_kinematics.csv'):
        print(csv_trajectories)

        # the trial number is the text between "_walk_" and "_linear_kinematics"
        trial_str = str(csv_trajectories)
        trial_number = int(trial_str[trial_str.find("_walk_") + 6 : trial_str.find("_linear_kinematics")])
        df_trial_num = pd.Series(trial_number, name='trial_number')

        # NOTE(review): despite the .csv extension the file is parsed as an
        # Excel workbook through xlrd - confirm this matches the export format.
        csv_traj = pd.read_excel(csv_trajectories, engine='xlrd')

        # Drop the rows labelled 0-4 (five preamble rows; the original note
        # mentions missing info and a units row - TODO confirm the layout).
        csv_traj.drop([0, 1, 2, 3, 4], inplace=True)

        # drop the first two columns (time and frame, per the original note)
        csv_traj.drop(csv_traj.columns[[0, 1]], axis=1, inplace=True)

        # name the remaining columns after trajectories_header, lower-cased,
        # and restart the row labels at 0 so they line up with the metadata row
        csv_traj.columns = trajectories_header
        csv_traj.columns = csv_traj.columns.str.lower()
        csv_traj.reset_index(drop=True, inplace=True)

        # metadata (one row) side by side with the samples (many rows)
        df_trial = pd.concat([df_num_state, df_trial_num, csv_traj], axis=1)
        trial_frames.append(df_trial)

# A single concat for all trials instead of re-copying the growing frame once
# per file; trials are still appended to whatever df_trajectories already held.
if trial_frames:
    df_trajectories = pd.concat([df_trajectories, *trial_frames], ignore_index=True)
            

In [None]:
# Quick size check of the stacked linear-kinematics frame: (n_rows, n_columns).
df_trajectories.shape

In [None]:
# Export the stacked linear kinematics; the row index is written as the first column.
df_trajectories.to_csv(
    'linear_kinematics_csv.csv',
    index=True,
)

## Angular kinematics

In [None]:
# Build df_angles (raw joint angles) and df_spatiotemporal (spatiotemporal
# parameters) from every trial found under root_folder, stacked one trial
# after another. Within each trial block, subject_number, state and
# trial_number are filled in on the first row only.
angle_frames = []
spatiotemporal_frames = []

for subject in root_folder.glob('**/*'):

    subject_str = str(subject)

    # only directories whose path contains "SUB" are treated as subject folders
    if not (subject.is_dir() and "SUB" in subject_str):
        continue

    # The two characters right after "SUB" are parsed as the subject number;
    # the text starting one character further on is stored as the state.
    sub_pos = subject_str.find("SUB")
    subject_number = int(subject_str[sub_pos + 3 : sub_pos + 5])
    condition = [subject_str[sub_pos + 6 :]]

    # one-row helper frame, only used to attach the metadata to each trial
    df_num_state = pd.DataFrame({'subject_number': subject_number, 'state': condition})

    # --- joint angles -------------------------------------------------------
    for csv_angles in subject.glob('**/*_angular_kinematics.csv'):

        # the trial number is the text between "_walk_" and "_angular_kinematics"
        trial_str = str(csv_angles)
        trial_number = int(trial_str[trial_str.find("_walk_") + 6 : trial_str.find("_angular_kinematics")])
        df_trial_num = pd.Series(trial_number, name='trial_number')

        # NOTE(review): despite the .csv extension the file is parsed as an
        # Excel workbook through xlrd - confirm this matches the export format.
        csv_ang = pd.read_excel(csv_angles, engine='xlrd')

        # Drop the rows labelled 0-4 (five preamble rows; the original note
        # mentions missing info and a units row - TODO confirm the layout).
        csv_ang.drop([0, 1, 2, 3, 4], inplace=True)

        # drop the first two columns (time and frame, per the original note)
        csv_ang.drop(csv_ang.columns[[0, 1]], axis=1, inplace=True)

        # name the remaining columns after angles_header, lower-cased, and
        # restart the row labels at 0 so they line up with the metadata row
        csv_ang.columns = angles_header
        csv_ang.columns = csv_ang.columns.str.lower()
        csv_ang.reset_index(drop=True, inplace=True)

        # metadata (one row) side by side with the samples (many rows)
        df_trial = pd.concat([df_num_state, df_trial_num, csv_ang], axis=1)
        angle_frames.append(df_trial)

    # --- spatiotemporal parameters -------------------------------------------
    for csv_spatiotemporal in subject.glob('**/*_temporal_distance.txt'):

        # the trial number is the text between "_walk_" and "_temporal_distance"
        trial_str = str(csv_spatiotemporal)
        trial_number = int(trial_str[trial_str.find("_walk_") + 6 : trial_str.find("_temporal_distance")])
        df_trial_num = pd.Series(trial_number, name='trial_number')

        # Tab-separated text file: drop the rows labelled 1-3 and the first
        # column, then promote the first remaining row (lower-cased) to header.
        csv_spat = pd.read_csv(csv_spatiotemporal, sep='\t')
        csv_spat.drop([1, 2, 3], inplace=True)
        csv_spat.drop(csv_spat.columns[[0]], axis=1, inplace=True)
        csv_spat.reset_index(drop=True, inplace=True)
        csv_spat.columns = csv_spat.iloc[0].str.lower()
        csv_spat = csv_spat[1:].reset_index(drop=True)

        # metadata side by side with the spatiotemporal parameters
        df_spat = pd.concat([df_num_state, df_trial_num, csv_spat], axis=1)
        spatiotemporal_frames.append(df_spat)

# A single concat per output instead of re-copying the growing frame once per
# file; trials are still appended to whatever the frames already held.
if angle_frames:
    df_angles = pd.concat([df_angles, *angle_frames], ignore_index=True)
if spatiotemporal_frames:
    df_spatiotemporal = pd.concat([df_spatiotemporal, *spatiotemporal_frames], ignore_index=True)

In [None]:
# Export the stacked angular kinematics. This cell previously wrote
# df_trajectories, which made angular_kinematics_csv.csv a copy of the
# linear-kinematics export instead of the joint-angle data in df_angles.
df_angles.to_csv('angular_kinematics_csv.csv', index=True)