# Concat after Normalization
### recommended!
# Only by doing this can features be preserved without loss and keep the data intact

In [1]:
import random
import pandas as pd 
import numpy as np
from numpy import savez_compressed
from numpy import load
from natsort import natsorted
import os
!pip install scikit-learn
from sklearn.preprocessing import MinMaxScaler, StandardScaler ,RobustScaler
from sklearn.model_selection import KFold

from tqdm.notebook import trange, tqdm
from os.path import join

from pickle import dump, load

seed_rand = 41 # 2nd 777 # 1st 41
nameDataset = "IWALQQ_1st_correction"



In [2]:
def makeColumnsWOMAG():
    """
    Creates column names for biomechanical data excluding magnetometer (MAG) sensors.
    Returns standardized column names for IMU and biomech without MAG columns.
    """
    SIDEIDX = ['non','oa']  # Non-operated vs operated leg sides
    PARTIDX = ['shank','shoe','thigh']  # Body parts where sensors are placed
    TYPEIDX = ['ACC','GYRO','MAG']  # Sensor types: Accelerometer, Gyroscope, Magnetometer
    AXISIDX = ['X', 'Y', 'Z']  # 3D coordinate axes
    
    # Generate leg IMU column names (54 columns total)
    LEGCOLUMNSLENGTH = 54
    COl_imu_legs = [f'{SIDEIDX[int(i//(LEGCOLUMNSLENGTH/2))]}\
_{PARTIDX[(i//(len(TYPEIDX)*len(AXISIDX)))%len(PARTIDX)]}\
_{TYPEIDX[(i//(len(AXISIDX)))%len(TYPEIDX)]}\
_{AXISIDX[i%len(AXISIDX)]}' for i in range(0,LEGCOLUMNSLENGTH)]
    
    # Generate trunk IMU column names (9 columns total)
    TRKCOLUMNSLENGTH = 9
    Col_imu_trunk = [f'trunk_{TYPEIDX[(i//(len(AXISIDX)))%len(TYPEIDX)]}_{AXISIDX[i%len(AXISIDX)]}' for i in range(0,TRKCOLUMNSLENGTH)]
    
    # Generate biomeche column names (12 columns total)
    FPCOLUMNSLENGTH = 12
    FPTYPEIDX = ['GRF','ANGLE','MONM','MOBWHT']  # Ground Reaction Force, Angle, Moment, Mobile Weight
    Col_FP = [f'{FPTYPEIDX[(i//(len(AXISIDX)))%len(FPTYPEIDX)]}_{AXISIDX[i%len(AXISIDX)]}' for i in range(0,FPCOLUMNSLENGTH)]
    
    # Combine all column names
    newColumns = COl_imu_legs+Col_imu_trunk+Col_FP
    # Filter out magnetometer columns for final dataset
    newColumnswithoutMAG = [col for col in newColumns if not 'MAG' in col] 
    return newColumnswithoutMAG

# Convert subject-based classification back to overall data index basis
# Function to convert subject-based KFold splits into subject fold indices
def kfold2subfold(arrName,listData,train,test):
    """
    Converts subject-based train/test splits to data index-based splits.
    
    Args:
        arrName: Array of subject IDs for train/test splits
        listData: Metadata dataframe containing patient information
        train: Training subject indices
        test: Testing subject indices
    
    Returns:
        arrTrain: List of data indices for training samples
        arrTest: List of data indices for testing samples
    """
    arrTrain = []  # Store training data indices
    arrTest = []   # Store testing data indices
    
    # Get all data indices for training subjects
    for pID in arrName[train]:
        idxofID = listData.index[listData['patientID']==pID].copy()  # Find all rows for this patient
        arrTrain.extend(idxofID.to_list())  # Add patient's data indices to training set
    
    # Get all data indices for testing subjects
    for pID in arrName[test]:
        idxofID = listData.index[listData['patientID']==pID].copy()  # Find all rows for this patient
        arrTest.extend(idxofID.to_list())  # Add patient's data indices to testing set
    
    return arrTrain, arrTest

# Local settings

In [3]:
# Set required directories
dataDir =     r'R:\KumarLab3\PROJECTS\wesens\Data\Analysis\smith_dl\IMU Deep Learning\Data\allnew_20220325_raw_byDeepak_csv\INC_ByStep\INC_ByZero\Included_checked'
normalizedDir = join(dataDir, r'NORM')

#######################################################
# Configuration window
# This time using time-normalized data - all data lengths converted to 101 points
TargetDir = normalizedDir
#######################################################
# Get file list
# Get entire file list, select only needed extensions, and exclude everything except CSV files from the retrieved file list
dataExt = r".csv"
listFromFolder = natsorted([_ for _ in os.listdir(TargetDir) if _.endswith(dataExt)])
# Load organized file list
listfileName  = r'list_dataset_correction.xlsx'
listFromxlsx = pd.read_excel(join(dataDir,listfileName))
# Extract unique subjects
arrName = listFromxlsx.patientID.unique()
print("The order and count of listfileName and dataList must always match")
print(f"\nNum_listFromxlsx: {len(listFromxlsx)} | Num_listFromFolder: {len(listFromFolder)}")
print(f'Is same size: {len(listFromxlsx)==len(listFromFolder)}')
listFromxlsx.head(), listFromFolder[:5]

The order and count of listfileName and dataList must always match

Num_listFromxlsx: 876 | Num_listFromFolder: 876
Is same size: True


(  patientID  dateVisit speed  numtrial    side  numStep
 0      P002      31220     w         7  nonleg        1
 1      P002      31220     w         7  nonleg        2
 2      P002      31220     w         7   oaleg        1
 3      P002      31220     w         8  nonleg        1
 4      P002      31220     w         8   oaleg        1,
 ['N_F_P002_031220_w_0007_nonleg_imu_knee_angle_moment_R_1_Step.csv',
  'N_F_P002_031220_w_0007_nonleg_imu_knee_angle_moment_R_2_Step.csv',
  'N_F_P002_031220_w_0007_oaleg_imu_knee_angle_moment_R_1_Step.csv',
  'N_F_P002_031220_w_0008_nonleg_imu_knee_angle_moment_R_1_Step.csv',
  'N_F_P002_031220_w_0008_oaleg_imu_knee_angle_moment_R_1_Step.csv'])

# SCC settings     BU Comuting Cluster


In [18]:
# Set required directories
dataDir =     r'.'
normalizedDir = join(dataDir, r'NORM_CORRECTION')
#######################################################
# Configuration window
# This time using time-normalized data - all data lengths converted to 101 points
TargetDir = normalizedDir
#######################################################
# Get file list
# Get entire file list, select only needed extensions, and exclude everything except CSV files from the retrieved file list
dataExt = r".csv"
listFromFolder = natsorted([_ for _ in os.listdir(TargetDir) if _.endswith(dataExt)])
# Load organized file list
listfileName  = r'list_dataset_correction.xlsx'
listFromxlsx = pd.read_excel(join(dataDir,listfileName))
# Extract unique subjects
arrName = listFromxlsx.patientID.unique()
print("The order and count of listfileName and dataList must always match")
print(f"\nNum_listFromxlsx: {len(listFromxlsx)} | Num_listFromFolder: {len(listFromFolder)}")
print(f'Is same size: {len(listFromxlsx)==len(listFromFolder)}')
listFromxlsx.head(), listFromFolder[:5]

FileNotFoundError: [WinError 3] The system cannot find the path specified: '.\\NORM_CORRECTION'

### Create dataset
- Data split must be performed first before anything else!
- Rather than splitting the dataset into train|valid|test (80|10|10), let's do 5-fold cross-validation

In [4]:
# Required functions and class configuration
def make_dir(file_path):
    """
    Creates a directory if it doesn't already exist.
    Safe directory creation that prevents overwriting existing folders.
    """
    if not os.path.exists(file_path):
        os.makedirs(file_path)
        
# Used for scaling input data with (N, M, P) shape!
# The function below is applied column-wise to the entire dataset!
# Super convenient..
class MinMaxScaler3D(MinMaxScaler):
    """
    Custom MinMaxScaler for 3D data arrays.
    Reshapes 3D data to 2D for scaling, then reshapes back to original dimensions.
    Scales each feature independently while preserving temporal structure.
    """
    def fit(self, X, y=None):
        """
        Fit the scaler to 3D data by reshaping to 2D, fitting, then preserving parameters.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Flatten first two dimensions
        super().fit(x, y=y)  # Fit standard MinMaxScaler to reshaped data

    def transform(self, X):
        """
        Transform 3D data using fitted scaler parameters.
        Maintains original 3D shape after scaling.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Reshape to 2D
        return np.reshape(super().transform(x), newshape=X.shape)  # Scale and reshape back to 3D
    
    def inverse_transform(self, X):
        """
        Reverse the scaling transformation to get back original scale values.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Reshape to 2D
        return np.reshape(super().inverse_transform(x), newshape=X.shape)  # Inverse scale and reshape back

class StandardScaler3D(StandardScaler):
    """
    Custom StandardScaler (Z-score normalization) for 3D data arrays.
    Normalizes data to mean=0, std=1 while preserving 3D structure.
    """
    def fit(self, X, y=None):
        """
        Calculate mean and standard deviation for each feature across all samples and time points.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Flatten first two dimensions
        super().fit(x, y=y)  # Fit standard StandardScaler

    def transform(self, X):
        """
        Apply Z-score normalization: (x - mean) / std for each feature.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Reshape to 2D
        return np.reshape(super().transform(x), newshape=X.shape)  # Normalize and reshape back to 3D
    
    def inverse_transform(self, X):
        """
        Convert normalized data back to original scale: (x * std) + mean.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Reshape to 2D
        return np.reshape(super().inverse_transform(x), newshape=X.shape)  # Denormalize and reshape back

class RobustScaler3D(RobustScaler):
    """
    Custom RobustScaler for 3D data arrays.
    Uses median and interquartile range for scaling, robust to outliers.
    Formula: (x - median) / IQR
    """
    def fit(self, X, y=None):
        """
        Calculate median and interquartile range for each feature.
        More robust to outliers than StandardScaler.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Flatten first two dimensions
        super().fit(x, y=y)  # Fit standard RobustScaler

    def transform(self, X):
        """
        Apply robust scaling using median and IQR instead of mean and std.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Reshape to 2D
        return np.reshape(super().transform(x), newshape=X.shape)  # Scale and reshape back to 3D
    
    def inverse_transform(self, X):
        """
        Convert robust-scaled data back to original scale.
        """
        x = np.reshape(X, newshape=(X.shape[0]*X.shape[1], X.shape[2]))  # Reshape to 2D
        return np.reshape(super().inverse_transform(x), newshape=X.shape)  # Inverse scale and reshape back

# For complete K-fold
- Running this cell will save K pairs of (train set, test set, fitted scaler) for K-fold cross-validation

In [5]:
# Declare K-FOLD cross-validation
kfold = KFold(n_splits=5, random_state=seed_rand, shuffle=True)
# Always put the subject list in kfold.split(here)
countfold = -1
print(f"Total subject:{len(arrName)}")
print(f"Total Data size:{len(listFromxlsx)}")
for train,test in kfold.split(arrName):
    # Fold numbering for current iteration
    countfold = countfold + 1
    # Create dataset
    # Subject list used for this training session
    print("+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+")
    print(f"Num. of fold: {countfold}\n")
    print(f'{arrName[train]}\nNum for train:{len(arrName[train])}\n')
    print(f'{arrName[test]}\nNum for test:{len(arrName[test])}\n')
    print(f'Num total:{len(arrName[train])+len(arrName[test])}')

    idx4train,idx4test = kfold2subfold(arrName,listFromxlsx,train,test)
    print(f"\n{countfold}_fold\nidx4train:{len(idx4train)} | idx4test:{len(idx4test)} | total:{len(idx4train)+len(idx4test)}/{len(listFromxlsx)}")
    # Create complete list at once and use as needed
    # Store all data and make it extractable by subject
    columnsWOMAG = makeColumnsWOMAG()
    X_columns = [str(i) for i in range(0,42)]
    df_trainData_X = pd.DataFrame(columns=X_columns)

    Y_columns = [str(i) for i in range(0,3)]
    df_trainData_Y_angle = pd.DataFrame(columns=Y_columns) # angle has 3 axes
    df_trainData_Y_moBWHT = pd.DataFrame(columns=Y_columns) # moment also has 3 axes
    
    # Store only data corresponding to training set!
    # Valid and test sets can be created the same way!
    for idx, datum in enumerate(tqdm([listFromFolder[i] for i in idx4train])):
        df = pd.read_csv(join(TargetDir,datum))
        # First exclude all MAG data from dataset
        dfWOMAG = df.loc[:,columnsWOMAG]
        # If measured moment leg is nonleg, keep file as is
        if listFromxlsx.loc[idx,'side'] == "oaleg":
            targetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z']
            nonTargetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z'] # exclude mag
            otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z'] 
        else:
            targetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z'] # exclude mag
            nonTargetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z'] # exclude mag
            otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z']
        # Always create data in the same order
        concated = pd.concat([targetLegArr, nonTargetLegArr,otherArr],axis=1)

        # Rename columns
        # Need to stack data..
        concated.columns = X_columns
        # Accumulate input data
        df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
        ##############################################################
        # Create output data
        # kinematic(Angle)
        angle = dfWOMAG.loc[:,'ANGLE_X':'ANGLE_Z']
        angle.columns = Y_columns
        # Accumulate output data
        df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
        # kinetic(moment)
        moBWHT = dfWOMAG.loc[:,'MOBWHT_X':'MOBWHT_Z']
        moBWHT.columns = Y_columns
        # Accumulate output data
        df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)
    # Apply Scaler
    # Currently only applying MinMaxScaler
    scaler4X = MinMaxScaler() # Min-Max normalization
    scaler4Y_angle = MinMaxScaler() # Min-Max normalization
    scaler4Y_moBWHT  = MinMaxScaler() # Min-Max normalization

    scaler4X.fit(df_trainData_X) # ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!
    scaler4Y_angle.fit(df_trainData_Y_angle) # ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!
    scaler4Y_moBWHT.fit(df_trainData_Y_moBWHT) # ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!

    scaled_X_train = scaler4X.transform(df_trainData_X)
    scaled_Y_angle_train = scaler4Y_angle.transform(df_trainData_Y_angle)
    scaled_Y_moBWHT_train = scaler4Y_moBWHT.transform(df_trainData_Y_moBWHT)

    # Scaler save location
    scalerDir = join(dataDir, r'SAVE_dataSet',nameDataset)
    make_dir(scalerDir)
    # Save scaler
    dump(scaler4X, open(join(scalerDir,f"{countfold}_fold_scaler4X.pkl"), 'wb'))
    dump(scaler4Y_angle, open(join(scalerDir,f'{countfold}_fold_scaler4Y_angle.pkl'), 'wb'))
    dump(scaler4Y_moBWHT, open(join(scalerDir,f'{countfold}_fold_scaler4Y_moBWHT.pkl'), 'wb'))

    # Now that scaling is done properly, let's restore original structure!
    # Desired shape format 
    # (N, 4242, 1), (N, 303, 1), (N, 303, 1)   N is number of data samples
    X_train = []
    Y_angle_train = []
    Y_moBWHT_train = []
    for i in range(0,len(idx4train)):
        chopped_X_train = scaled_X_train[i*101:101+i*101,:]
        X_train.append(chopped_X_train.flatten('F').reshape(-1,1))
        
        chopped_Y_angle_train= scaled_Y_angle_train[i*101:101+i*101,:]
        Y_angle_train.append(chopped_Y_angle_train.flatten('F').reshape(-1,1))

        chopped_Y_moBWHT_train= scaled_Y_moBWHT_train[i*101:101+i*101,:]
        Y_moBWHT_train.append(chopped_Y_moBWHT_train.flatten('F').reshape(-1,1))

    final_X_train = np.array(X_train)
    final_Y_angle_train = np.array(Y_angle_train)
    final_Y_moBWHT_train = np.array(Y_moBWHT_train)

    # Check the shape of created data! 
    print(f'TRAIN data  :  {len(idx4train)}')
    print(f'Final shape: {final_X_train.shape}, {final_Y_angle_train.shape}, {final_Y_moBWHT_train.shape}')

    # Data save location
    setDir = join(dataDir, r'SAVE_dataSet',nameDataset)
    make_dir(setDir)
    # Save data
    savez_compressed(join(setDir,f"{countfold}_fold_final_train.npz"), final_X_train=final_X_train,final_Y_angle_train=final_Y_angle_train,final_Y_moBWHT_train=final_Y_moBWHT_train)
    
    #############################################################################################################################################
    # For test set
    # Create complete list at once and use as needed
    # Store all data and make it extractable by subject
    columnsWOMAG = makeColumnsWOMAG()
    X_columns = [str(i) for i in range(0,42)]
    df_testData_X = pd.DataFrame(columns=X_columns)

    Y_columns = [str(i) for i in range(0,3)]
    df_testData_Y_angle = pd.DataFrame(columns=Y_columns) # angle has 3 axes
    df_testData_Y_moBWHT = pd.DataFrame(columns=Y_columns) # moment also has 3 axes
    # Store only data corresponding to test set!
    # Valid and test sets can be created the same way!
    for idx, datum in enumerate(tqdm([listFromFolder[i] for i in idx4test])):
        df = pd.read_csv(join(TargetDir,datum))
        # First exclude all MAG data from dataset
        dfWOMAG = df.loc[:,columnsWOMAG]
        # If measured moment leg is nonleg, keep file as is
        if listFromxlsx.loc[idx,'side'] == "oaleg":
            targetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z']
            nonTargetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z'] # exclude mag
            otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z'] 
        else:
            targetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z'] # exclude mag
            nonTargetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z'] # exclude mag
            otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z']
        # Always create data in the same order
        concated = pd.concat([targetLegArr, nonTargetLegArr,otherArr],axis=1)

        # Rename columns
        # Need to stack data..
        concated.columns = X_columns
        # Accumulate input data
        df_testData_X = pd.concat([df_testData_X, concated],axis=0,ignore_index=True)
        ##############################################################
        # Create output data
        # kinematic(Angle)
        angle = dfWOMAG.loc[:,'ANGLE_X':'ANGLE_Z']
        angle.columns = Y_columns
        # Accumulate output data
        df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle],axis=0,ignore_index=True)
        # kinetic(moment)
        moBWHT = dfWOMAG.loc[:,'MOBWHT_X':'MOBWHT_Z']
        moBWHT.columns = Y_columns
        # Accumulate output data
        df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)
    
    # Apply Scaler
    scaled_X_test = scaler4X.transform(df_testData_X)
    scaled_Y_angle_test = scaler4Y_angle.transform(df_testData_Y_angle)
    scaled_Y_moBWHT_test = scaler4Y_moBWHT.transform(df_testData_Y_moBWHT)
    
    # Now that scaling is done properly, let's restore original structure!
    # Desired shape format 
    # (N, 4242, 1), (N, 303, 1), (N, 303, 1)   N is number of data samples
    X_test = []
    Y_angle_test = []
    Y_moBWHT_test = []
    for i in range(0,len(idx4test)):
        chopped_X_test = scaled_X_test[i*101:101+i*101,:]
        X_test.append(chopped_X_test.flatten('F').reshape(-1,1))
        
        chopped_Y_angle_test= scaled_Y_angle_test[i*101:101+i*101,:]
        Y_angle_test.append(chopped_Y_angle_test.flatten('F').reshape(-1,1))

        chopped_Y_moBWHT_test= scaled_Y_moBWHT_test[i*101:101+i*101,:]
        Y_moBWHT_test.append(chopped_Y_moBWHT_test.flatten('F').reshape(-1,1))

    final_X_test = np.array(X_test)
    final_Y_angle_test = np.array(Y_angle_test)
    final_Y_moBWHT_test = np.array(Y_moBWHT_test)

    # Check the shape of created data! 
    print(f'TEST data  :  {len(idx4test)}')
    print(f'Final shape: {final_X_test.shape}, {final_Y_angle_test.shape}, {final_Y_moBWHT_test.shape}')

    # Data save location
    # Same as train data
    # Save data
    savez_compressed(join(setDir,f"{countfold}_fold_final_test.npz"), final_X_test=final_X_test,final_Y_angle_test=final_Y_angle_test,final_Y_moBWHT_test=final_Y_moBWHT_test)
    # Delete later or make it into a function

Total subject:44
Total Data size:876
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 0

['P002' 'P007' 'P017' 'P029' 'P050' 'P065' 'P104' 'P106' 'P115' 'P119'
 'P134' 'P135' 'P136' 'P142' 'P147' 'P149' 'P155' 'P168' 'P169' 'P172'
 'P196' 'P203' 'P222' 'P225' 'P226' 'P243' 'P245' 'P258' 'P263' 'P266'
 'P270' 'P272' 'P273' 'P277' 'P290']
Num for train:35

['P061' 'P066' 'P069' 'P105' 'P121' 'P132' 'P205' 'P229' 'P297']
Num for test:9

Num total:44

0_fold
idx4train:722 | idx4test:154 | total:876/876


  0%|          | 0/722 [00:00<?, ?it/s]

  df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
  df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
  df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TRAIN data  :  722
Final shape: (722, 4242, 1), (722, 303, 1), (722, 303, 1)


  0%|          | 0/154 [00:00<?, ?it/s]

  df_testData_X = pd.concat([df_testData_X, concated],axis=0,ignore_index=True)
  df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle],axis=0,ignore_index=True)
  df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TEST data  :  154
Final shape: (154, 4242, 1), (154, 303, 1), (154, 303, 1)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 1

['P002' 'P007' 'P017' 'P029' 'P061' 'P065' 'P066' 'P069' 'P104' 'P105'
 'P106' 'P115' 'P119' 'P121' 'P132' 'P134' 'P135' 'P142' 'P147' 'P149'
 'P155' 'P168' 'P169' 'P172' 'P196' 'P205' 'P222' 'P225' 'P229' 'P245'
 'P258' 'P263' 'P272' 'P290' 'P297']
Num for train:35

['P050' 'P136' 'P203' 'P226' 'P243' 'P266' 'P270' 'P273' 'P277']
Num for test:9

Num total:44

1_fold
idx4train:667 | idx4test:209 | total:876/876


  0%|          | 0/667 [00:00<?, ?it/s]

  df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
  df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
  df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TRAIN data  :  667
Final shape: (667, 4242, 1), (667, 303, 1), (667, 303, 1)


  0%|          | 0/209 [00:00<?, ?it/s]

  df_testData_X = pd.concat([df_testData_X, concated],axis=0,ignore_index=True)
  df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle],axis=0,ignore_index=True)
  df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TEST data  :  209
Final shape: (209, 4242, 1), (209, 303, 1), (209, 303, 1)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 2

['P002' 'P007' 'P029' 'P050' 'P061' 'P065' 'P066' 'P069' 'P104' 'P105'
 'P115' 'P121' 'P132' 'P134' 'P136' 'P142' 'P147' 'P149' 'P155' 'P168'
 'P172' 'P196' 'P203' 'P205' 'P226' 'P229' 'P243' 'P245' 'P258' 'P266'
 'P270' 'P272' 'P273' 'P277' 'P297']
Num for train:35

['P017' 'P106' 'P119' 'P135' 'P169' 'P222' 'P225' 'P263' 'P290']
Num for test:9

Num total:44

2_fold
idx4train:722 | idx4test:154 | total:876/876


  0%|          | 0/722 [00:00<?, ?it/s]

  df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
  df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
  df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TRAIN data  :  722
Final shape: (722, 4242, 1), (722, 303, 1), (722, 303, 1)


  0%|          | 0/154 [00:00<?, ?it/s]

  df_testData_X = pd.concat([df_testData_X, concated],axis=0,ignore_index=True)
  df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle],axis=0,ignore_index=True)
  df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TEST data  :  154
Final shape: (154, 4242, 1), (154, 303, 1), (154, 303, 1)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 3

['P002' 'P007' 'P017' 'P050' 'P061' 'P066' 'P069' 'P105' 'P106' 'P115'
 'P119' 'P121' 'P132' 'P134' 'P135' 'P136' 'P168' 'P169' 'P172' 'P203'
 'P205' 'P222' 'P225' 'P226' 'P229' 'P243' 'P245' 'P258' 'P263' 'P266'
 'P270' 'P273' 'P277' 'P290' 'P297']
Num for train:35

['P029' 'P065' 'P104' 'P142' 'P147' 'P149' 'P155' 'P196' 'P272']
Num for test:9

Num total:44

3_fold
idx4train:670 | idx4test:206 | total:876/876


  0%|          | 0/670 [00:00<?, ?it/s]

  df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
  df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
  df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TRAIN data  :  670
Final shape: (670, 4242, 1), (670, 303, 1), (670, 303, 1)


  0%|          | 0/206 [00:00<?, ?it/s]

  df_testData_X = pd.concat([df_testData_X, concated],axis=0,ignore_index=True)
  df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle],axis=0,ignore_index=True)
  df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TEST data  :  206
Final shape: (206, 4242, 1), (206, 303, 1), (206, 303, 1)
+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+=+
Num. of fold: 4

['P017' 'P029' 'P050' 'P061' 'P065' 'P066' 'P069' 'P104' 'P105' 'P106'
 'P119' 'P121' 'P132' 'P135' 'P136' 'P142' 'P147' 'P149' 'P155' 'P169'
 'P196' 'P203' 'P205' 'P222' 'P225' 'P226' 'P229' 'P243' 'P263' 'P266'
 'P270' 'P272' 'P273' 'P277' 'P290' 'P297']
Num for train:36

['P002' 'P007' 'P115' 'P134' 'P168' 'P172' 'P245' 'P258']
Num for test:8

Num total:44

4_fold
idx4train:723 | idx4test:153 | total:876/876


  0%|          | 0/723 [00:00<?, ?it/s]

  df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
  df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
  df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TRAIN data  :  723
Final shape: (723, 4242, 1), (723, 303, 1), (723, 303, 1)


  0%|          | 0/153 [00:00<?, ?it/s]

  df_testData_X = pd.concat([df_testData_X, concated],axis=0,ignore_index=True)
  df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle],axis=0,ignore_index=True)
  df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


TEST data  :  153
Final shape: (153, 4242, 1), (153, 303, 1), (153, 303, 1)


# For individual K-fold

In [1]:
# Declare K-FOLD cross-validation
kfold = KFold(n_splits=5, random_state=41, shuffle=True)
# Always put the subject list in kfold.split(here)
countfold = -1
for train,test in kfold.split(arrName):
    # Numbering for current fold
    countfold = countfold + 1
    # Create dataset
    # Subject list used for this training session
    print(f"Num. of fold: {countfold}")
    print(f'{arrName[train]}\nNum for train:{len(arrName[train])}')
    print(f'{arrName[test]}\nNum for test:{len(arrName[test])}')
    print(f'Num total:{len(arrName[train])+len(arrName[test])}')
    idx4train,idx4test = kfold2subfold(arrName,listFromxlsx,train,test)
    
    break
    # Delete later or turn into a function

NameError: name 'KFold' is not defined

In [35]:
len(idx4train), len(idx4test)

(722, 155)

## Train data

In [6]:
# Create complete list at once and set up for use as needed
# Store all data and make it extractable by subject
columnsWOMAG = makeColumnsWOMAG()
X_columns = [str(i) for i in range(0,42)]
df_trainData_X = pd.DataFrame(columns=X_columns)

Y_columns = [str(i) for i in range(0,3)]
df_trainData_Y_angle = pd.DataFrame(columns=Y_columns) # angle has 3 axes
df_trainData_Y_moBWHT = pd.DataFrame(columns=Y_columns) # moment also has 3 axes

# Store only data corresponding to training set!
# Valid and test sets can be created the same way!
for idx, datum in enumerate(tqdm([listFromFolder[i] for i in idx4train])):
    df = pd.read_csv(join(TargetDir,datum))
    # First exclude all MAG data from dataset
    dfWOMAG = df.loc[:,columnsWOMAG]
    # If measured moment leg is nonleg, keep file as is
    if listFromxlsx.loc[idx,'side'] == "oaleg":
        targetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z']
        nonTargetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z'] # exclude mag
        otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z'] 
    else:
        targetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z'] # exclude mag
        nonTargetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z'] # exclude mag
        otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z']
    # Always create data in the same order
    concated = pd.concat([targetLegArr, nonTargetLegArr,otherArr],axis=1)

    # Rename columns
    # Need to stack data..
    concated.columns = X_columns
    # Accumulate input data
    df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
    ##############################################################
    # Create output data
    # kinematic(Angle)
    angle = dfWOMAG.loc[:,'ANGLE_X':'ANGLE_Z']
    angle.columns = Y_columns
    # Accumulate output data
    df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
    # kinetic(moment)
    moBWHT = dfWOMAG.loc[:,'MOBWHT_X':'MOBWHT_Z']
    moBWHT.columns = Y_columns
    # Accumulate output data
    df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


  0%|          | 0/723 [00:00<?, ?it/s]

  df_trainData_X = pd.concat([df_trainData_X, concated],axis=0,ignore_index=True)
  df_trainData_Y_angle = pd.concat([df_trainData_Y_angle, angle],axis=0,ignore_index=True)
  df_trainData_Y_moBWHT = pd.concat([df_trainData_Y_moBWHT, moBWHT],axis=0,ignore_index=True)


In [17]:
# Apply Scaler
# Currently only applying MinMaxScaler (not MinMaxScaler3D)
scaler4X = MinMaxScaler()  # Min-Max normalization (0-1 range)
scaler4Y_angle = MinMaxScaler()  # Min-Max normalization (0-1 range) 
scaler4Y_moBWHT = MinMaxScaler()  # Min-Max normalization (0-1 range)

# Fit scalers to training data only - critical for preventing data leakage
scaler4X.fit(df_trainData_X)  # ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!
scaler4Y_angle.fit(df_trainData_Y_angle)  # ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!
scaler4Y_moBWHT.fit(df_trainData_Y_moBWHT)  # ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!ONLY FOR TRAIN DATA!!!!

# Apply scaling transformations using fitted parameters
scaled_X_train = scaler4X.transform(df_trainData_X)
scaled_Y_angle_train = scaler4Y_angle.transform(df_trainData_Y_angle)
scaled_Y_moBWHT_train = scaler4Y_moBWHT.transform(df_trainData_Y_moBWHT)

# Set scaler save location
scalerDir = join(dataDir, r'SAVE_fittedScaler')
make_dir(scalerDir)  # Create directory if it doesn't exist

# Save fitted scalers for later use on test data and model deployment
# Reference: https://wooono.tistory.com/360
dump(scaler4X, open(join(scalerDir,f"{countfold}_fold_scaler4X.pkl"), 'wb'))  # Save input data scaler
dump(scaler4Y_angle, open(join(scalerDir,f'{countfold}_fold_scaler4Y_angle.pkl'), 'wb'))  # Save angle output scaler
dump(scaler4Y_moBWHT, open(join(scalerDir,f'{countfold}_fold_scaler4Y_moBWHT.pkl'), 'wb'))  # Save moment output scaler

In [18]:
# Now that we've properly scaled the data, let's restore the original structure!
# Desired shape format 
# (N, 4242, 1), (N, 303, 1), (N, 303, 1)   N is the number of data samples
X_train = []
Y_angle_train = []
Y_moBWHT_train = []
for i in range(0,len(idx4train)):
    # Extract 101 time points for each sample from the scaled data
    chopped_X_train = scaled_X_train[i*101:101+i*101,:]  # Get rows i*101 to (i+1)*101 for sample i
    X_train.append(chopped_X_train.flatten('F').reshape(-1,1))  # Flatten in Fortran order and reshape to column vector
    
    chopped_Y_angle_train= scaled_Y_angle_train[i*101:101+i*101,:]  # Extract angle data for sample i
    Y_angle_train.append(chopped_Y_angle_train.flatten('F').reshape(-1,1))  # Flatten and reshape to column vector
    
    chopped_Y_moBWHT_train= scaled_Y_moBWHT_train[i*101:101+i*101,:]  # Extract moment data for sample i
    Y_moBWHT_train.append(chopped_Y_moBWHT_train.flatten('F').reshape(-1,1))  # Flatten and reshape to column vector

# Convert lists to numpy arrays for final dataset format
final_X_train = np.array(X_train)
final_Y_angle_train = np.array(Y_angle_train)
final_Y_moBWHT_train = np.array(Y_moBWHT_train)

In [19]:
# Check the shape of created data! 
print(f'TRAIN data  :  {len(idx4train)}')
print(f'Final shape: {final_X_train.shape}, {final_Y_angle_train.shape}, {final_Y_moBWHT_train.shape}')

# Data save location
setDir = join(dataDir, r'SAVE_dataSet')
make_dir(setDir)  # Create directory if it doesn't exist

# Save data
# Reference: https://machinelearningmastery.com/how-to-save-a-numpy-array-to-file-for-machine-learning/
savez_compressed(join(setDir,f"{countfold}_fold_final_train.npz"), 
                final_X_train=final_X_train,
                final_Y_angle_train=final_Y_angle_train,
                final_Y_moBWHT_train=final_Y_moBWHT_train)

TRAIN data  :  723
Final shape: (723, 4242, 1), (723, 303, 1), (723, 303, 1)


In [20]:
# Verify data loading
loaded_train = np.load(join(setDir,f"{countfold}_fold_final_train.npz"))
print(f'Loaded final shape: {loaded_train["final_X_train"].shape}, {loaded_train["final_Y_angle_train"].shape}, {loaded_train["final_Y_moBWHT_train"].shape}')

Loaded final shape: (723, 4242, 1), (723, 303, 1), (723, 303, 1)


In [30]:
# Data for presentation
if True:
    # Save scaled data for the first training sample
    pd.DataFrame(final_X_train[0]).to_csv("CBD_IMU_1.csv",index=False)
    pd.DataFrame(final_Y_angle_train[0]).to_csv("CBD_angle_1.csv",index=False)
    pd.DataFrame(final_Y_moBWHT_train[0]).to_csv("CBD_moBWHT_1.csv",index=False)
    
    # Restore to original data format! Confirmed this works well!
    reshaped = final_X_train[0].reshape(-1,42, order='F')  # When restoring to original data!
    result = scaler4X.inverse_transform(reshaped)
    pd.DataFrame(result).to_csv("CBD_IMU_rescaled_1.csv",index=False)
    
    reshaped = final_Y_angle_train[0].reshape(-1,3, order='F')  # When restoring to original data!
    result = scaler4Y_angle.inverse_transform(reshaped)
    pd.DataFrame(result).to_csv("CBD_angle_rescaled_1.csv",index=False)
    
    reshaped = final_Y_moBWHT_train[0].reshape(-1,3, order='F')  # When restoring to original data!
    result = scaler4Y_moBWHT.inverse_transform(reshaped)
    pd.DataFrame(result).to_csv("CBD_moBWHT_rescaled_1.csv",index=False)


## Test Data

In [29]:
# Set up data structures for test dataset creation
# Define column structure without magnetometer data for consistency
columnsWOMAG = makeColumnsWOMAG()
X_columns = [str(i) for i in range(0,42)]  # 42 sensor features (ACC+GYRO only, no MAG)
df_testData_X = pd.DataFrame(columns=X_columns)

Y_columns = [str(i) for i in range(0,3)]  # 3-axis outputs (X, Y, Z)
df_testData_Y_angle = pd.DataFrame(columns=Y_columns)  # Joint angle predictions
df_testData_Y_moBWHT = pd.DataFrame(columns=Y_columns)  # Joint moment predictions

# Process each test file to build consolidated test dataset
# Use same preprocessing pipeline as training data to ensure consistency
for idx, datum in enumerate(tqdm([listFromFolder[i] for i in idx4test])):
    df = pd.read_csv(join(TargetDir,datum))
    
    # Remove magnetometer columns to focus on accelerometer and gyroscope data
    dfWOMAG = df.loc[:,columnsWOMAG]
    
    # Standardize sensor arrangement regardless of which leg was measured
    # Critical: Always put target leg first for consistent neural network input
    if listFromxlsx.loc[idx,'side'] == "oaleg":
        # If operated leg was measured, arrange as: operated leg -> non-operated leg -> trunk
        targetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z']
        nonTargetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z']
        otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z'] 
    else:
        # If non-operated leg was measured, arrange as: non-operated leg -> operated leg -> trunk
        targetLegArr = dfWOMAG.loc[:,'non_shank_ACC_X':'non_thigh_GYRO_Z']
        nonTargetLegArr = dfWOMAG.loc[:,'oa_shank_ACC_X':'oa_thigh_GYRO_Z']
        otherArr = dfWOMAG.loc[:,'trunk_ACC_X':'trunk_GYRO_Z']
    
    # Concatenate sensor data in standardized order: target leg, non-target leg, trunk
    concated = pd.concat([targetLegArr, nonTargetLegArr, otherArr], axis=1)

    # Rename columns to generic numbers for easier processing downstream
    concated.columns = X_columns
    
    # Append this sample's input features to the growing test dataset
    df_testData_X = pd.concat([df_testData_X, concated], axis=0, ignore_index=True)
    
    ##############################################################
    # Extract output targets that the model will learn to predict
    
    # Joint kinematics (angles) - how much the joint bends in each direction
    angle = dfWOMAG.loc[:,'ANGLE_X':'ANGLE_Z']
    angle.columns = Y_columns
    df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle], axis=0, ignore_index=True)
    
    # Joint kinetics (moments) - forces/torques applied at the joint
    moBWHT = dfWOMAG.loc[:,'MOBWHT_X':'MOBWHT_Z']
    moBWHT.columns = Y_columns
    df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT], axis=0, ignore_index=True)

  0%|          | 0/153 [00:00<?, ?it/s]

  df_testData_X = pd.concat([df_testData_X, concated], axis=0, ignore_index=True)
  df_testData_Y_angle = pd.concat([df_testData_Y_angle, angle], axis=0, ignore_index=True)
  df_testData_Y_moBWHT = pd.concat([df_testData_Y_moBWHT, moBWHT], axis=0, ignore_index=True)


In [23]:
# Apply scaling to test data using previously fitted scalers
# Critical: Use scalers fitted ONLY on training data to prevent data leakage
scaled_X_test = scaler4X.transform(df_testData_X)  # Scale test input features using training data statistics
scaled_Y_angle_test = scaler4Y_angle.transform(df_testData_Y_angle)  # Scale test angle outputs using training data statistics
scaled_Y_moBWHT_test = scaler4Y_moBWHT.transform(df_testData_Y_moBWHT)  # Scale test moment outputs using training data statistics

In [24]:
# Now that we've properly scaled the data, let's separate it back into individual samples!
# Desired shape format 
# (N, 4242, 1), (N, 303, 1), (N, 303, 1)   N is the number of data samples
X_test = []
Y_angle_test = []
Y_moBWHT_test = []
for i in range(0,len(idx4test)):
    # Extract 101 time points for each test sample from the scaled data
    chopped_X_test = scaled_X_test[i*101:101+i*101,:]  # Get rows i*101 to (i+1)*101 for sample i
    X_test.append(chopped_X_test.flatten('F').reshape(-1,1))  # Flatten in Fortran order and reshape to column vector
    
    chopped_Y_angle_test= scaled_Y_angle_test[i*101:101+i*101,:]  # Extract angle data for sample i
    Y_angle_test.append(chopped_Y_angle_test.flatten('F').reshape(-1,1))  # Flatten and reshape to column vector
    
    chopped_Y_moBWHT_test= scaled_Y_moBWHT_test[i*101:101+i*101,:]  # Extract moment data for sample i
    Y_moBWHT_test.append(chopped_Y_moBWHT_test.flatten('F').reshape(-1,1))  # Flatten and reshape to column vector

# Convert lists to numpy arrays for final test dataset format
final_X_test = np.array(X_test)
final_Y_angle_test = np.array(Y_angle_test)
final_Y_moBWHT_test = np.array(Y_moBWHT_test)

In [27]:
# Check the shape of created data! 
print(f'TEST data  :  {len(idx4test)}')
print(f'Final shape: {final_X_test.shape}, {final_Y_angle_test.shape}, {final_Y_moBWHT_test.shape}')

# Data save location
setDir = join(dataDir, r'SAVE_dataSet')
make_dir(setDir)  # Create directory if it doesn't exist

# Save data
savez_compressed(join(setDir,f"{countfold}_fold_final_test.npz"), 
                final_X_test=final_X_test,
                final_Y_angle_test=final_Y_angle_test,
                final_Y_moBWHT_test=final_Y_moBWHT_test)


TEST data  :  153
Final shape: (153, 4242, 1), (153, 303, 1), (153, 303, 1)


In [28]:
# Verify data loading
loaded_test = np.load(join(setDir,f"{countfold}_fold_final_test.npz"))
print(f'Loaded final shape: {loaded_test["final_X_test"].shape}, {loaded_test["final_Y_angle_test"].shape}, {loaded_test["final_Y_moBWHT_test"].shape}')

Loaded final shape: (153, 4242, 1), (153, 303, 1), (153, 303, 1)


# End

# What was newly learned

In [63]:
# How to reshape data
flat_concat.shape, concated.to_numpy().reshape(101,42,1).shape

((4242, 1), (101, 42, 1))

In [70]:
# Create a 2D numpy array with 4 rows and 3 columns
a = np.array([[1,2,3],[4,5,6],[7,8,9],[10,11,12]])

# Extract the first row (all columns) and check the overall array shape
a[0,:], a.shape  # Returns: (array([1, 2, 3]), (4, 3))


(array([1, 2, 3]), (4, 3))

In [94]:
# Reshape must be done in order: (rows, columns, count) to be correct
# concated consists of 101 rows and 42 columns
concated.to_numpy().reshape(101,42,1)[:,0,0]  # Reads in the correct direction

array([-2.08866167, -1.6116709 , -1.12578921, -0.69913567, -0.31552896,
        0.01626456,  0.2934858 ,  0.49830202,  0.62481276,  0.67396727,
        0.66056931,  0.60059968,  0.50080002,  0.36252644,  0.18663478,
       -0.0244365 , -0.26707541, -0.52839363, -0.78795609, -1.01967686,
       -1.20064522, -1.31313809, -1.34727698, -1.30148371, -1.18328261,
       -1.01031294, -0.80652645, -0.59887084, -0.41240249, -0.26469963,
       -0.1639805 , -0.11075129, -0.09801369, -0.11699963, -0.1552758 ,
       -0.19970287, -0.2355143 , -0.24934117, -0.23495442, -0.19335586,
       -0.13348058, -0.066768  , -0.00379263,  0.04791029,  0.08493437,
        0.10705636,  0.1168679 ,  0.11778726,  0.1139358 ,  0.10917843,
        0.10651575,  0.10806219,  0.11395871,  0.12298314,  0.13297936,
        0.14167265,  0.14704517,  0.14762067,  0.14238325,  0.13102786,
        0.11354904,  0.09108217,  0.06584669,  0.04045179,  0.01679698,
       -0.00449873, -0.02328051, -0.03918528, -0.0509166 , -0.05

In [230]:
# When you need to split into train/valid/test (two-level splitting)
arrName = np.array(['P002', 'P007', 'P017', 'P029', 'P050', 'P061', 'P065', 'P066',
       'P069', 'P104', 'P105', 'P106', 'P115', 'P119', 'P121', 'P132',
       'P134', 'P135', 'P136', 'P142', 'P147', 'P149', 'P155', 'P168',
       'P169', 'P172', 'P196', 'P203', 'P205', 'P222', 'P225', 'P226',
       'P229', 'P243', 'P245', 'P258', 'P263', 'P266', 'P270', 'P272',
       'P273', 'P277', 'P290', 'P297'])

# Preliminary work for K-fold cross-validation
# Enable folding by subject
# First declare K-FOLD
kfold = KFold(n_splits=5, random_state=4, shuffle=True)

# Always put the subject list in kfold.split
for train_whole,test in kfold.split(arrName):
    # Create dataset
    # Subject list used for this training session
    print(arrName[train_whole])  # Subjects for training+validation combined
    print(arrName[test])         # Subjects for final testing
    print(f'{len(arrName[train_whole])},{len(arrName[test])}')
    print("+++++++++++")
    break

# Second split: divide training subjects into training and validation
arrTWhole = arrName[train_whole]  # Take the training+validation subjects
for train,valid in kfold.split(arrTWhole):
    print(arrTWhole[train])  # Final training subjects
    print(arrTWhole[valid])  # Validation subjects  
    print(f'{len(arrTWhole[train])},{len(arrName[valid])}')
    break

['P002' 'P007' 'P017' 'P029' 'P061' 'P065' 'P069' 'P104' 'P105' 'P106'
 'P115' 'P119' 'P121' 'P135' 'P147' 'P149' 'P155' 'P168' 'P169' 'P172'
 'P196' 'P203' 'P222' 'P225' 'P229' 'P243' 'P245' 'P258' 'P263' 'P266'
 'P270' 'P272' 'P273' 'P277' 'P290']
['P050' 'P066' 'P132' 'P134' 'P136' 'P142' 'P205' 'P226' 'P297']
35,9
+++++++++++
['P002' 'P007' 'P017' 'P029' 'P061' 'P065' 'P069' 'P104' 'P105' 'P106'
 'P115' 'P119' 'P121' 'P135' 'P147' 'P169' 'P172' 'P203' 'P225' 'P229'
 'P243' 'P245' 'P258' 'P266' 'P270' 'P272' 'P273' 'P290']
['P149' 'P155' 'P168' 'P196' 'P222' 'P263' 'P277']
28,7


In [96]:
# Reference: https://stackoverflow.com/questions/50125844/how-to-standard-scale-a-3d-matrix
# God bless this programmer..
# Scaling procedure/workflow
X_tmp = X_train.copy()  # Create a copy to avoid modifying original data

# Declare/initialize the 3D scaler
scaler = StandardScaler3D()

# Fit the scaler to learn scaling parameters (mean and std)
scaler.fit(X_tmp)

# Apply scaling transformation using learned parameters
scaled_X_tmp = scaler.transform(X_tmp)

# Reverse scaling transformation to get back original values
rescaled_X_tmp = scaler.inverse_transform(scaled_X_tmp)