In [7]:
import pandas as pd
import numpy as np
import os

In [10]:
# Example dictionary for leg lengths (replace with actual data)
leg_lengths = {
    "sub1_1_normal": 0.84,
    "sub1_2_normal": 0.84,
    "sub1_3_normal": 0.84,
    "sub2_1_normal": 0.94,
    "sub2_2_normal": 0.94,
    "sub3_1_normal": 0.91,
    "sub3_2_normal": 0.91,
    "sub3_3_normal": 0.91,
    "sub4_1_normal": 0.89,
    "sub4_2_normal": 0.89,
    "sub4_3_normal": 0.89,
    "sub5_1_normal": 0.74,
    "sub5_2_normal": 0.74,
    "sub5_3_normal": 0.74,
    "sub6_2_abnormal_stance": 0.74,
    "sub6_3_abnormal_stance": 0.74,
    "sub6_4_abnormal_swing": 0.74,
    "sub6_5_abnormal_swing": 0.74,
    "sub6_6_abnormal_swing": 0.74,
    "sub7_1_abnormal_stance": 0.79,
    "sub7_2_abnormal_stance": 0.79,
    "sub7_3_abnormal_stance": 0.79,
    "sub8_1_abnormal_swing": 0.74,
    "sub8_2_abnormal_swing": 0.74,
    "sub8_3_abnormal_swing": 0.74,
    "sub9_1_normal": 0.81,
    "sub9_2_normal": 0.81,
    "sub9_3_normal": 0.81,
    "sub10_2_normal": 0.74,
    "sub10_3_normal": 0.74
}

In [12]:
#FINAL FUNCTION:
# Feature extraction functions
def calculate_grf(cycle_data):
    resultant_acceleration = np.sqrt(
        cycle_data['X_Accel']**2 + cycle_data['Y_Accel']**2 + cycle_data['Z_Accel']**2
    )
    return np.max(resultant_acceleration)

def calculate_stride_time(cycle_data):
    # Using number of inputs as a proxy for time
    return len(cycle_data)

def detect_heel_strike(cycle_data):
    heel_strike = cycle_data['Z_Accel'].idxmin()
    return heel_strike

def calculate_mean_acceleration(cycle_data):
    resultant_acceleration = np.sqrt(
        cycle_data['X_Accel']**2 + cycle_data['Y_Accel']**2 + cycle_data['Z_Accel']**2
    )
    return resultant_acceleration.mean()

def calculate_sd_acceleration(cycle_data):
    resultant_acceleration = np.sqrt(
        cycle_data['X_Accel']**2 + cycle_data['Y_Accel']**2 + cycle_data['Z_Accel']**2
    )
    return resultant_acceleration.std()

def calculate_angular_motion(cycle_data):
    resultant_angular_velocity = np.sqrt(
        cycle_data['X_Gyro']**2 + cycle_data['Y_Gyro']**2 + cycle_data['Z_Gyro']**2
    )
    return np.max(resultant_angular_velocity) - np.min(resultant_angular_velocity)

def calculate_peak_velocity(cycle_data):
    resultant_velocity = np.sqrt(
        cycle_data['X_Gyro']**2 + cycle_data['Y_Gyro']**2 + cycle_data['Z_Gyro']**2
    )
    return np.max(resultant_velocity)

def calculate_velocity(cycle_data):
    resultant_velocity = np.sqrt(
        cycle_data['X_Gyro']**2 + cycle_data['Y_Gyro']**2 + cycle_data['Z_Gyro']**2
    )
    return resultant_velocity.mean()

def calculate_percent_of_cycle(cycle_data, total_inputs):
    return (len(cycle_data) / total_inputs) * 100

def calculate_total_cycle_duration(stance_cycle, swing_cycle):
    # Total number of inputs as a proxy for time
    return len(stance_cycle) + len(swing_cycle)

# Feature extraction and file saving function
# Feature extraction and file saving function
def extract_and_save_features(runthrough_path, output_folder):
    for runthrough in os.listdir(runthrough_path):
        runthrough_folder = os.path.join(runthrough_path, runthrough)
        
        if os.path.isdir(runthrough_folder):
            print(f"Processing {runthrough}...")

            # Create output folder for the runthrough
            runthrough_output_folder = os.path.join(output_folder, runthrough)
            os.makedirs(runthrough_output_folder, exist_ok=True)

            # Process stance and swing phases
            stance_phases = []
            swing_phases = []
            for phase_file in os.listdir(runthrough_folder):
                if phase_file.endswith('.csv'):
                    phase_path = os.path.join(runthrough_folder, phase_file)
                    phase_data = pd.read_csv(phase_path)

                    if 'stance' in phase_file:
                        stance_phases.append(phase_data)
                    elif 'swing' in phase_file:
                        swing_phases.append(phase_data)

            # Extract features for each phase (stance and swing)
            total_inputs = sum(len(phase) for phase in stance_phases + swing_phases)  # Total inputs for the entire cycle

            for i, (stance, swing) in enumerate(zip(stance_phases, swing_phases)):
                total_cycle_duration = calculate_total_cycle_duration(stance, swing)  # Combined feature

                # Stance features
                stance_features = {
                    'GRF': calculate_grf(stance),
                    'StrideTime': calculate_stride_time(stance),
                    'HeelStrike': detect_heel_strike(stance),
                    'MeanAcceleration': calculate_mean_acceleration(stance),
                    'SDAcceleration': calculate_sd_acceleration(stance),
                    'PercentStance': calculate_percent_of_cycle(stance, total_inputs),
                    'GeneralVelocity': calculate_velocity(stance),
                    'TotalCycleDuration': total_cycle_duration,  # Add combined feature
                }
                # Save stance features
                stance_output_file = os.path.join(runthrough_output_folder, f"stance_features_{i+1}.csv")
                pd.DataFrame([stance_features]).to_csv(stance_output_file, index=False)
                print(f"Saved stance features for phase {i + 1} in {runthrough}")

                # Swing features
                swing_features = {
                    'SwingTime': calculate_stride_time(swing),
                    'PeakVelocity': calculate_peak_velocity(swing),
                    'AngularMotion': calculate_angular_motion(swing),
                    'MeanAcceleration': calculate_mean_acceleration(swing),
                    'SDAcceleration': calculate_sd_acceleration(swing),
                    'PercentSwing': calculate_percent_of_cycle(swing, total_inputs),
                    'GeneralVelocity': calculate_velocity(swing),
                    'TotalCycleDuration': total_cycle_duration,  # Add combined feature
                }
                # Save swing features
                swing_output_file = os.path.join(runthrough_output_folder, f"swing_features_{i+1}.csv")
                pd.DataFrame([swing_features]).to_csv(swing_output_file, index=False)
                print(f"Saved swing features for phase {i + 1} in {runthrough}")


In [13]:
# Example directories (adjust as needed)
input_folder = r"C:\Users\diyav\.jupyter\Normalized Data"
output_folder = r"C:\Users\diyav\.jupyter\Feature Data"

# Start feature extraction and saving
extract_and_save_features(input_folder, output_folder)

Processing sub10_2_normal...
Saved stance features for phase 1 in sub10_2_normal
Saved swing features for phase 1 in sub10_2_normal
Saved stance features for phase 2 in sub10_2_normal
Saved swing features for phase 2 in sub10_2_normal
Saved stance features for phase 3 in sub10_2_normal
Saved swing features for phase 3 in sub10_2_normal
Saved stance features for phase 4 in sub10_2_normal
Saved swing features for phase 4 in sub10_2_normal
Saved stance features for phase 5 in sub10_2_normal
Saved swing features for phase 5 in sub10_2_normal
Saved stance features for phase 6 in sub10_2_normal
Saved swing features for phase 6 in sub10_2_normal
Saved stance features for phase 7 in sub10_2_normal
Saved swing features for phase 7 in sub10_2_normal
Saved stance features for phase 8 in sub10_2_normal
Saved swing features for phase 8 in sub10_2_normal
Saved stance features for phase 9 in sub10_2_normal
Saved swing features for phase 9 in sub10_2_normal
Saved stance features for phase 10 in sub10_