In [1]:
import pandas as pd 
import numpy as np
import os 

In [2]:
# Version label of the analysis folder; used as a path component when hc_out_path is built.
analysis_version = '008'

# Functions 

In [3]:
# Split each partition into its own DataFrame

def split_by_parition(df):
    """Split a raw pass-by-pass sheet into one DataFrame per partition.

    Rows whose first column contains the text "Partition" are treated as
    section title rows. The rows between one title row and the next (or the
    end of the sheet) form that section; fully blank rows are dropped.

    The section titled 'Partition Line' supplies the column names: its values
    are flattened and assigned as the column labels of every other section,
    and it is then removed from the result.

    Parameters
    ----------
    df : pandas.DataFrame
        Sheet read without a header row (e.g. ``pd.read_excel(path,
        header=None)``), so that the first column is labelled ``0``.

    Returns
    -------
    dict
        Maps each partition title (the text in the first column of its title
        row) to that partition's DataFrame, in order of appearance. Each frame
        has a fresh 0-based index. Because ``reset_index()`` keeps the old
        index, the first column of each frame holds the original row labels of
        ``df``; that column is named after the row label of the header row.
        The remaining columns carry the names from the 'Partition Line' row.

    Raises
    ------
    KeyError
        If the sheet has no 'Partition Line' section.
    """
    header_key = 'Partition Line'

    # Work with row *positions*, not index labels: the slices below use .iloc,
    # so labels would only be correct for a default RangeIndex.
    is_title_row = df[0].str.contains("Partition", na=False).to_numpy(dtype=bool)
    title_positions = np.flatnonzero(is_title_row).tolist()

    # Each section ends where the next title row starts; the last one runs to
    # the end of the sheet.
    section_ends = title_positions[1:] + [len(df)]

    partitions = {}
    for start_pos, end_pos in zip(title_positions, section_ends):
        partition_name = df.iloc[start_pos, 0]
        # Rows strictly between the title rows, without fully blank rows.
        section = df.iloc[start_pos + 1:end_pos].dropna(how='all')
        partitions[partition_name] = section.reset_index()

    if header_key not in partitions:
        raise KeyError(
            f"{header_key!r} section not found; cannot determine column names"
        )

    # The header section is removed by name rather than by assuming it is the
    # first entry, and its flattened values become the column names.
    column_names = partitions.pop(header_key).values.flatten()
    for section in partitions.values():
        section.columns = column_names

    return partitions

In [4]:
# calculate difference between first and last partition 

def calc_first_last_partition_diffs(partition_dict, filename, summary_row=3):
    """Return last-minus-first partition differences for each gait metric.

    The first and last entries of ``partition_dict`` (in insertion order) are
    compared. For every metric column listed below, the value at row label
    ``summary_row`` of the first partition is subtracted from the value at the
    same row of the last partition, and the result is rounded to 3 decimals.

    The names of the first and last partitions are printed as a side effect.

    Parameters
    ----------
    partition_dict : dict
        Maps partition name to its DataFrame. Each frame must contain the
        metric columns listed in ``metric_columns`` below and a row labelled
        ``summary_row``.
    filename : str
        Identifier stored in the ``filename`` column of the result.
    summary_row : optional
        Row label read from each partition (default 3).
        NOTE(review): the output names and the original comments suggest row 3
        is the mean over all strides; confirm against the export layout.

    Returns
    -------
    pandas.DataFrame
        One row with columns ``filename``, ``first_partition``,
        ``last_partition`` followed by one ``mean_*_diff`` column per metric.

    Raises
    ------
    IndexError
        If ``partition_dict`` is empty.
    KeyError
        If a metric column or ``summary_row`` is missing from a partition.
    """
    # Output column name -> source column in the partition frames. The order
    # here is the column order of the returned frame.
    metric_columns = {
        'mean_stride_time_diff': 'Stride Time (sec.)',
        'mean_cadence_diff': 'Cadence (steps/min.)',
        'mean_stride_width_diff': 'Stride Width (cm.)',
        'mean_stance_time_diff': 'Stance Time (sec.)',
        'mean_stance_per_diff': 'Stance %',
        'mean_swing_time_diff': 'Swing Time (sec.)',
        'mean_swing_per_diff': 'Swing %',
        'mean_single_support_time_diff': 'Single Support (sec.)',
        'mean_single_support_per_diff': 'Single Support %',
        'mean_ini_d_support_time_diff': 'Initial D. Support (sec.)',
        'mean_ini_d_support_per_diff': 'Initial D. Support %',
        'mean_term_d_support_time_diff': 'Terminal D. Support (sec.)',
        'mean_term_d_support_per_diff': 'Terminal D. Support %',
        'mean_tot_d_support_time_diff': 'Total D. Support (sec.)',
        'mean_tot_d_support_per_diff': 'Total D. Support %',
    }

    # Names of the partitions, in dictionary (insertion) order.
    keys_list = list(partition_dict.keys())
    first_key = keys_list[0]
    last_key = keys_list[-1]

    print('first and last partitions')
    print(first_key)
    print(last_key)
    first_partition = partition_dict[first_key]
    last_partition = partition_dict[last_key]

    # Single-element lists so the dict builds a one-row DataFrame.
    record = {
        'filename': [filename],
        'first_partition': [first_key],
        'last_partition': [last_key],
    }
    for out_name, source_col in metric_columns.items():
        diff = (last_partition.loc[summary_row, source_col]
                - first_partition.loc[summary_row, source_col])
        record[out_name] = [round(diff, 3)]

    return pd.DataFrame(data=record)

# Load data and set output Folders 

In [5]:
# Output folder for the healthy-control pass-by-pass results of this analysis version.
# NOTE(review): absolute, user-specific path; consider moving the root into a config cell.
hc_out_path = os.path.join(
    r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
    analysis_version,
    '002_video_vs_mat_metrics',
    'a_zeno_pass_by_pass',
    'healthy_controls',
)

# exist_ok=True creates any missing folders and is a no-op when the folder is
# already there, without a separate check-then-create step.
os.makedirs(hc_out_path, exist_ok=True)

## Folder with Pass by Pass partitioned Zeno data 

### Healthy Controls 

In [6]:
# Root folder of the healthy-control pass-by-pass workbooks; it is walked
# recursively with os.walk for .xlsx files when the partition differences are computed.
zeno_pass_by_pass_input = r"C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_zeno_metrics_by_pass\healthy_controls"

# Calculate difference between first and last pass (partition)

In [7]:
# Collect one single-row DataFrame of first-vs-last partition differences per workbook.
all_diffs = []

for dir_path, dir_names, file_names in os.walk(zeno_pass_by_pass_input):
    # Sorting the directory list in place fixes the traversal order, and
    # sorting the files fixes the row order, so the CSV is reproducible
    # regardless of how the file system lists entries.
    dir_names.sort()
    for file_name in sorted(file_names):
        name, ext = os.path.splitext(file_name)
        current_in_path = os.path.join(dir_path, file_name)  # full path to file

        # Only Excel workbooks whose full path does not contain 'Notes'.
        if ext.lower() != '.xlsx' or 'Notes' in current_in_path:
            continue
        # Excel creates '~$<name>.xlsx' lock files next to open workbooks;
        # they are not readable workbooks and would make read_excel fail.
        if file_name.startswith('~$'):
            continue

        print(current_in_path)
        current_df = pd.read_excel(current_in_path, header=None)

        # separate file into partitions
        current_partitions = split_by_parition(current_df)

        # calculate diff between first and last partition
        all_diffs.append(calc_first_last_partition_diffs(current_partitions, name))

if not all_diffs:
    # pd.concat would raise a ValueError with a less helpful message here.
    raise ValueError(f"No .xlsx files found under {zeno_pass_by_pass_input!r}")

all_diffs_df = pd.concat(all_diffs, ignore_index=True)
all_diffs_df.to_csv(os.path.join(hc_out_path, 'all_metric_diff_last_first_partition.csv'))
all_diffs_df.head()

C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_zeno_metrics_by_pass\healthy_controls\BW_0067_2022_11_28_FW.xlsx
first and last partitions
Partition: 1
Partition: 4
C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_zeno_metrics_by_pass\healthy_controls\BW_0067_2022_11_28_PWS.xlsx
first and last partitions
Partition: 1
Partition: 4
C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_zeno_metrics_by_pass\healthy_controls\BW_0086_2024_11_12_FW.xlsx
first and last partitions
Partition: 1
Partition: 3
C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_zeno_metrics_by_pass\healthy_controls\BW_0086_2024_11_12_PWS.xlsx
first and last partitions
Partition: 1
Partition: 4
C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_zeno_metrics_by_pass\healthy_controls\BW_0121_2024_12_17_FW.xlsx
first and last partitions
Partition: 1
Partition: 4
C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_a

Unnamed: 0,filename,first_partition,last_partition,mean_stride_time_diff,mean_cadence_diff,mean_stride_width_diff,mean_stance_time_diff,mean_stance_per_diff,mean_swing_time_diff,mean_swing_per_diff,mean_single_support_time_diff,mean_single_support_per_diff,mean_ini_d_support_time_diff,mean_ini_d_support_per_diff,mean_term_d_support_time_diff,mean_term_d_support_per_diff,mean_tot_d_support_time_diff,mean_tot_d_support_per_diff
0,BW_0067_2022_11_28_FW,Partition: 1,Partition: 4,0.002,0.463,-2.687,-0.004,-0.068,0.001,0.068,0.001,-0.839,0.007,0.594,0.007,0.936,0.012,0.943
1,BW_0067_2022_11_28_PWS,Partition: 1,Partition: 4,0.168,-35.785,-3.88,0.115,9.369,0.016,-9.369,0.016,0.401,0.005,0.125,0.005,-2.682,0.005,-0.447
2,BW_0086_2024_11_12_FW,Partition: 1,Partition: 3,0.159,-17.678,-6.397,0.051,-1.838,0.109,1.838,0.007,-3.01,0.021,1.305,0.021,1.234,0.039,2.073
3,BW_0086_2024_11_12_PWS,Partition: 1,Partition: 4,0.063,-4.624,2.252,0.035,0.697,0.013,-0.697,0.013,-0.733,0.017,0.704,0.017,0.773,0.037,1.582
4,BW_0121_2024_12_17_FW,Partition: 1,Partition: 4,-0.004,0.447,-2.131,0.008,-0.472,0.003,0.472,0.003,0.405,0.0,-0.576,0.0,-0.352,-0.001,-0.605


In [8]:
# Preferred walking speed (PWS): rows whose filename contains 'PWS'
is_pws_file = all_diffs_df['filename'].str.contains('PWS')
pws_metric_diffs = all_diffs_df.loc[is_pws_file]

# Signed mean of every numeric column, rounded to 2 decimals
pws_mean_diff = round(pws_metric_diffs.mean(axis=0, numeric_only=True), 2)

# Mean of the absolute differences; columns from position 3 onward are used
pws_metric_abs_diffs = pws_metric_diffs.iloc[:, 3:].abs()
pws_mean_abs_diff = round(pws_metric_abs_diffs.mean(axis=0, numeric_only=True), 2)

# Put both summaries side by side, add a combined "mean, mean_abs" text column, and save
pws_diffs_all = pd.concat([pws_mean_diff, pws_mean_abs_diff], axis=1)
pws_diffs_all.columns = ['mean_diff', 'mean_abs_diff']
pws_diffs_all['mean_and_mean_abs'] = (
    pws_diffs_all['mean_diff'].astype(str)
    + ', '
    + pws_diffs_all['mean_abs_diff'].astype(str)
)
pws_csv_path = os.path.join(hc_out_path, 'pws_zeno_partitions_mean_diffs_all.csv')
pws_diffs_all.to_csv(pws_csv_path)
pws_diffs_all.head()

Unnamed: 0,mean_diff,mean_abs_diff,mean_and_mean_abs
mean_stride_time_diff,0.01,0.08,"0.01, 0.08"
mean_cadence_diff,-4.27,10.68,"-4.27, 10.68"
mean_stride_width_diff,0.42,2.21,"0.42, 2.21"
mean_stance_time_diff,0.02,0.05,"0.02, 0.05"
mean_stance_per_diff,1.95,2.51,"1.95, 2.51"


In [9]:
# Display the rounded signed mean difference of each metric for the PWS files.
pws_mean_diff

mean_stride_time_diff            0.01
mean_cadence_diff               -4.27
mean_stride_width_diff           0.42
mean_stance_time_diff            0.02
mean_stance_per_diff             1.95
mean_swing_time_diff            -0.01
mean_swing_per_diff             -1.95
mean_single_support_time_diff    0.00
mean_single_support_per_diff    -0.23
mean_ini_d_support_time_diff     0.00
mean_ini_d_support_per_diff     -0.03
mean_term_d_support_time_diff    0.00
mean_term_d_support_per_diff    -0.15
mean_tot_d_support_time_diff     0.01
mean_tot_d_support_per_diff      0.82
dtype: float64

In [10]:
# Display the rounded mean absolute difference of each metric for the PWS files.
pws_mean_abs_diff

mean_stride_time_diff             0.08
mean_cadence_diff                10.68
mean_stride_width_diff            2.21
mean_stance_time_diff             0.05
mean_stance_per_diff              2.51
mean_swing_time_diff              0.03
mean_swing_per_diff               2.51
mean_single_support_time_diff     0.01
mean_single_support_per_diff      0.64
mean_ini_d_support_time_diff      0.01
mean_ini_d_support_per_diff       0.84
mean_term_d_support_time_diff     0.01
mean_term_d_support_per_diff      1.00
mean_tot_d_support_time_diff      0.02
mean_tot_d_support_per_diff       1.19
dtype: float64

In [11]:
# Fast walking speed (FW): rows whose filename contains 'FW'
is_fw_file = all_diffs_df['filename'].str.contains('FW')
fw_metric_diffs = all_diffs_df.loc[is_fw_file]

# Signed mean of every numeric column, rounded to 2 decimals
fw_mean_diff = round(fw_metric_diffs.mean(axis=0, numeric_only=True), 2)

# Mean of the absolute differences; columns from position 3 onward are used
fw_metric_abs_diffs = fw_metric_diffs.iloc[:, 3:].abs()
fw_mean_abs_diff = round(fw_metric_abs_diffs.mean(axis=0, numeric_only=True), 2)

# Put both summaries side by side, add a combined "mean, mean_abs" text column, and save
fw_diffs_all = pd.concat([fw_mean_diff, fw_mean_abs_diff], axis=1)
fw_diffs_all.columns = ['mean_diff', 'mean_abs_diff']
fw_diffs_all['mean_and_mean_abs'] = (
    fw_diffs_all['mean_diff'].astype(str)
    + ', '
    + fw_diffs_all['mean_abs_diff'].astype(str)
)
fw_csv_path = os.path.join(hc_out_path, 'fw_zeno_partitions_mean_diffs_all.csv')
fw_diffs_all.to_csv(fw_csv_path)
fw_diffs_all.head()

Unnamed: 0,mean_diff,mean_abs_diff,mean_and_mean_abs
mean_stride_time_diff,-0.02,0.07,"-0.02, 0.07"
mean_cadence_diff,2.34,7.72,"2.34, 7.72"
mean_stride_width_diff,2.12,4.88,"2.12, 4.88"
mean_stance_time_diff,0.01,0.02,"0.01, 0.02"
mean_stance_per_diff,1.49,2.08,"1.49, 2.08"


### MS pass-by-pass variability (TODO: maybe add later)