In [1]:
# 1/21/2025: summary metrics of the videos included in the analysis - video duration, fps, number and duration of walking segments, etc.

In [2]:
import os
import pandas as pd 
import numpy as np 

## Load data and folders

In [3]:
# analysis version - used below as the subfolder name for both the input and output paths
version = "004"

In [4]:
# input tables - video metrics merged with brainwalk data
# all three CSVs sit in the same version-specific analysis folder, so build that folder once
analysis_version_dir = os.path.join(
    r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
    version)

# zeno video - fast walk (FW)
zv_fw_w_bw_path = os.path.join(analysis_version_dir, 'zv_bw_merged_gait_vertical_FW_1.csv')
zv_fw_w_bw_df = pd.read_csv(zv_fw_w_bw_path, index_col=0)

# zeno video - preferred walking speed (PWS)
zv_pws_w_bw_path = os.path.join(analysis_version_dir, 'zv_bw_merged_gait_vertical_PWS_1.csv')
zv_pws_w_bw_df = pd.read_csv(zv_pws_w_bw_path, index_col=0)

# home video
hv_all_metrics_path = os.path.join(analysis_version_dir, 'hv_bw_merged.csv')
hv_all_metrics_df = pd.read_csv(hv_all_metrics_path, index_col=0)

In [5]:
# output folder - every summary CSV in this notebook is written here
output_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                           version,
                           'video_segment_metric_summary')

# exist_ok=True creates the folder (and any missing parents) on the first run and is a
# no-op on re-runs, so no separate os.path.exists() check is needed
os.makedirs(output_path, exist_ok=True)

## Functions 

In [6]:
# function - descriptive statistics for one column, returned as a one-row table
def video_one_col_stats(df, column_name):
    """Summarize a single column of ``df`` as a one-row DataFrame.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the column to summarize.
    column_name : str
        Name of the column in ``df``.

    Returns
    -------
    pandas.DataFrame
        One row with columns 'Value' (the column name), 'Count', 'Mean',
        'Median', 'Maximum', 'Minimum', 'S.D.' and 'IQR'; every statistic is
        rounded to 2 decimals.
    """
    values = df[column_name]

    # quartiles stay unrounded; only their difference (the IQR) is rounded
    lower_quartile = values.quantile(0.25)
    upper_quartile = values.quantile(0.75)

    summary_row = {
        'Value': column_name,
        'Count': round(values.count(), 2),
        'Mean': round(values.mean(), 2),
        'Median': round(values.median(), 2),
        'Maximum': round(values.max(), 2),
        'Minimum': round(values.min(), 2),
        'S.D.': round(values.std(), 2),
        'IQR': round(upper_quartile - lower_quartile, 2),
    }

    return pd.DataFrame([summary_row])


In [7]:
# function - number and % of rows (videos) at each unique value of one column
def col_counts_and_percent(df, column_name):
    """Tabulate how often each unique value of ``column_name`` occurs.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per video.
    column_name : str
        Column whose unique values are tallied.

    Returns
    -------
    pandas.DataFrame
        Indexed by the unique values (sorted ascending), with two columns: the
        raw count, then the share of rows as a percentage rounded to 1 decimal.
        The column labels are whatever ``value_counts`` assigns.
    """
    values = df[column_name]

    tally = values.value_counts().to_frame()
    percent = (values.value_counts(normalize=True) * 100).round(1).to_frame()

    # side by side (aligned on the unique values), then ordered by value rather than frequency
    return pd.concat([tally, percent], axis=1).sort_index()

In [8]:
# function - % of videos (rows) that have a value in each column
def calculate_per_vid_w_metric(df):
    """Report, for every column of ``df``, the share of rows holding a value.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with one row per video.

    Returns
    -------
    all_cols_per_df : pandas.DataFrame
        One row per column of ``df`` with 'column_name', 'num_rows',
        'rows_with_data' (non-null count) and 'percent_with_data' (rounded to
        2 decimals; NaN when ``df`` has no rows).
    filtered_df : pandas.DataFrame
        Table-ready subset of ``all_cols_per_df`` restricted to the selected
        gait metrics, with columns 'Metric',
        'Videos with Metric Calculated (%)' and 'column_name'. It keeps the
        row index of ``all_cols_per_df``.
    """
    # column-name fragment -> display name used in the table-ready output.
    # Each label is looked up per column, so a label can never be attached to
    # the wrong metric, whatever the column order or number of matches.
    metric_labels = {
        'stride_time_mean_sec_pose': 'Stride Time',
        'mean_cadence_step_per_min_pose': 'Cadence',
        'stride_width_mean_cm_pose': 'Stride Width',
        'foot1_gait_cycle_time_mean_pose': 'Gait Cycle: Stance, Swing, and Support',
    }

    def _label_for(name):
        # display name of the first fragment contained in the column name, else None
        for fragment, label in metric_labels.items():
            if fragment in str(name):
                return label
        return None

    # total number of videos included
    df_len = len(df)

    all_cols_per = []
    for current_col in df.columns:
        # number of non-null values in this column
        current_col_count = df[current_col].count()

        # % of videos (rows) with data; undefined when there are no rows at all
        if df_len:
            current_col_per = round((current_col_count / df_len) * 100, 2)
        else:
            current_col_per = float('nan')

        all_cols_per.append({'column_name': current_col,
                             'num_rows': df_len,
                             'rows_with_data': current_col_count,
                             'percent_with_data': current_col_per})

    # explicit columns keep the frame well-formed even when df has no columns
    all_cols_per_df = pd.DataFrame(all_cols_per,
                                   columns=['column_name', 'num_rows',
                                            'rows_with_data', 'percent_with_data'])

    # prettier one for tables - built as a new frame rather than by writing into a slice
    labels = all_cols_per_df['column_name'].map(_label_for)
    is_selected = labels.notna()

    filtered_df = (
        all_cols_per_df.loc[is_selected, ['column_name', 'percent_with_data']]
        .assign(Metric=labels[is_selected])
        .rename(columns={'percent_with_data': 'Videos with Metric Calculated (%)'})
    )
    filtered_df = filtered_df[['Metric', 'Videos with Metric Calculated (%)', 'column_name']]

    return all_cols_per_df, filtered_df
        

## Split Zeno video dataframes by HC vs. MS participants

In [9]:
# preview the first rows of the merged fast-walk table
zv_fw_w_bw_df.head(n=5)

Unnamed: 0,id_date_pose_zv,video_id_date_name_pose_zv,task_pose_zv,frames_per_second_pose_zv,total_video_duration_sec_pose_zv,walking_segmets_n_pose_zv,walking_segments_duration_mean_pose_zv,walking_segments_duration_median_pose_zv,stride_time_mean_sec_pose_zv,stride_time_median_sec_pose_zv,...,FW_stridetimesecmean,FW_stridewidthcmmean,FW_stridewidthcmsd,FW_totaldsupportmean,FW_totaldsupportratiolr,demographic_diagnosis,edss_severity_num,edss_severity_cat,t25fw_group_num,t25fw_group_cat
0,BW-0002\2023_09_12,gait_vertical_FW_1_BW-0002_2023_09_12,gait_vertical_FW_1,30,36.5,2,5.23,5.23,1.007,1.067,...,1.158,13.909,3.749,30.466,1.003,MS,2.0,moderate,2.0,6_to_8
1,BW-0004\2022_09_19,gait_vertical_FW_1_BW-0004_2022_09_19,gait_vertical_FW_1,30,25.3,2,2.47,2.47,1.433,1.333,...,0.891,7.098,4.085,20.45,0.992,MS,2.0,moderate,1.0,under_6
2,BW-0006\2022_09_26,gait_vertical_FW_1_BW-0006_2022_09_26,gait_vertical_FW_1,30,24.97,1,4.87,4.87,0.957,0.967,...,0.99,7.547,3.187,26.88,0.984,MS,2.0,moderate,1.0,under_6
3,BW-0007\2022_09_27,gait_vertical_FW_1_BW-0007_2022_09_27,gait_vertical_FW_1,30,22.63,4,3.48,3.47,1.027,0.933,...,0.959,11.938,3.076,24.046,1.046,MS,2.0,moderate,1.0,under_6
4,BW-0007\2024_06_10,gait_vertical_FW_1_BW-0007_2024_06_10,gait_vertical_FW_1,30,23.0,4,4.79,4.97,1.028,1.0,...,0.987,10.964,3.005,27.088,1.017,MS,1.0,mild,1.0,under_6


In [10]:
# zeno videos - split each walking task by the 'demographic_diagnosis' column (MS vs HC)

# fast walk
zv_fw_MS_w_bw_df = zv_fw_w_bw_df[zv_fw_w_bw_df['demographic_diagnosis'].eq('MS')]
zv_fw_HC_w_bw_df = zv_fw_w_bw_df[zv_fw_w_bw_df['demographic_diagnosis'].eq('HC')]

# preferred walk
zv_pws_MS_w_bw_df = zv_pws_w_bw_df[zv_pws_w_bw_df['demographic_diagnosis'].eq('MS')]
zv_pws_HC_w_bw_df = zv_pws_w_bw_df[zv_pws_w_bw_df['demographic_diagnosis'].eq('HC')]

# sanity check - each subset should hold a single diagnosis and a single task
for task_label, group_dfs in (('fast walk', (zv_fw_MS_w_bw_df, zv_fw_HC_w_bw_df)),
                              ('preferred walking speed', (zv_pws_MS_w_bw_df, zv_pws_HC_w_bw_df))):
    print(task_label)
    for group_df in group_dfs:
        print('------')
        print(group_df['demographic_diagnosis'].value_counts())
        print(group_df['task_pose_zv'].value_counts())

fast walk
------
demographic_diagnosis
MS    205
Name: count, dtype: int64
task_pose_zv
gait_vertical_FW_1    205
Name: count, dtype: int64
------
demographic_diagnosis
HC    39
Name: count, dtype: int64
task_pose_zv
gait_vertical_FW_1    39
Name: count, dtype: int64
preferred walking speed
------
demographic_diagnosis
MS    215
Name: count, dtype: int64
task_pose_zv
gait_vertical_PWS_1    215
Name: count, dtype: int64
------
demographic_diagnosis
HC    36
Name: count, dtype: int64
task_pose_zv
gait_vertical_PWS_1    36
Name: count, dtype: int64


## Split home video dataframes by task

In [11]:
# home videos - one subset per value of 'task_pose_hv'
# right
hv_r_df = hv_all_metrics_df[hv_all_metrics_df['task_pose_hv'].eq('gait_vertical_right')]
# left
hv_l_df = hv_all_metrics_df[hv_all_metrics_df['task_pose_hv'].eq('gait_vertical_left')]

# sanity check - number of videos in each subset (right first, then left)
for task_df in (hv_r_df, hv_l_df):
    print(task_df['task_pose_hv'].value_counts())

task_pose_hv
gait_vertical_right    31
Name: count, dtype: int64
task_pose_hv
gait_vertical_left    30
Name: count, dtype: int64


## Run Functions 

### Zeno Videos - Fast Walk 

In [12]:
# HC participants - zeno fast walk videos

# summary statistics: one single-row summary per video-level column
(zv_fw_HC_duration_summary_df,
 zv_fw_HC_fps_summary_df,
 zv_fw_HC_segment_n_summary_df,
 zv_fw_HC_segment_duration_summary_df) = [
    video_one_col_stats(zv_fw_HC_w_bw_df, summary_col)
    for summary_col in ('total_video_duration_sec_pose_zv',
                        'frames_per_second_pose_zv',
                        'walking_segmets_n_pose_zv',
                        'walking_segments_duration_mean_pose_zv')
]

# stack the four summaries into one table and save it
zv_fw_HC_stats_df = pd.concat([zv_fw_HC_duration_summary_df,
                               zv_fw_HC_fps_summary_df,
                               zv_fw_HC_segment_n_summary_df,
                               zv_fw_HC_segment_duration_summary_df])
zv_fw_HC_stats_df.to_csv(os.path.join(output_path, 'zv_fw_HC_video_segment_stats.csv'))

# categorical columns: count and % of videos at each fps value
zv_fw_HC_fps_counts_df = col_counts_and_percent(zv_fw_HC_w_bw_df, 'frames_per_second_pose_zv')
zv_fw_HC_fps_counts_df.to_csv(os.path.join(output_path, 'zv_fw_HC_fps_counts.csv'))

# ... and at each number of walking segments
zv_fw_HC_segment_counts_df = col_counts_and_percent(zv_fw_HC_w_bw_df, 'walking_segmets_n_pose_zv')
zv_fw_HC_segment_counts_df.to_csv(os.path.join(output_path, 'zv_fw_HC_segment_counts.csv'))

# % of videos with a value in each column: full table plus the table-ready subset
zv_fw_HC_cols_per_df, zv_fw_HC_filt_cols_per_df = calculate_per_vid_w_metric(zv_fw_HC_w_bw_df)
zv_fw_HC_cols_per_df.to_csv(os.path.join(output_path, 'zv_fw_HC_percent_w_data.csv'))
zv_fw_HC_filt_cols_per_df.to_csv(os.path.join(output_path, 'zv_fw_HC_filtered_percent_w_data.csv'))

In [13]:
# MS participants - zeno fast walk videos

# summary statistics: one single-row summary per video-level column
(zv_fw_MS_duration_summary_df,
 zv_fw_MS_fps_summary_df,
 zv_fw_MS_segment_n_summary_df,
 zv_fw_MS_segment_duration_summary_df) = [
    video_one_col_stats(zv_fw_MS_w_bw_df, summary_col)
    for summary_col in ('total_video_duration_sec_pose_zv',
                        'frames_per_second_pose_zv',
                        'walking_segmets_n_pose_zv',
                        'walking_segments_duration_mean_pose_zv')
]

# stack the four summaries into one table and save it
zv_fw_MS_stats_df = pd.concat([zv_fw_MS_duration_summary_df,
                               zv_fw_MS_fps_summary_df,
                               zv_fw_MS_segment_n_summary_df,
                               zv_fw_MS_segment_duration_summary_df])
zv_fw_MS_stats_df.to_csv(os.path.join(output_path, 'zv_fw_MS_video_segment_stats.csv'))

# categorical columns: count and % of videos at each fps value
zv_fw_MS_fps_counts_df = col_counts_and_percent(zv_fw_MS_w_bw_df, 'frames_per_second_pose_zv')
zv_fw_MS_fps_counts_df.to_csv(os.path.join(output_path, 'zv_fw_MS_fps_counts.csv'))

# ... and at each number of walking segments
zv_fw_MS_segment_counts_df = col_counts_and_percent(zv_fw_MS_w_bw_df, 'walking_segmets_n_pose_zv')
zv_fw_MS_segment_counts_df.to_csv(os.path.join(output_path, 'zv_fw_MS_segment_counts.csv'))

# % of videos with a value in each column: full table plus the table-ready subset
zv_fw_MS_cols_per_df, zv_fw_MS_filt_cols_per_df = calculate_per_vid_w_metric(zv_fw_MS_w_bw_df)
zv_fw_MS_cols_per_df.to_csv(os.path.join(output_path, 'zv_fw_MS_percent_w_data.csv'))
zv_fw_MS_filt_cols_per_df.to_csv(os.path.join(output_path, 'zv_fw_MS_filtered_percent_w_data.csv'))


### Zeno Videos - Preferred Walk 

In [14]:
# HC participants - zeno preferred walking speed videos

# summary statistics: one single-row summary per video-level column
(zv_pws_HC_duration_summary_df,
 zv_pws_HC_fps_summary_df,
 zv_pws_HC_segment_n_summary_df,
 zv_pws_HC_segment_duration_summary_df) = [
    video_one_col_stats(zv_pws_HC_w_bw_df, summary_col)
    for summary_col in ('total_video_duration_sec_pose_zv',
                        'frames_per_second_pose_zv',
                        'walking_segmets_n_pose_zv',
                        'walking_segments_duration_mean_pose_zv')
]

# stack the four summaries into one table and save it
zv_pws_HC_stats_df = pd.concat([zv_pws_HC_duration_summary_df,
                                zv_pws_HC_fps_summary_df,
                                zv_pws_HC_segment_n_summary_df,
                                zv_pws_HC_segment_duration_summary_df])
zv_pws_HC_stats_df.to_csv(os.path.join(output_path, 'zv_pws_HC_video_segment_stats.csv'))

# categorical columns: count and % of videos at each fps value
zv_pws_HC_fps_counts_df = col_counts_and_percent(zv_pws_HC_w_bw_df, 'frames_per_second_pose_zv')
zv_pws_HC_fps_counts_df.to_csv(os.path.join(output_path, 'zv_pws_HC_fps_counts.csv'))

# ... and at each number of walking segments
zv_pws_HC_segment_counts_df = col_counts_and_percent(zv_pws_HC_w_bw_df, 'walking_segmets_n_pose_zv')
zv_pws_HC_segment_counts_df.to_csv(os.path.join(output_path, 'zv_pws_HC_segment_counts.csv'))

# % of videos with a value in each column: full table plus the table-ready subset
zv_pws_HC_cols_per_df, zv_pws_HC_filt_cols_per_df = calculate_per_vid_w_metric(zv_pws_HC_w_bw_df)
zv_pws_HC_cols_per_df.to_csv(os.path.join(output_path, 'zv_pws_HC_percent_w_data.csv'))
zv_pws_HC_filt_cols_per_df.to_csv(os.path.join(output_path, 'zv_pws_HC_filtered_percent_w_data.csv'))

In [15]:
# MS participants - zeno preferred walking speed videos

# summary statistics: one single-row summary per video-level column
(zv_pws_MS_duration_summary_df,
 zv_pws_MS_fps_summary_df,
 zv_pws_MS_segment_n_summary_df,
 zv_pws_MS_segment_duration_summary_df) = [
    video_one_col_stats(zv_pws_MS_w_bw_df, summary_col)
    for summary_col in ('total_video_duration_sec_pose_zv',
                        'frames_per_second_pose_zv',
                        'walking_segmets_n_pose_zv',
                        'walking_segments_duration_mean_pose_zv')
]

# stack the four summaries into one table and save it
zv_pws_MS_stats_df = pd.concat([zv_pws_MS_duration_summary_df,
                                zv_pws_MS_fps_summary_df,
                                zv_pws_MS_segment_n_summary_df,
                                zv_pws_MS_segment_duration_summary_df])
zv_pws_MS_stats_df.to_csv(os.path.join(output_path, 'zv_pws_MS_video_segment_stats.csv'))

# categorical columns: count and % of videos at each fps value
zv_pws_MS_fps_counts_df = col_counts_and_percent(zv_pws_MS_w_bw_df, 'frames_per_second_pose_zv')
zv_pws_MS_fps_counts_df.to_csv(os.path.join(output_path, 'zv_pws_MS_fps_counts.csv'))

# ... and at each number of walking segments
zv_pws_MS_segment_counts_df = col_counts_and_percent(zv_pws_MS_w_bw_df, 'walking_segmets_n_pose_zv')
zv_pws_MS_segment_counts_df.to_csv(os.path.join(output_path, 'zv_pws_MS_segment_counts.csv'))

# % of videos with a value in each column: full table plus the table-ready subset
zv_pws_MS_cols_per_df, zv_pws_MS_filt_cols_per_df = calculate_per_vid_w_metric(zv_pws_MS_w_bw_df)
zv_pws_MS_cols_per_df.to_csv(os.path.join(output_path, 'zv_pws_MS_percent_w_data.csv'))
zv_pws_MS_filt_cols_per_df.to_csv(os.path.join(output_path, 'zv_pws_MS_filtered_percent_w_data.csv'))

### Home Videos 

In [16]:
# home videos - all tasks together

# summary statistics: one single-row summary per video-level column
(hv_all_duration_summary_df,
 hv_all_fps_summary_df,
 hv_all_segment_n_summary_df,
 hv_all_segment_duration_summary_df) = [
    video_one_col_stats(hv_all_metrics_df, summary_col)
    for summary_col in ('total_video_duration_sec_pose_hv',
                        'frames_per_second_pose_hv',
                        'walking_segmets_n_pose_hv',
                        'walking_segments_duration_mean_pose_hv')
]

# stack the four summaries into one table and save it
hv_all_stats_df = pd.concat([hv_all_duration_summary_df,
                             hv_all_fps_summary_df,
                             hv_all_segment_n_summary_df,
                             hv_all_segment_duration_summary_df])
hv_all_stats_df.to_csv(os.path.join(output_path, 'hv_all_video_segment_stats.csv'))

# categorical columns: count and % of videos at each fps value
hv_all_fps_counts_df = col_counts_and_percent(hv_all_metrics_df, 'frames_per_second_pose_hv')
hv_all_fps_counts_df.to_csv(os.path.join(output_path, 'hv_all_fps_counts.csv'))

# ... and at each number of walking segments
hv_all_segment_counts_df = col_counts_and_percent(hv_all_metrics_df, 'walking_segmets_n_pose_hv')
hv_all_segment_counts_df.to_csv(os.path.join(output_path, 'hv_all_segment_counts.csv'))

# % of videos with a value in each column: full table plus the table-ready subset
hv_all_cols_per_df, hv_all_filt_cols_per_df = calculate_per_vid_w_metric(hv_all_metrics_df)
hv_all_cols_per_df.to_csv(os.path.join(output_path, 'hv_all_percent_w_data.csv'))
hv_all_filt_cols_per_df.to_csv(os.path.join(output_path, 'hv_all_filtered_percent_w_data.csv'))

In [17]:
# home videos - right turn only

# summary statistics: one single-row summary per video-level column
(hv_r_duration_summary_df,
 hv_r_fps_summary_df,
 hv_r_segment_n_summary_df,
 hv_r_segment_duration_summary_df) = [
    video_one_col_stats(hv_r_df, summary_col)
    for summary_col in ('total_video_duration_sec_pose_hv',
                        'frames_per_second_pose_hv',
                        'walking_segmets_n_pose_hv',
                        'walking_segments_duration_mean_pose_hv')
]

# stack the four summaries into one table and save it
hv_r_stats_df = pd.concat([hv_r_duration_summary_df,
                           hv_r_fps_summary_df,
                           hv_r_segment_n_summary_df,
                           hv_r_segment_duration_summary_df])
hv_r_stats_df.to_csv(os.path.join(output_path, 'hv_r_video_segment_stats.csv'))

# categorical columns: count and % of videos at each fps value
hv_r_fps_counts_df = col_counts_and_percent(hv_r_df, 'frames_per_second_pose_hv')
hv_r_fps_counts_df.to_csv(os.path.join(output_path, 'hv_r_fps_counts.csv'))

# ... and at each number of walking segments
hv_r_segment_counts_df = col_counts_and_percent(hv_r_df, 'walking_segmets_n_pose_hv')
hv_r_segment_counts_df.to_csv(os.path.join(output_path, 'hv_r_segment_counts.csv'))

# % of videos with a value in each column: full table plus the table-ready subset
hv_r_cols_per_df, hv_r_filt_cols_per_df = calculate_per_vid_w_metric(hv_r_df)
hv_r_cols_per_df.to_csv(os.path.join(output_path, 'hv_r_percent_w_data.csv'))
hv_r_filt_cols_per_df.to_csv(os.path.join(output_path, 'hv_r_filtered_percent_w_data.csv'))

In [18]:
# home videos - left turn only

# summary statistics: one single-row summary per video-level column
(hv_l_duration_summary_df,
 hv_l_fps_summary_df,
 hv_l_segment_n_summary_df,
 hv_l_segment_duration_summary_df) = [
    video_one_col_stats(hv_l_df, summary_col)
    for summary_col in ('total_video_duration_sec_pose_hv',
                        'frames_per_second_pose_hv',
                        'walking_segmets_n_pose_hv',
                        'walking_segments_duration_mean_pose_hv')
]

# stack the four summaries into one table and save it
hv_l_stats_df = pd.concat([hv_l_duration_summary_df,
                           hv_l_fps_summary_df,
                           hv_l_segment_n_summary_df,
                           hv_l_segment_duration_summary_df])
hv_l_stats_df.to_csv(os.path.join(output_path, 'hv_l_video_segment_stats.csv'))

# categorical columns: count and % of videos at each fps value
hv_l_fps_counts_df = col_counts_and_percent(hv_l_df, 'frames_per_second_pose_hv')
hv_l_fps_counts_df.to_csv(os.path.join(output_path, 'hv_l_fps_counts.csv'))

# ... and at each number of walking segments
hv_l_segment_counts_df = col_counts_and_percent(hv_l_df, 'walking_segmets_n_pose_hv')
hv_l_segment_counts_df.to_csv(os.path.join(output_path, 'hv_l_segment_counts.csv'))

# % of videos with a value in each column: full table plus the table-ready subset
hv_l_cols_per_df, hv_l_filt_cols_per_df = calculate_per_vid_w_metric(hv_l_df)
hv_l_cols_per_df.to_csv(os.path.join(output_path, 'hv_l_percent_w_data.csv'))
hv_l_filt_cols_per_df.to_csv(os.path.join(output_path, 'hv_l_filtered_percent_w_data.csv'))

In [19]:
# display the left-turn walking-segment counts, ordered by number of segments
hv_l_segment_counts_df.sort_index(axis=0)

Unnamed: 0_level_0,count,proportion
walking_segmets_n_pose_hv,Unnamed: 1_level_1,Unnamed: 2_level_1
1,6,20.0
2,4,13.3
3,5,16.7
4,7,23.3
5,5,16.7
6,2,6.7
8,1,3.3
