In [1]:
# 1/21/2025: summary metrics of videos included in analysis - video duration, fps, etc 

In [2]:
import os
import pandas as pd 
import numpy as np 

In [3]:
# version tag of the pose-analysis run; used below to build the input and output folder / file names
version = '004'

In [4]:
# input paths - output of pose analysis
# video metrics before merging with brainwalk database
# both runs sit under the same data folder as
#   <run stem + version>/<run stem + version>_pose_metrics_all.csv
pose_data_root = r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code'

# zeno videos
zv_run_name = 'gait_bw_zeno_outputs_' + version
zv_all_metrics_path = os.path.join(pose_data_root,
                                   zv_run_name,
                                   zv_run_name + '_pose_metrics_all.csv')
zv_all_metrics_df = pd.read_csv(zv_all_metrics_path, index_col=0)

# home videos
hv_run_name = 'gait_bw_home_outputs_' + version
hv_all_metrics_path = os.path.join(pose_data_root,
                                   hv_run_name,
                                   hv_run_name + '_pose_metrics_all.csv')
hv_all_metrics_df = pd.read_csv(hv_all_metrics_path, index_col=0)

In [5]:
# output path - folder the summary CSVs below are written to
output_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                           version,
                           'video_segment_metric_summary')

# create the folder and any missing parents; exist_ok=True replaces the
# separate exists() check, so there is no gap between checking and creating
os.makedirs(output_path, exist_ok=True)

## Functions 

In [6]:
# function - summarize one column
def video_one_col_stats(df, column_name):
    """Summarize one column of ``df`` as a single-row table.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the column to summarize.
    column_name : str
        Name of the column to summarize.

    Returns
    -------
    pandas.DataFrame
        One row with the columns 'Value' (the column name), 'Count', 'Mean',
        'Median', 'Maximum', 'Minimum', 'S.D.' and 'IQR'. Every statistic is
        rounded to 2 decimals; the IQR is q3 - q1 taken from the unrounded
        quartiles and rounded afterwards.
    """
    values = df[column_name]

    def _two_dp(stat):
        # all reported statistics share the same 2-decimal rounding
        return round(stat, 2)

    # quartiles stay unrounded until the difference has been taken
    interquartile_range = values.quantile(0.75) - values.quantile(0.25)

    summary_row = {
        'Value': column_name,
        'Count': [_two_dp(values.count())],
        'Mean': [_two_dp(values.mean())],
        'Median': [_two_dp(values.median())],
        'Maximum': [_two_dp(values.max())],
        'Minimum': [_two_dp(values.min())],
        'S.D.': [_two_dp(values.std())],
        'IQR': [_two_dp(interquartile_range)],
    }

    return pd.DataFrame(summary_row)


In [7]:
# function - counts and % of each distinct value in a column
def col_counts_and_percent(df, column_name):
    """Tabulate how often each distinct value of one column occurs.

    Parameters
    ----------
    df : pandas.DataFrame
        Table holding the column to tabulate.
    column_name : str
        Name of the column whose values are counted.

    Returns
    -------
    pandas.DataFrame
        Indexed by the distinct values of the column, with two columns: the
        number of rows holding each value, and that number as a percentage of
        the counted rows, rounded to 1 decimal.
        NOTE(review): the column labels come from ``value_counts`` itself;
        under pandas >= 2.0 these are presumably 'count' and 'proportion', so
        the 'proportion' column holds percentages (0-100), not fractions.
    """
    values = df[column_name]

    counts = values.value_counts().to_frame()
    percents = (values.value_counts(normalize=True) * 100).round(1).to_frame()

    # side by side: counts first, percentages second
    return pd.concat([counts, percents], axis=1)

In [8]:
# function - % of videos (rows) for which each metric (column) was calculated
def calculate_per_vid_w_metric(df):
    """Report, for every column of ``df``, the share of rows that hold a value.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per video. A metric counts as calculated for a video when the
        corresponding cell is non-null.

    Returns
    -------
    all_cols_per_df : pandas.DataFrame
        One row per column of ``df`` with 'column_name', 'num_rows',
        'rows_with_data' and 'percent_with_data' (rounded to 2 decimals;
        NaN when ``df`` has no rows).
    filtered_df : pandas.DataFrame
        Table-ready subset for the four headline gait metrics, with the
        columns 'Metric' (display name) and
        'Videos with Metric Calculated (%)'. Rows keep the order and index
        they have in ``all_cols_per_df``; headline columns that are absent
        from ``df`` are simply left out.
    """
    # total number of videos included
    n_videos = len(df)

    records = []
    for current_col in df.columns:
        # number of non-null values in this column
        rows_with_data = df[current_col].count()

        # % of videos (rows) with data; undefined when there are no rows
        if n_videos:
            percent_with_data = round((rows_with_data / n_videos) * 100, 2)
        else:
            percent_with_data = float('nan')

        records.append({'column_name': current_col,
                        'num_rows': n_videos,
                        'rows_with_data': rows_with_data,
                        'percent_with_data': percent_with_data})

    # explicit columns keep the schema intact even when df has no columns
    all_cols_per_df = pd.DataFrame(
        records,
        columns=['column_name', 'num_rows', 'rows_with_data', 'percent_with_data'])

    # prettier one for tables
    # Display names are looked up by column name rather than assigned by
    # position: the filtered rows follow the column order of df, so a
    # positional list would mislabel metrics whenever that order differs
    # (and would fail outright if one of the columns were missing).
    clean_name_by_column = {
        'stride_time_mean_sec_pose': 'Stride Time',
        'mean_cadence_step_per_min_pose': 'Cadence',
        'stride_width_mean_cm_pose': 'Stride Width',
        'foot1_gait_cycle_time_mean_pose': 'Gait Cycle: Stance, Swing, and Support',
    }

    is_table_metric = all_cols_per_df['column_name'].isin(list(clean_name_by_column))
    selected = all_cols_per_df.loc[is_table_metric]

    # build a fresh frame instead of mutating a slice of all_cols_per_df
    filtered_df = pd.DataFrame({
        'Metric': selected['column_name'].map(clean_name_by_column),
        'Videos with Metric Calculated (%)': selected['percent_with_data'],
    })

    return all_cols_per_df, filtered_df
        

## split dataframes by task 

In [9]:
# zv all metrics - one subset per walking task, with a row count printed as a check
zv_task_labels = zv_all_metrics_df['task_pose']

# preferred walking speed
zv_pws_df = zv_all_metrics_df.loc[zv_task_labels == 'gait_vertical_PWS_1']
print(zv_pws_df['task_pose'].value_counts())

# fast walking speed
zv_fw_df = zv_all_metrics_df.loc[zv_task_labels == 'gait_vertical_FW_1']
print(zv_fw_df['task_pose'].value_counts())

task_pose
gait_vertical_PWS_1    253
Name: count, dtype: int64
task_pose
gait_vertical_FW_1    245
Name: count, dtype: int64


In [10]:
# home videos - one subset per task, with a row count printed as a check
hv_task_labels = hv_all_metrics_df['task_pose']

# right
hv_r_df = hv_all_metrics_df.loc[hv_task_labels == 'gait_vertical_right']
print(hv_r_df['task_pose'].value_counts())

# left
hv_l_df = hv_all_metrics_df.loc[hv_task_labels == 'gait_vertical_left']
print(hv_l_df['task_pose'].value_counts())

task_pose
gait_vertical_right    31
Name: count, dtype: int64
task_pose
gait_vertical_left    30
Name: count, dtype: int64


## Run Functions 

### Zeno Videos - all zeno

In [11]:
# all Zeno videos

# summary statistics ----------------------------------
# one single-row summary per column, unpacked in the order they are stacked below
# ('walking_segmets_n_pose' is spelled as in the original; presumably it matches the CSV column)
(zv_all_duration_summary_df,
 zv_all_fps_summary_df,
 zv_all_segment_n_summary_df,
 zv_all_segment_duration_summary_df) = [
    video_one_col_stats(zv_all_metrics_df, stat_col)
    for stat_col in ('total_video_duration_sec_pose',
                     'frames_per_second_pose',
                     'walking_segmets_n_pose',
                     'walking_segments_duration_mean_pose')
]

# compile + save
zv_all_stats_df = pd.concat(
    [zv_all_duration_summary_df,
     zv_all_fps_summary_df,
     zv_all_segment_n_summary_df,
     zv_all_segment_duration_summary_df]
)
zv_all_stats_df.to_csv(os.path.join(output_path, 'zv_all_video_segment_stats.csv'))

# categorical - counts and % of each distinct value -----------------------
# frames per second
zv_all_fps_counts_df = col_counts_and_percent(zv_all_metrics_df, 'frames_per_second_pose')
zv_all_fps_counts_df.to_csv(os.path.join(output_path, 'zv_all_fps_counts.csv'))

# number of walking segments
zv_all_segment_counts_df = col_counts_and_percent(zv_all_metrics_df, 'walking_segmets_n_pose')
zv_all_segment_counts_df.to_csv(os.path.join(output_path, 'zv_all_segment_counts.csv'))

# % of videos with each video metric calculated --------------------
# full per-column table plus the short, table-ready version
zv_all_cols_per_df, zv_all_filt_cols_per_df = calculate_per_vid_w_metric(zv_all_metrics_df)
zv_all_cols_per_df.to_csv(os.path.join(output_path, 'zv_all_percent_w_data.csv'))
zv_all_filt_cols_per_df.to_csv(os.path.join(output_path, 'zv_all_filtered_percent_w_data.csv'))

In [12]:
# zeno preferred walking speed

# summary statistics ----------------------------------
# one single-row summary per column, unpacked in the order they are stacked below
(zv_pws_duration_summary_df,
 zv_pws_fps_summary_df,
 zv_pws_segment_n_summary_df,
 zv_pws_segment_duration_summary_df) = [
    video_one_col_stats(zv_pws_df, stat_col)
    for stat_col in ('total_video_duration_sec_pose',
                     'frames_per_second_pose',
                     'walking_segmets_n_pose',
                     'walking_segments_duration_mean_pose')
]

# compile + save
zv_pws_stats_df = pd.concat(
    [zv_pws_duration_summary_df,
     zv_pws_fps_summary_df,
     zv_pws_segment_n_summary_df,
     zv_pws_segment_duration_summary_df]
)
zv_pws_stats_df.to_csv(os.path.join(output_path, 'zv_pws_video_segment_stats.csv'))

# categorical - counts and % of each distinct value -----------------------
# frames per second
zv_pws_fps_counts_df = col_counts_and_percent(zv_pws_df, 'frames_per_second_pose')
zv_pws_fps_counts_df.to_csv(os.path.join(output_path, 'zv_pws_fps_counts.csv'))

# number of walking segments
zv_pws_segment_counts_df = col_counts_and_percent(zv_pws_df, 'walking_segmets_n_pose')
zv_pws_segment_counts_df.to_csv(os.path.join(output_path, 'zv_pws_segment_counts.csv'))

# % of videos with each video metric calculated --------------------
# full per-column table plus the short, table-ready version
zv_pws_cols_per_df, zv_pws_filt_cols_per_df = calculate_per_vid_w_metric(zv_pws_df)
zv_pws_cols_per_df.to_csv(os.path.join(output_path, 'zv_pws_percent_w_data.csv'))
zv_pws_filt_cols_per_df.to_csv(os.path.join(output_path, 'zv_pws_filtered_percent_w_data.csv'))

In [13]:
# zeno fast walking speed

# summary statistics ----------------------------------
# one single-row summary per column, unpacked in the order they are stacked below
(zv_fw_duration_summary_df,
 zv_fw_fps_summary_df,
 zv_fw_segment_n_summary_df,
 zv_fw_segment_duration_summary_df) = [
    video_one_col_stats(zv_fw_df, stat_col)
    for stat_col in ('total_video_duration_sec_pose',
                     'frames_per_second_pose',
                     'walking_segmets_n_pose',
                     'walking_segments_duration_mean_pose')
]

# compile + save
zv_fw_stats_df = pd.concat(
    [zv_fw_duration_summary_df,
     zv_fw_fps_summary_df,
     zv_fw_segment_n_summary_df,
     zv_fw_segment_duration_summary_df]
)
zv_fw_stats_df.to_csv(os.path.join(output_path, 'zv_fw_video_segment_stats.csv'))

# categorical - counts and % of each distinct value -----------------------
# frames per second
zv_fw_fps_counts_df = col_counts_and_percent(zv_fw_df, 'frames_per_second_pose')
zv_fw_fps_counts_df.to_csv(os.path.join(output_path, 'zv_fw_fps_counts.csv'))

# number of walking segments
zv_fw_segment_counts_df = col_counts_and_percent(zv_fw_df, 'walking_segmets_n_pose')
zv_fw_segment_counts_df.to_csv(os.path.join(output_path, 'zv_fw_segment_counts.csv'))

# % of videos with each video metric calculated --------------------
# full per-column table plus the short, table-ready version
zv_fw_cols_per_df, zv_fw_filt_cols_per_df = calculate_per_vid_w_metric(zv_fw_df)
zv_fw_cols_per_df.to_csv(os.path.join(output_path, 'zv_fw_percent_w_data.csv'))
zv_fw_filt_cols_per_df.to_csv(os.path.join(output_path, 'zv_fw_filtered_percent_w_data.csv'))

### Home Videos 

In [14]:
# all home videos

# summary statistics ----------------------------------
# one single-row summary per column, unpacked in the order they are stacked below
(hv_all_duration_summary_df,
 hv_all_fps_summary_df,
 hv_all_segment_n_summary_df,
 hv_all_segment_duration_summary_df) = [
    video_one_col_stats(hv_all_metrics_df, stat_col)
    for stat_col in ('total_video_duration_sec_pose',
                     'frames_per_second_pose',
                     'walking_segmets_n_pose',
                     'walking_segments_duration_mean_pose')
]

# compile + save
hv_all_stats_df = pd.concat(
    [hv_all_duration_summary_df,
     hv_all_fps_summary_df,
     hv_all_segment_n_summary_df,
     hv_all_segment_duration_summary_df]
)
hv_all_stats_df.to_csv(os.path.join(output_path, 'hv_all_video_segment_stats.csv'))

# categorical - counts and % of each distinct value -----------------------
# frames per second
hv_all_fps_counts_df = col_counts_and_percent(hv_all_metrics_df, 'frames_per_second_pose')
hv_all_fps_counts_df.to_csv(os.path.join(output_path, 'hv_all_fps_counts.csv'))

# number of walking segments
hv_all_segment_counts_df = col_counts_and_percent(hv_all_metrics_df, 'walking_segmets_n_pose')
hv_all_segment_counts_df.to_csv(os.path.join(output_path, 'hv_all_segment_counts.csv'))

# % of videos with each video metric calculated --------------------
# full per-column table plus the short, table-ready version
hv_all_cols_per_df, hv_all_filt_cols_per_df = calculate_per_vid_w_metric(hv_all_metrics_df)
hv_all_cols_per_df.to_csv(os.path.join(output_path, 'hv_all_percent_w_data.csv'))
hv_all_filt_cols_per_df.to_csv(os.path.join(output_path, 'hv_all_filtered_percent_w_data.csv'))

In [15]:
# right turn only

# summary statistics ----------------------------------
# one single-row summary per column, unpacked in the order they are stacked below
(hv_r_duration_summary_df,
 hv_r_fps_summary_df,
 hv_r_segment_n_summary_df,
 hv_r_segment_duration_summary_df) = [
    video_one_col_stats(hv_r_df, stat_col)
    for stat_col in ('total_video_duration_sec_pose',
                     'frames_per_second_pose',
                     'walking_segmets_n_pose',
                     'walking_segments_duration_mean_pose')
]

# compile + save
hv_r_stats_df = pd.concat(
    [hv_r_duration_summary_df,
     hv_r_fps_summary_df,
     hv_r_segment_n_summary_df,
     hv_r_segment_duration_summary_df]
)
hv_r_stats_df.to_csv(os.path.join(output_path, 'hv_r_video_segment_stats.csv'))

# categorical - counts and % of each distinct value -----------------------
# frames per second
hv_r_fps_counts_df = col_counts_and_percent(hv_r_df, 'frames_per_second_pose')
hv_r_fps_counts_df.to_csv(os.path.join(output_path, 'hv_r_fps_counts.csv'))

# number of walking segments
hv_r_segment_counts_df = col_counts_and_percent(hv_r_df, 'walking_segmets_n_pose')
hv_r_segment_counts_df.to_csv(os.path.join(output_path, 'hv_r_segment_counts.csv'))

# % of videos with each video metric calculated --------------------
# full per-column table plus the short, table-ready version
hv_r_cols_per_df, hv_r_filt_cols_per_df = calculate_per_vid_w_metric(hv_r_df)
hv_r_cols_per_df.to_csv(os.path.join(output_path, 'hv_r_percent_w_data.csv'))
hv_r_filt_cols_per_df.to_csv(os.path.join(output_path, 'hv_r_filtered_percent_w_data.csv'))

In [16]:
# left turn only

# summary statistics ----------------------------------
# one single-row summary per column, unpacked in the order they are stacked below
(hv_l_duration_summary_df,
 hv_l_fps_summary_df,
 hv_l_segment_n_summary_df,
 hv_l_segment_duration_summary_df) = [
    video_one_col_stats(hv_l_df, stat_col)
    for stat_col in ('total_video_duration_sec_pose',
                     'frames_per_second_pose',
                     'walking_segmets_n_pose',
                     'walking_segments_duration_mean_pose')
]

# compile + save
hv_l_stats_df = pd.concat(
    [hv_l_duration_summary_df,
     hv_l_fps_summary_df,
     hv_l_segment_n_summary_df,
     hv_l_segment_duration_summary_df]
)
hv_l_stats_df.to_csv(os.path.join(output_path, 'hv_l_video_segment_stats.csv'))

# categorical - counts and % of each distinct value -----------------------
# frames per second
hv_l_fps_counts_df = col_counts_and_percent(hv_l_df, 'frames_per_second_pose')
hv_l_fps_counts_df.to_csv(os.path.join(output_path, 'hv_l_fps_counts.csv'))

# number of walking segments
hv_l_segment_counts_df = col_counts_and_percent(hv_l_df, 'walking_segmets_n_pose')
hv_l_segment_counts_df.to_csv(os.path.join(output_path, 'hv_l_segment_counts.csv'))

# % of videos with each video metric calculated --------------------
# full per-column table plus the short, table-ready version
hv_l_cols_per_df, hv_l_filt_cols_per_df = calculate_per_vid_w_metric(hv_l_df)
hv_l_cols_per_df.to_csv(os.path.join(output_path, 'hv_l_percent_w_data.csv'))
hv_l_filt_cols_per_df.to_csv(os.path.join(output_path, 'hv_l_filtered_percent_w_data.csv'))