In [1]:
import pandas as pd 
import numpy as np
import os 
import seaborn as sns 
import matplotlib.pyplot as plt 
import scipy.stats as stats 

## Set input and output folders 

In [2]:
# analysis folder version  
analysis_version = '007'


In [3]:
out_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                        analysis_version, 
                        '002_video_vs_mat_metrics')

if not os.path.exists(out_path): 
    os.makedirs(out_path)

### Load Clean Data - no missing BW Data 
May be missing video data - see excel with counts 

Only participants with MS 

In [4]:
# PWS 
zv_pws_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis', 
                                    analysis_version, 
                                    '000_merged_cleaned_data\zv_bw_merged_gait_vertical_PWS_1_clean.csv')
zv_pws_bw_clean_df = pd.read_csv(zv_pws_bw_clean_path, index_col = 0)

# FW 
zv_fw_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis', 
                                    analysis_version, 
                                   '000_merged_cleaned_data\zv_bw_merged_gait_vertical_FW_1_clean.csv') 
zv_fw_bw_clean_df = pd.read_csv(zv_fw_bw_clean_path, index_col = 0) 

# Home Videos 
hv_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis', 
                                analysis_version, 
                                '000_merged_cleaned_data\hv_bw_merged_clean.csv') 

hv_bw_clean_df = pd.read_csv(hv_bw_clean_path, index_col = 0) 

## Columns to compare 
Column pairs to evaluate metrics that should/count be 1:1, not proxy velocity measures  
For each of the column pairs below (zv 1 vs bw1, zv 2 vs bw 3, etc), run and save correlation 

In [5]:
# zeno video metrics 
zv_colnames = ['stride_time_median_sec_pose_zv', 
            #   'stride_time_mean_sec_pose_zv',
            #   'gait_cycle_time_sec_median_pose_zv',
            #   'stride_time_cv_pose_zv', 
               'mean_cadence_step_per_min_pose_zv',
               'stride_width_median_cm_pose_zv',
               'singlesupport_per_mean_pose_zv',
               'tot_dsupport_per_mean_pose_zv',
            #   'tot_dsupport_per_median_pose_zv',
               ]
            #   'singlesupport_per_median_pose_zv',
            #   'stride_width_mean_cm_pose_zv',
            #   'stride_width_std_pose_zv']

# home video metrics 
hv_colnames = ['stride_time_median_sec_pose_hv', 
            #   'stride_time_mean_sec_pose_hv',
            #   'gait_cycle_time_sec_median_pose_hv', 
            #   'stride_time_cv_pose_hv', 
               'mean_cadence_step_per_min_pose_hv',
               'stride_width_median_cm_pose_hv',
               'singlesupport_per_mean_pose_hv',
               'tot_dsupport_per_mean_pose_hv']
            #   'tot_dsupport_per_median_pose_hv',
               
            #   'singlesupport_per_median_pose_hv',
            #   'stride_width_mean_cm_pose_hv',
            #   'stride_width_std_pose_hv']

# Zeno mat preferred walking speed metrics 
bw_pws_colnames = ['PWS_stridetimesecmean', 
                 #  'PWS_stridetimesecmean',
                 #  'PWS_stridetimesecmean',
                 #  'PWS_stridetimeseccv',
                   'PWS_cadencestepsminmean',
                   'PWS_stridewidthcmmean',
                    'PWS_singlesupportmean',
                   'PWS_totaldsupportmean']
                 #  'PWS_totaldsupportmean',
                
                 #  'PWS_singlesupportmean',
                 #  'PWS_stridewidthcmmean',
                  # 'PWS_stridewidthcmsd']

# Zeno mat fast walking speed metrics 
bw_fw_colnames = ['FW_stridetimesecmean', 
                #  'FW_stridetimesecmean', 
                #  'FW_stridetimesecmean', 
                #  'FW_stridetimeseccv',
                   'FW_cadencestepsminmean',
                  'FW_stridewidthcmmean',
                  'FW_singlesupportmean',
                   'FW_totaldsupportmean']
                 # 'FW_totaldsupportmean',
                   
                #  'FW_singlesupportmean',
                #  'FW_stridewidthcmmean',
                #   'FW_stridewidthcmsd']

units = ['seconds',
     #    'seconds',
      #   'seconds',
       #  'CV%',
         'steps/min',
        'cm',
         '%',
         '%']
#  '%',
       #  '%',
       #  'cm',
       #  'cm']

# Correlation - compare metrics from two data sources 

In [6]:
# function - correlation 
def metric_correlation(df, video_columns, bw_columns, output_folder_path, subfolder_name): 
    if not os.path.exists(os.path.join(output_folder_path, subfolder_name)):
        os.makedirs(os.path.join(output_folder_path, subfolder_name))
    
    # create empty list to store results 
    corr_results_all = [] 
    clean_df = pd.DataFrame() 
    
    for metric_i, current_metric in enumerate(video_columns): 
        current_vid_col = video_columns[metric_i]
        current_bw_col = bw_columns[metric_i]

        # Drop rows with NaN values in either column - required to run spearman r 
        clean_df = df.dropna(subset=[current_vid_col, current_bw_col])
        
        # plot 
        plt.figure(figsize=(10, 6))
        sns.scatterplot(x = current_bw_col, y = current_vid_col, 
                        data = clean_df, 
                        alpha = 0.75,
                       )

    
        # Set the x and y axis limits to the same range 
        min_val = min(clean_df[current_vid_col].min(), clean_df[current_vid_col].min())
        max_val = max(clean_df[current_vid_col].max(), clean_df[current_vid_col].max())
        plt.xlim(min_val - (min_val * .1), max_val + (max_val * .1))
        plt.ylim(min_val - (min_val * .1), max_val + (max_val * .1))
        # straight line of perfect agreement 
#        plt.plot([min_val, max_val], [min_val, max_val], color='lightgrey')
        plt.title(subfolder_name)
#        plt.legend(loc = 'upper right')
        plt.savefig(os.path.join(output_folder_path, 
                                 subfolder_name, 
                                 str(current_vid_col + '_vs_' + current_bw_col + '.png')))
        plt.close()

        # run spearman correlation and append   
        statistic, p_value = stats.spearmanr(clean_df[current_bw_col], clean_df[current_vid_col])
        corr_results_all.append({'bw_column': current_bw_col, 
                                 'video_column': current_vid_col, 
                                 'corr_method': 'spearman' , 
                                 'rs': round(statistic, 2), 
                                 'p_value' : round(p_value, 3),
                                 'n_pairs': len(clean_df)})

    # Create DataFrame with results
    corr_results_df = pd.DataFrame(corr_results_all)
   # corr_results_df = corr_results_df.round(3)

    return corr_results_df


In [7]:
# set correlation output folder 
corr_out_path = os.path.join(out_path, 'correlation')
print(corr_out_path)

C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis\007\002_video_vs_mat_metrics\correlation


In [8]:
# PWS 
zv_pws_corr_results_df = metric_correlation(df = zv_pws_bw_clean_df, 
                                     video_columns = zv_colnames, 
                                     bw_columns = bw_pws_colnames, 
                                     output_folder_path = corr_out_path, 
                                     subfolder_name = 'zeno_pws_scatterplots')
zv_pws_corr_results_df.to_csv(os.path.join(corr_out_path, 'zeno_pws_spearman_corr.csv'))

In [9]:
# FW
zv_fw_corr_results_df = metric_correlation(df = zv_fw_bw_clean_df, 
                                     video_columns = zv_colnames, 
                                     bw_columns = bw_fw_colnames, 
                                     output_folder_path = corr_out_path, 
                                     subfolder_name = 'zeno_fw_scatterplots')
zv_fw_corr_results_df.to_csv(os.path.join(corr_out_path, 'zeno_fw_spearman_corr.csv'))

## Home Videos Correlation 
Video metric with most recent in-person data 

In [10]:
# Add delta pixel vs velocity correlation comparison 
hv_colnames_2 = hv_colnames
hv_colnames_2.append('delta_pix_h_rel_median_pose_hv')
print(hv_colnames_2)

bw_pws_colnames_2 = bw_pws_colnames
bw_pws_colnames_2.append('PWS_velocitycmsecmean')
print(bw_pws_colnames_2)

['stride_time_median_sec_pose_hv', 'mean_cadence_step_per_min_pose_hv', 'stride_width_median_cm_pose_hv', 'singlesupport_per_mean_pose_hv', 'tot_dsupport_per_mean_pose_hv', 'delta_pix_h_rel_median_pose_hv']
['PWS_stridetimesecmean', 'PWS_cadencestepsminmean', 'PWS_stridewidthcmmean', 'PWS_singlesupportmean', 'PWS_totaldsupportmean', 'PWS_velocitycmsecmean']


In [11]:
# Home Videos - all pairs with data, right and left turns 
hv_corr_results_df = metric_correlation(df = hv_bw_clean_df, 
                                    video_columns = hv_colnames_2, 
                                     bw_columns = bw_pws_colnames_2, 
                                     output_folder_path = corr_out_path, 
                                     subfolder_name = 'home_scatterplots')
hv_corr_results_df.to_csv(os.path.join(corr_out_path, 'home_spearman_corr.csv'))
hv_corr_results_df

Unnamed: 0,bw_column,video_column,corr_method,rs,p_value,n_pairs
0,PWS_stridetimesecmean,stride_time_median_sec_pose_hv,spearman,0.59,0.0,71
1,PWS_cadencestepsminmean,mean_cadence_step_per_min_pose_hv,spearman,0.39,0.001,71
2,PWS_stridewidthcmmean,stride_width_median_cm_pose_hv,spearman,0.35,0.003,71
3,PWS_singlesupportmean,singlesupport_per_mean_pose_hv,spearman,-0.1,0.406,65
4,PWS_totaldsupportmean,tot_dsupport_per_mean_pose_hv,spearman,0.0,0.998,65
5,PWS_velocitycmsecmean,delta_pix_h_rel_median_pose_hv,spearman,0.53,0.0,71


In [12]:
# only videos with n walking segments >0, include all videos both right and left turns 
# only participants with PWS in person data to match 

# drop if missing video metrics 
hv_bw_clean_df_2 = hv_bw_clean_df.dropna(subset = hv_colnames_2)

# drop if missing any PWS metrics 
hv_bw_clean_df_2 = hv_bw_clean_df_2.dropna(subset = bw_pws_colnames_2)

len(hv_bw_clean_df_2)

# correlation results 
hv_corr_results_df_2 = metric_correlation(df = hv_bw_clean_df_2, 
                                    video_columns = hv_colnames_2, 
                                     bw_columns = bw_pws_colnames_2, 
                                     output_folder_path = corr_out_path, 
                                     subfolder_name = 'home_scatterplots_2')
hv_corr_results_df_2.to_csv(os.path.join(corr_out_path, 'home_spearman_corr_2.csv'))
hv_corr_results_df_2

Unnamed: 0,bw_column,video_column,corr_method,rs,p_value,n_pairs
0,PWS_stridetimesecmean,stride_time_median_sec_pose_hv,spearman,0.53,0.0,64
1,PWS_cadencestepsminmean,mean_cadence_step_per_min_pose_hv,spearman,0.39,0.002,64
2,PWS_stridewidthcmmean,stride_width_median_cm_pose_hv,spearman,0.38,0.002,64
3,PWS_singlesupportmean,singlesupport_per_mean_pose_hv,spearman,-0.07,0.598,64
4,PWS_totaldsupportmean,tot_dsupport_per_mean_pose_hv,spearman,0.03,0.811,64
5,PWS_velocitycmsecmean,delta_pix_h_rel_median_pose_hv,spearman,0.46,0.0,64


In [13]:
# clean task pose column 
hv_bw_clean_df['task_pose_hv'].value_counts()

# clean - right_2 --> right and same for left 
hv_bw_clean_df['task_pose_hv'] = hv_bw_clean_df['task_pose_hv'].replace('gait_vertical_left_2', 'gait_vertical_left')
hv_bw_clean_df['task_pose_hv'] = hv_bw_clean_df['task_pose_hv'].replace('gait_vertical_right_2', 'gait_vertical_right')
hv_bw_clean_df['task_pose_hv'].value_counts()

task_pose_hv
gait_vertical_right    41
gait_vertical_left     40
Name: count, dtype: int64

In [14]:
# right turning only 
hv_bw_clean_df_right = hv_bw_clean_df.loc[hv_bw_clean_df['task_pose_hv'] == 'gait_vertical_right']
hv_bw_clean_df_right['task_pose_hv'].value_counts()

# correlation results 
hv_corr_results_df_right = metric_correlation(df = hv_bw_clean_df_right, 
                                    video_columns = hv_colnames_2, 
                                     bw_columns = bw_pws_colnames_2, 
                                     output_folder_path = corr_out_path, 
                                     subfolder_name = 'home_scatterplots_right')
hv_corr_results_df_right.to_csv(os.path.join(corr_out_path, 'home_spearman_corr_right.csv'))
hv_corr_results_df_right

Unnamed: 0,bw_column,video_column,corr_method,rs,p_value,n_pairs
0,PWS_stridetimesecmean,stride_time_median_sec_pose_hv,spearman,0.59,0.0,37
1,PWS_cadencestepsminmean,mean_cadence_step_per_min_pose_hv,spearman,0.29,0.086,37
2,PWS_stridewidthcmmean,stride_width_median_cm_pose_hv,spearman,0.27,0.104,37
3,PWS_singlesupportmean,singlesupport_per_mean_pose_hv,spearman,-0.16,0.366,33
4,PWS_totaldsupportmean,tot_dsupport_per_mean_pose_hv,spearman,-0.05,0.797,33
5,PWS_velocitycmsecmean,delta_pix_h_rel_median_pose_hv,spearman,0.48,0.003,37


In [15]:
# left turning all  
hv_bw_clean_df_left = hv_bw_clean_df.loc[hv_bw_clean_df['task_pose_hv'] == 'gait_vertical_left']
hv_bw_clean_df_left['task_pose_hv'].value_counts()

# correlation results 
hv_corr_results_df_right = metric_correlation(df = hv_bw_clean_df_left, 
                                    video_columns = hv_colnames_2, 
                                     bw_columns = bw_pws_colnames_2, 
                                     output_folder_path = corr_out_path, 
                                     subfolder_name = 'home_scatterplots_left')
hv_corr_results_df_right.to_csv(os.path.join(corr_out_path, 'home_spearman_corr_left.csv'))
hv_corr_results_df_right


Unnamed: 0,bw_column,video_column,corr_method,rs,p_value,n_pairs
0,PWS_stridetimesecmean,stride_time_median_sec_pose_hv,spearman,0.61,0.0,34
1,PWS_cadencestepsminmean,mean_cadence_step_per_min_pose_hv,spearman,0.5,0.003,34
2,PWS_stridewidthcmmean,stride_width_median_cm_pose_hv,spearman,0.46,0.007,34
3,PWS_singlesupportmean,singlesupport_per_mean_pose_hv,spearman,-0.04,0.82,32
4,PWS_totaldsupportmean,tot_dsupport_per_mean_pose_hv,spearman,0.08,0.675,32
5,PWS_velocitycmsecmean,delta_pix_h_rel_median_pose_hv,spearman,0.59,0.0,34


In [16]:
hv_bw_clean_df.head()

Unnamed: 0,video_id_date_name_pose_hv,id_date_pose_hv,task_pose_hv,frames_per_second_pose_hv,total_video_duration_sec_pose_hv,delta_pix_h_rel_median_pose_hv,walking_segmets_n_pose_hv,walking_segments_duration_mean_pose_hv,walking_segments_duration_median_pose_hv,stride_time_mean_sec_pose_hv,...,EDSS_same_before_after_MM,demoEHR_Vitals_dateDiff,tc_Examinee_Education,edss_severity_num,edss_severity_cat,t25fw_group_num,t25fw_group_cat,race_ethnicity_clean,ms_dx_condensed,bw_hv_date_diff_days
0,gait_vertical_left_BW-0018_10-24-23,BW-0018\10-24-23,gait_vertical_left,30,26.8,0.22,6.0,2.88,2.92,1.182,...,,-0.347106,19.0,2.0,moderate,1.0,under_6,White Non Hispanic,RRMS,0
1,gait_vertical_right_BW-0018_10-24-23,BW-0018\10-24-23,gait_vertical_right,30,24.4,0.31,6.0,3.15,3.2,1.185,...,,-0.347106,19.0,2.0,moderate,1.0,under_6,White Non Hispanic,RRMS,0
2,gait_vertical_left_BW-0023_05-05-23,BW-0023\05-05-23,gait_vertical_left,30,35.966667,0.43,5.0,4.43,4.47,1.054,...,,15.552766,16.0,2.0,moderate,1.0,under_6,Other/Unknown/Declined,RRMS,9
3,gait_vertical_right_BW-0023_05-05-23,BW-0023\05-05-23,gait_vertical_right,30,38.466667,0.39,6.0,4.79,4.77,1.091,...,,15.552766,16.0,2.0,moderate,1.0,under_6,Other/Unknown/Declined,RRMS,9
4,gait_vertical_left_BW-0023_10-23-23,BW-0023\10-23-23,gait_vertical_left,30,36.1,0.3,3.0,4.27,3.93,1.008,...,,-0.560845,16.0,2.0,moderate,1.0,under_6,Other/Unknown/Declined,RRMS,0


In [17]:
# 1 video per participant 
# only videos with n walking segments >0
# if left and right vidoe with walking segment n 0 --> mean of left and right 
# if either just left or right, exclude 

# group by visit id (date and bw_ID), then average right and left if both are present 
averaged_home_df = (
    hv_bw_clean_df
    .groupby('id_date_pose_hv')[hv_colnames_2]
    .mean()
    .reset_index()
)

task_counts = hv_bw_clean_df.groupby('id_date_pose_hv')['task_pose_hv'].nunique().reset_index(name='task_count')

# Merge if you want to keep track of which rows were single-task vs dual-task
result_df = averaged_home_df.merge(task_counts, on='id_date_pose_hv')
result_df

# Merge average with Zeno data
bw_mat_cols = bw_pws_colnames_2
bw_mat_cols.append('id_date_pose_hv') 
bw_mat_df = hv_bw_clean_df[bw_mat_cols] # select PWS columns and id_date_pose_hv
bw_mat_df = bw_mat_df.drop_duplicates(subset=['id_date_pose_hv'], keep='last')
averaged_home_w_bw_df = result_df.merge(bw_mat_df, on = 'id_date_pose_hv')

# correlation results 
hv_corr_results_df_avg= metric_correlation(df = averaged_home_w_bw_df, 
                                    video_columns = hv_colnames_2, 
                                     bw_columns = bw_pws_colnames_2, 
                                     output_folder_path = corr_out_path, 
                                     subfolder_name = 'home_scatterplots_avg')
hv_corr_results_df_avg.to_csv(os.path.join(corr_out_path, 'home_spearman_corr_avg.csv'))
hv_corr_results_df_avg

Unnamed: 0,bw_column,video_column,corr_method,rs,p_value,n_pairs
0,PWS_stridetimesecmean,stride_time_median_sec_pose_hv,spearman,0.63,0.0,38
1,PWS_cadencestepsminmean,mean_cadence_step_per_min_pose_hv,spearman,0.43,0.007,38
2,PWS_stridewidthcmmean,stride_width_median_cm_pose_hv,spearman,0.36,0.027,38
3,PWS_singlesupportmean,singlesupport_per_mean_pose_hv,spearman,-0.11,0.524,34
4,PWS_totaldsupportmean,tot_dsupport_per_mean_pose_hv,spearman,-0.01,0.934,34
5,PWS_velocitycmsecmean,delta_pix_h_rel_median_pose_hv,spearman,0.57,0.0,39


In [18]:
averaged_home_w_bw_df

Unnamed: 0,id_date_pose_hv,stride_time_median_sec_pose_hv,mean_cadence_step_per_min_pose_hv,stride_width_median_cm_pose_hv,singlesupport_per_mean_pose_hv,tot_dsupport_per_mean_pose_hv,delta_pix_h_rel_median_pose_hv,task_count,PWS_stridetimesecmean,PWS_cadencestepsminmean,PWS_stridewidthcmmean,PWS_singlesupportmean,PWS_totaldsupportmean,PWS_velocitycmsecmean
0,BW-0004\03-11-2025,1.25,93.3305,10.485,29.185,37.96,0.395,2,,,,,,
1,BW-0018\10-24-23,1.1165,100.213,12.087,26.45,42.04,0.265,2,1.028,116.129,8.185,34.807,30.295,136.683
2,BW-0023\05-05-23,1.05,108.367,11.169,34.69,28.58,0.41,2,1.065,111.917,8.33,35.801,28.473,115.829
3,BW-0023\10-23-23,0.9915,111.57,11.0225,29.96,38.26,0.345,2,0.945,127.434,8.688,36.945,26.061,139.984
4,BW-0025\11-28-2023,1.0835,105.8525,14.173,36.64,27.085,0.21,2,1.047,113.966,13.049,33.974,32.135,96.55
5,BW-0045\01-17-2024,1.45,79.5435,12.157,27.715,50.99,0.23,2,1.541,77.164,8.943,33.067,32.871,84.669
6,BW-0050\2-9-2024,1.233,98.038,11.889,39.225,25.385,0.49,2,1.207,98.698,4.792,36.269,27.406,141.782
7,BW-0053\4-17-2024,1.142,122.9055,9.7175,45.97,14.13,0.31,2,1.363,88.501,7.478,30.545,34.267,69.783
8,BW-0054\2-13-2024,0.9085,133.7685,13.6,31.06,37.8,0.23,2,1.0,117.317,12.395,36.158,28.957,82.259
9,BW-0114\09-05-2023,1.177,108.107,12.768,32.075,36.035,0.48,2,1.197,100.0,11.633,35.405,29.143,133.574


# Mean Absolute Error - compare metrics from two data sources 

In [19]:
def calculate_metric_mean_error(df, video_columns, bw_columns, units, output_folder_path, subfolder_name):
    
    if not os.path.exists(os.path.join(output_folder_path, subfolder_name)):
        os.makedirs(os.path.join(output_folder_path, subfolder_name)) 
        
    mean_error_all = [] 

    for metric_i, current_metric in enumerate(video_columns): 
        current_vid_col = video_columns[metric_i]
        current_bw_col = bw_columns[metric_i]
        current_unit = units[metric_i]

        # Drop rows with NaN values in either column 
        clean_df = df.dropna(subset=[current_vid_col, current_bw_col])

        # mean and absolute mean diff 
        current_metric_diff = clean_df[current_bw_col] - clean_df[current_vid_col]
        current_mean_diff = current_metric_diff.mean()
        current_abs_mean_diff = abs(current_metric_diff).mean()

        # calculate mean ground truth data 
        bw_mean = clean_df[current_bw_col].mean()
        mean_err_per = (current_mean_diff / bw_mean) * 100 
        mae_per = (current_abs_mean_diff / bw_mean) * 100 

        # 95% limits of agreement 
        current_diff_std = current_metric_diff.std()
        upper_limit = round(current_mean_diff + (1.96 * current_diff_std), 2)
        lower_limit = round(current_mean_diff - (1.96 * current_diff_std), 2)

        # plot 
        fig, ax1 = plt.subplots(figsize=(10, 6))
        sns.boxplot(y=current_metric_diff, ax=ax1, fill = False, dodge = True, fliersize = 0)
        sns.stripplot(y = current_metric_diff, ax = ax1, color = 'black', dodge = True)
        fig.suptitle(subfolder_name)
        ax1.set_title(current_bw_col + ' - ' + current_vid_col)
        # center plot at zero
        ymin, ymax = plt.ylim()
        plt.ylim(min(ymin, -ymax), max(ymax, -ymin))
        plt.ylabel(current_unit)
        # add line at zero
        plt.axhline(y=0, color='grey', linestyle='--')
        plt.tight_layout()
        plt.savefig(os.path.join(output_folder_path, 
                                 subfolder_name,
                                 str(current_vid_col + '_vs_' + current_bw_col + '_diff_box.png')))
        plt.close()

        # mean difference 
        mean_error_all.append({'bw_column': current_bw_col, 
                               'video_column': current_vid_col,
                               'n_pairs' : len(clean_df), 
                               'bw_metric_mean' : bw_mean,
                               'mean_error': current_mean_diff, 
                               'mean_abs_error' : current_abs_mean_diff, 
                               'mean_err_and_mean_abs_err' : f"{current_mean_diff:.2f}, {current_abs_mean_diff:.2f}",
                               'mean_error_%_of_mean' : mean_err_per,
                               'mae_%_of_mean' : mae_per, 
                               'upper_limit' : upper_limit, 
                                'lower_limit' : lower_limit, 
                              'limits_of_agreement' : f"{lower_limit}; {upper_limit}"
                              })

    
     # Create DataFrame with results
    mean_error_df = pd.DataFrame(mean_error_all)
    mean_error_df = mean_error_df.round(2)
    
    return mean_error_df

In [20]:
# set mean error output folder 
mae_out_path = os.path.join(out_path, 'mean_error')
print(mae_out_path)

C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis\007\002_video_vs_mat_metrics\mean_error


## PWS 

### PWS - any video metric is calculated in, regardless if other metrics were calculated successfully 

In [21]:
# PWS 
zv_pws_mae_results_df = calculate_metric_mean_error(df = zv_pws_bw_clean_df, 
                                                    video_columns = zv_colnames, 
                                                    bw_columns = bw_pws_colnames, 
                                                    units = units, 
                                                    output_folder_path = mae_out_path, 
                                                    subfolder_name = 'zeno_pws_boxplots')
zv_pws_mae_results_df.to_csv(os.path.join(mae_out_path, 'zeno_pws_errors.csv'))

### PWS - videos with all metrics successfully calculated 

In [22]:
zv_pws_bw_clean_df_2 = zv_pws_bw_clean_df.dropna(subset = ['delta_pix_h_rel_median_pose_zv',
                                                           'stride_time_median_sec_pose_zv', 
                                                            'mean_cadence_step_per_min_pose_zv', 
                                                           'stride_width_median_cm_pose_zv', 
                                                            'singlesupport_per_mean_pose_zv'])


len(zv_pws_bw_clean_df_2)

175

In [23]:
zv_pws_mae_results_df_2 = calculate_metric_mean_error(df = zv_pws_bw_clean_df_2, 
                                                    video_columns = zv_colnames, 
                                                    bw_columns = bw_pws_colnames, 
                                                    units = units, 
                                                    output_folder_path = mae_out_path, 
                                                    subfolder_name = 'zeno_pws_boxplots_2')
zv_pws_mae_results_df_2.to_csv(os.path.join(mae_out_path, 'zeno_pws_errors_2.csv'))


In [24]:
zv_pws_mae_results_df_2

Unnamed: 0,bw_column,video_column,n_pairs,bw_metric_mean,mean_error,mean_abs_error,mean_err_and_mean_abs_err,mean_error_%_of_mean,mae_%_of_mean,upper_limit,lower_limit,limits_of_agreement
0,PWS_stridetimesecmean,stride_time_median_sec_pose_zv,175,1.22,0.07,0.09,"0.07, 0.09",5.59,7.71,0.47,-0.33,-0.33; 0.47
1,PWS_cadencestepsminmean,mean_cadence_step_per_min_pose_zv,175,101.46,-0.48,8.42,"-0.48, 8.42",-0.47,8.3,21.25,-22.2,-22.2; 21.25
2,PWS_stridewidthcmmean,stride_width_median_cm_pose_zv,175,9.97,-2.37,3.16,"-2.37, 3.16",-23.8,31.7,3.58,-8.32,-8.32; 3.58
3,PWS_singlesupportmean,singlesupport_per_mean_pose_zv,175,33.32,-0.85,3.87,"-0.85, 3.87",-2.56,11.61,8.8,-10.5,-10.5; 8.8
4,PWS_totaldsupportmean,tot_dsupport_per_mean_pose_zv,175,32.84,1.37,7.32,"1.37, 7.32",4.18,22.3,24.7,-21.95,-21.95; 24.7


### PWS - drop videos from slow walkers by velocity on mat 

In [25]:
pws_vel_q1 = zv_pws_bw_clean_df['PWS_velocitycmsecmean'].quantile(0.25)
pws_vel_q1_df = pd.DataFrame([pws_vel_q1], columns = ['q1'])
print(pws_vel_q1)
pws_vel_q1_df.to_csv(os.path.join(mae_out_path, 'zeno_pws_q1_3.csv'))

# run analysis on individuals above 25th percentile of walking speed during video recording 
zv_pws_bw_clean_df_3 = zv_pws_bw_clean_df.loc[zv_pws_bw_clean_df['PWS_velocitycmsecmean'] >= pws_vel_q1]
len(zv_pws_bw_clean_df_3)

82.97925000000001


174

In [26]:
zv_pws_mae_results_df_3 = calculate_metric_mean_error(df = zv_pws_bw_clean_df_3, 
                                                    video_columns = zv_colnames, 
                                                    bw_columns = bw_pws_colnames, 
                                                    units = units, 
                                                    output_folder_path = mae_out_path, 
                                                    subfolder_name = 'zeno_pws_boxplots_3')
zv_pws_mae_results_df_3.to_csv(os.path.join(mae_out_path, 'zeno_pws_errors_3.csv'))
zv_pws_mae_results_df_3

Unnamed: 0,bw_column,video_column,n_pairs,bw_metric_mean,mean_error,mean_abs_error,mean_err_and_mean_abs_err,mean_error_%_of_mean,mae_%_of_mean,upper_limit,lower_limit,limits_of_agreement
0,PWS_stridetimesecmean,stride_time_median_sec_pose_zv,153,1.12,0.01,0.07,"0.01, 0.07",0.85,6.13,0.36,-0.34,-0.34; 0.36
1,PWS_cadencestepsminmean,mean_cadence_step_per_min_pose_zv,157,107.26,2.51,8.53,"2.51, 8.53",2.34,7.96,25.02,-20.0,-20.0; 25.02
2,PWS_stridewidthcmmean,stride_width_median_cm_pose_zv,155,9.19,-2.75,3.33,"-2.75, 3.33",-29.92,36.27,2.97,-8.47,-8.47; 2.97
3,PWS_singlesupportmean,singlesupport_per_mean_pose_zv,148,34.62,-0.11,3.46,"-0.11, 3.46",-0.32,9.99,8.59,-8.81,-8.81; 8.59
4,PWS_totaldsupportmean,tot_dsupport_per_mean_pose_zv,148,29.83,-0.35,6.53,"-0.35, 6.53",-1.18,21.91,16.41,-17.11,-17.11; 16.41


### FW - any video metric is calculated in, regardless if other metrics were calculated successfully 

In [27]:
# FW 
zv_fw_mae_results_df = calculate_metric_mean_error(df = zv_fw_bw_clean_df, 
                                                    video_columns = zv_colnames, 
                                                    bw_columns = bw_fw_colnames, 
                                                    units = units, 
                                                    output_folder_path = mae_out_path, 
                                                    subfolder_name = 'zeno_fw_boxplots')
zv_fw_mae_results_df.to_csv(os.path.join(mae_out_path, 'zeno_fw_errors.csv'))

### FW - no missing video metrics 

In [28]:
zv_fw_bw_clean_df_2 = zv_fw_bw_clean_df.dropna(subset = ['delta_pix_h_rel_median_pose_zv',
                                                           'stride_time_median_sec_pose_zv', 
                                                            'mean_cadence_step_per_min_pose_zv', 
                                                           'stride_width_median_cm_pose_zv', 
                                                            'singlesupport_per_mean_pose_zv'])


len(zv_fw_bw_clean_df_2)

176

### FW - drop slower walkers 

In [29]:
fw_vel_q1 = zv_fw_bw_clean_df['FW_velocitycmsecmean'].quantile(0.25)
fw_vel_q1_df = pd.DataFrame([fw_vel_q1], columns = ['q1'])
print(fw_vel_q1_df)
fw_vel_q1_df.to_csv(os.path.join(mae_out_path, 'zeno_fw_q1_3.csv'))

# run analysis on individuals above 25th percentile of walking speed during video recording 
zv_fw_bw_clean_df_3 = zv_fw_bw_clean_df.loc[zv_fw_bw_clean_df['FW_velocitycmsecmean'] >= fw_vel_q1]
len(zv_fw_bw_clean_df_3)

          q1
0  129.21975


174

In [30]:
# FW 
zv_fw_mae_results_df_3 = calculate_metric_mean_error(df = zv_fw_bw_clean_df_3, 
                                                    video_columns = zv_colnames, 
                                                    bw_columns = bw_fw_colnames, 
                                                    units = units, 
                                                    output_folder_path = mae_out_path, 
                                                    subfolder_name = 'zeno_fw_boxplots_3')
zv_fw_mae_results_df_3.to_csv(os.path.join(mae_out_path, 'zeno_fw_errors_3.csv'))
zv_fw_mae_results_df_3

Unnamed: 0,bw_column,video_column,n_pairs,bw_metric_mean,mean_error,mean_abs_error,mean_err_and_mean_abs_err,mean_error_%_of_mean,mae_%_of_mean,upper_limit,lower_limit,limits_of_agreement
0,FW_stridetimesecmean,stride_time_median_sec_pose_zv,148,0.92,0.02,0.05,"0.02, 0.05",1.93,5.77,0.21,-0.17,-0.17; 0.21
1,FW_cadencestepsminmean,mean_cadence_step_per_min_pose_zv,152,132.31,9.1,16.21,"9.10, 16.21",6.88,12.25,50.53,-32.33,-32.33; 50.53
2,FW_stridewidthcmmean,stride_width_median_cm_pose_zv,151,9.18,-2.47,3.3,"-2.47, 3.30",-26.86,35.92,3.78,-8.71,-8.71; 3.78
3,FW_singlesupportmean,singlesupport_per_mean_pose_zv,140,37.41,4.23,4.84,"4.23, 4.84",11.32,12.95,12.64,-4.17,-4.17; 12.64
4,FW_totaldsupportmean,tot_dsupport_per_mean_pose_zv,140,24.03,-8.49,9.55,"-8.49, 9.55",-35.34,39.72,7.09,-24.07,-24.07; 7.09


In [31]:
# Home Videos  
hv_mae_results_df = calculate_metric_mean_error(df = hv_bw_clean_df, 
                                                    video_columns = hv_colnames, 
                                                    bw_columns = bw_pws_colnames, 
                                                    units = units, 
                                                    output_folder_path = mae_out_path, 
                                                    subfolder_name = 'home_boxplots')
hv_mae_results_df.to_csv(os.path.join(mae_out_path, 'home_errors.csv'))

IndexError: list index out of range

### Bland Altman Plots 

In [None]:
def bland_altman_plot(df, video_columns, bw_columns, col_color_key, units, output_folder_path, subfolder_name):
    
    if not os.path.exists(os.path.join(output_folder_path, subfolder_name)):
        os.makedirs(os.path.join(output_folder_path, subfolder_name)) 

    for metric_i, current_metric in enumerate(video_columns): 
        current_bw_col = bw_columns[metric_i]
        current_vid_col = video_columns[metric_i]
        current_unit = current_unit = units[metric_i]

        clean_df = df.dropna(subset=[current_vid_col, current_bw_col])
    
        # Compute the mean and the difference
        mean_measurements = (clean_df[current_bw_col] + clean_df[current_vid_col]) / 2
        diff_measurements = clean_df[current_bw_col] - clean_df[current_vid_col]  # Difference between measurements

        # Mean difference and standard deviation of the difference
        mean_diff = round(diff_measurements.mean(), 2)
        std_diff = diff_measurements.std()

        # 95% limits of agreement 
        upper_limit = round(mean_diff + 1.96 * std_diff, 2)
        lower_limit = round(mean_diff - 1.96 * std_diff, 2)
        limits_of_agreement = f"{lower_limit}; {upper_limit}"
        
        # Plot the data
        plt.figure(figsize=(3.25, 3), layout = 'tight')
        plt.scatter(mean_measurements, diff_measurements, s = 5, alpha = 0.9, c = clean_df[col_color_key])

        # color points by col_color_key var 
        if col_color_key == 'clean_EDSS':
            color_label = 'EDSS'
        elif col_color_key == 'clean_T25FW_Avg':
            color_label = 'T25FW' 
        elif col_color_key == 'PWS_velocitycmsecmean':
            color_label = 'PWS Velocity'
        elif col_color_key == 'FW_velocitycmsecmean':
            color_label = 'FW Velocity'
        else: 
            color_label = col_color_key
            
    #    plt.colorbar(location = 'right', label = color_label, fraction = 0.05)
        cbar = plt.colorbar(location='right', label=color_label, fraction=0.05, shrink=0.8)
        cbar.set_label(color_label, fontsize=8)
        cbar.ax.tick_params(labelsize=7)
        
        # Add mean difference line and limits of agreement (±1.96*std)
        plt.axhline(mean_diff, color='black', linestyle='--', alpha = 0.75, label=f'Mean diff')
        plt.axhline(upper_limit, color='red', linestyle='--', alpha = 0.75, label=f'+1.96 SD')
        plt.axhline(lower_limit, color='blue', linestyle='--', alpha = 0.75, label=f'-1.96 SD')
        plt.axhline(y=0, color='grey', alpha = 0.75, linestyle='-')
    
        # Labels and title
 #       plt.xlabel('Mean of Zeno Mat vs Video Pose Metric (' + current_unit + ')') 
        plt.xlabel(f'Mean ({current_unit})', fontsize = 9) 
#        plt.ylabel('Video Pose Metric (' + current_unit + ') - Zeno mat', fontsize = 9)
        plt.ylabel(f'Difference ({current_unit})', fontsize = 9)
        plt.tick_params(axis='both', labelsize=7)  
#        plt.legend(scatterpoints=1, title='', 
#                   loc='best', fontsize = 9)

        # if statment for title 
        if current_bw_col == 'PWS_stridetimesecmean' or current_bw_col == 'FW_stridetimesecmean':
            plot_title = 'Stride Time'
        elif current_bw_col == 'PWS_cadencestepsminmean' or current_bw_col == 'FW_cadencestepsminmean':
            plot_title = 'Cadence'
        elif current_bw_col == 'PWS_stridewidthcmmean' or current_bw_col == 'FW_stridewidthcmmean':
            plot_title = 'Stride Width'
        elif current_bw_col == 'PWS_singlesupportmean' or current_bw_col == 'FW_singlesupportmean': 
            plot_title = 'Single Support' 
        elif current_bw_col == 'PWS_totaldsupportmean' or current_bw_col == 'FW_totaldsupportmean':
            plot_title = 'Double Support' 
        else: 
            plot_title = current_bw_col
        
        plt.title(plot_title, fontsize = 9)
        plt.savefig(os.path.join(output_folder_path, 
                                 subfolder_name,
                                 str(current_vid_col + '_vs_' + current_bw_col + '_blandalt.png')),
                    bbox_inches='tight')
        plt.show()
        plt.close()

In [None]:
# set bland altman output folder 
bland_alt_out_path = os.path.join(out_path, 'bland_altman')
print(bland_alt_out_path)

# PWS all videos 

In [None]:
# PWS 
# color by EDSS 
bland_altman_plot(df = zv_pws_bw_clean_df, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_pws_colnames, 
                  col_color_key = 'clean_EDSS', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_pws_bland_alt_by_edss')

# color by T25FW 
bland_altman_plot(df = zv_pws_bw_clean_df, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_pws_colnames, 
                  col_color_key = 'clean_T25FW_Avg', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_pws_bland_alt_by_t25fw')

# color by that videos velocity from Zeno mat  
bland_altman_plot(df = zv_pws_bw_clean_df, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_pws_colnames, 
                  col_color_key = 'PWS_velocitycmsecmean', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_pws_bland_alt_by_pws_vel')

## PWS - only videos with all metrics calculated 

In [None]:
# color by that videos velocity from Zeno mat  
bland_altman_plot(df = zv_pws_bw_clean_df_2, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_pws_colnames, 
                  col_color_key = 'PWS_velocitycmsecmean', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_pws_bland_alt_by_pws_vel_2')

## PWS - faster walkers 

In [None]:
zv_pws_bw_clean_df_3
# color by that videos velocity from Zeno mat  
bland_altman_plot(df = zv_pws_bw_clean_df_3, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_pws_colnames, 
                  col_color_key = 'PWS_velocitycmsecmean', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_pws_bland_alt_by_pws_vel_3')

### FW all Videos 

In [None]:
# FW 
# color by EDSS 
bland_altman_plot(df = zv_fw_bw_clean_df, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_fw_colnames, 
                  col_color_key = 'clean_EDSS', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_fw_bland_alt_by_edss')

# color by T25FW 
bland_altman_plot(df = zv_fw_bw_clean_df, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_fw_colnames, 
                  col_color_key = 'clean_T25FW_Avg', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_fw_bland_alt_by_t25fw')

# color by fast walking video velocity
bland_altman_plot(df = zv_fw_bw_clean_df, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_fw_colnames, 
                  col_color_key = 'FW_velocitycmsecmean', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_fw_bland_alt_by_fw_vel')

In [None]:
### FW - drop slow walkers 
# color by fast walking video velocity
bland_altman_plot(df = zv_fw_bw_clean_df_3, 
                  video_columns = zv_colnames, 
                  bw_columns = bw_fw_colnames, 
                  col_color_key = 'FW_velocitycmsecmean', 
                  units = units, 
                  output_folder_path = bland_alt_out_path, 
                  subfolder_name = 'zeno_fw_bland_alt_by_fw_vel_3')