In [1]:
import pandas as pd 
import numpy as np
import os 
import seaborn as sns 
import matplotlib.pyplot as plt 

# Analysis goal 
Do people who improve over time on standard clinical measures also show changes in video metrics associated with outcomes?
Clinical outcomes: T25FW and EDSS 
Video Metrics: pixel proxy and stride time from FW videos (most associated with T25FW from mixed effects models) 

# Output folders 

In [2]:
# Analysis folder version: used as a path component when building both the
# output folder and the input CSV path in the following cells.
analysis_version = '011'

In [3]:
# Output folder for this notebook: <project folder>/<analysis_version>/006_longitudinal
# NOTE(review): this is a machine-specific absolute path; the notebook only runs
# as-is on a machine with the same folder layout.
out_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                        analysis_version,
                        '006_longitudinal')

# exist_ok=True creates the folder (and any missing parents) on the first run and
# does nothing when it already exists, so no separate os.path.exists() check is needed.
os.makedirs(out_path, exist_ok=True)

# Load FW data 

In [4]:
# FW
# Path to the cleaned, merged FW_1 CSV inside this analysis version's folder.
# The relative part is a raw string (like the base folder) so that the backslash
# before "zv_..." is taken literally instead of being parsed as the invalid
# escape sequence "\z".
zv_fw_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                                   analysis_version,
                                   r'000_merged_cleaned_data\zv_bw_merged_gait_vertical_FW_1_clean.csv')

# index_col = 0: the first CSV column is used as the DataFrame index
zv_fw_bw_df = pd.read_csv(zv_fw_bw_clean_path, index_col = 0)
zv_fw_bw_df.head()

Unnamed: 0,video_id_date_name_pose_zv,id_date_pose_zv,task_pose_zv,frames_per_second_pose_zv,total_video_duration_sec_pose_zv,delta_pix_h_rel_median_pose_zv,walking_segmets_n_pose_zv,walking_segments_duration_mean_pose_zv,walking_segments_duration_median_pose_zv,stride_time_num_strides_pose_zv,...,cadence_high_error,stride_width_high_error,ssupport_high_error,dsupport_high_error,metric_notes,potential fixes,correct_person_pixel.1,multiple_people_YN,multiple_people_3,assistive_device_YN
1,gait_vertical_FW_1_BW-0002_2023_09_12,BW-0002\2023_09_12,gait_vertical_FW_1,30,36.5,0.24,3.0,4.57,5.2,14.0,...,,,,,,,,Y,Group_1,N
2,gait_vertical_FW_1_BW-0003_2022_10_24,BW-0003\2022_10_24,gait_vertical_FW_1,30,28.233333,0.23,1.0,2.33,2.33,0.0,...,Y,N,N,N,only one stride in short segment identified\n\...,one peak not identified - not high enough peak...,,N,N,N
4,gait_vertical_FW_1_BW-0006_2022_09_26,BW-0006\2022_09_26,gait_vertical_FW_1,30,24.966667,0.35,2.0,2.87,2.87,4.0,...,,,,,,,,Y,Group_1,N
5,gait_vertical_FW_1_BW-0007_2022_09_27,BW-0007\2022_09_27,gait_vertical_FW_1,30,22.633333,0.44,2.0,3.83,3.83,9.0,...,,,,,,,,N,N,N
6,gait_vertical_FW_1_BW-0007_2024_06_10,BW-0007\2024_06_10,gait_vertical_FW_1,30,23.0,0.28,4.0,3.76,4.02,17.0,...,,,,,,,,N,N,N


In [5]:
# Number of videos = number of rows in the FW table
zv_fw_bw_df.shape[0]

176

In [6]:
# Number participants
# Counts the distinct non-null values in the 'id_video' column.
# NOTE(review): the visit-count cells further down key on 'bw_id' rather than
# 'id_video' - confirm that both columns identify the same participants.
print(zv_fw_bw_df['id_video'].nunique())

125


# Filter to only include participants with multiple visits
Visits can be from any timepoint, not just baseline and the year 2 follow-up.

In [7]:
# Participants ('bw_id') that appear in exactly one row vs. in more than one row
pd.DataFrame({
    "FW_single_visit": [zv_fw_bw_df['bw_id'].value_counts().eq(1).sum()],
    "FW_multiple_visits": [zv_fw_bw_df['bw_id'].value_counts().gt(1).sum()],
})

Unnamed: 0,FW_single_visit,FW_multiple_visits
0,74,51


In [8]:
# Keep only the rows where both video metrics are present: a row is dropped when
# either the delta pixel or the stride time value is missing.
zv_fw_bw_df_w_metrics = zv_fw_bw_df.dropna(
    subset = ['delta_pix_h_rel_median_pose_zv', 'stride_time_median_sec_pose_zv'],
    how = 'any',
)

# Single- vs. multiple-visit participant counts, recomputed on the filtered rows
pd.DataFrame({
    "FW_single_visit": [zv_fw_bw_df_w_metrics['bw_id'].value_counts().eq(1).sum()],
    "FW_multiple_visits": [zv_fw_bw_df_w_metrics['bw_id'].value_counts().gt(1).sum()],
})



Unnamed: 0,FW_single_visit,FW_multiple_visits
0,76,40


In [9]:
# Number of rows per REDCap visit event among the rows that have both video metrics
zv_fw_bw_df_w_metrics['redcap_event_name'].value_counts()

redcap_event_name
Brainwalk: Baseline visit (Arm 1: Baseline visit)    85
Year 2 Visit (Arm 1: Baseline visit)                 61
Year 3 Visit (Arm 1: Baseline visit)                  9
Year 4 Visit (Arm 1: Baseline visit)                  1
Name: count, dtype: int64

In [10]:
# DISABLED - kept for reference only. This restricted the data to baseline and
# year 2 visits; the analysis now accepts visits from any timepoint, so nothing
# in this cell is executed.
# select videos with delta and stride time that are either baseline or year 2 visits
#base_yr_2 = ['Brainwalk: Baseline visit (Arm 1: Baseline visit)', 'Year 2 Visit (Arm 1: Baseline visit)']
#fw_base_yr2_all_df = zv_fw_bw_df_w_metrics.loc[zv_fw_bw_df_w_metrics['redcap_event_name'].isin(base_yr_2)]
# number videos
#print(f"number videos: {len(fw_base_yr2_all_df)}")
#fw_base_yr2_all_df['redcap_event_name'].value_counts()

In [11]:
# Select participants with more than one remaining visit: duplicated(keep = False)
# flags every row whose 'bw_id' occurs more than once, so all visits of a repeat
# participant are kept. Any combination of timepoints qualifies, not only
# baseline + year 2.
zv_fw_bw_df_w_metrics = zv_fw_bw_df_w_metrics.loc[zv_fw_bw_df_w_metrics['bw_id'].duplicated(keep = False)]

# Old check, left disabled: event counts are not expected to be equal when any
# timepoint is allowed.
#print('number redcap events - expect to be equal')
print(zv_fw_bw_df_w_metrics['redcap_event_name'].value_counts())

# NOTE(review): the file name says "base_yr2", but the saved table is not limited
# to baseline / year 2 rows - later visits are written as well.
zv_fw_bw_df_w_metrics.to_csv(os.path.join(out_path, 'fw_base_yr2_1_all.csv')) 

redcap_event_name
Year 2 Visit (Arm 1: Baseline visit)                 37
Brainwalk: Baseline visit (Arm 1: Baseline visit)    35
Year 3 Visit (Arm 1: Baseline visit)                  7
Year 4 Visit (Arm 1: Baseline visit)                  1
Name: count, dtype: int64


# next stats steps in R
paired t-test, minimal significant difference, etc.