In [1]:
import pandas as pd 
import numpy as np
import os 
import seaborn as sns 
import matplotlib.pyplot as plt 

# Analysis goal 
Do people who improve over time on standard clinical measures also show changes in video metrics associated with outcomes?
Clinical outcomes: T25FW and EDSS 
Video Metrics: pixel proxy and stride time from FW videos (most associated with T25FW from mixed effects models) 

# Output folders 

In [2]:
# Analysis folder version: joined into both the input-data path and the output path in later cells
analysis_version = '007'

In [3]:
# Folder that receives this notebook's outputs:
# <project folder>/<analysis_version>/006_longitudinal
out_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                        analysis_version,
                        '006_longitudinal')

# exist_ok=True creates the folder (and any missing parents) when absent and
# does nothing when it is already there, so no separate existence check is needed.
os.makedirs(out_path, exist_ok=True)

# Load FW data 

In [4]:
# FW: cleaned, merged table read from the 000_merged_cleaned_data folder.
# The folder and file name are passed as separate components so that no
# backslash sits inside a non-raw string literal ('\z' is an invalid escape
# sequence and triggers a warning on recent Python versions).
zv_fw_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                                   analysis_version,
                                   '000_merged_cleaned_data',
                                   'zv_bw_merged_gait_vertical_FW_1_clean.csv')
# index_col = 0: the first CSV column is used as the DataFrame index
zv_fw_bw_df = pd.read_csv(zv_fw_bw_clean_path, index_col = 0)
zv_fw_bw_df.head()

Unnamed: 0,video_id_date_name_pose_zv,id_date_pose_zv,task_pose_zv,frames_per_second_pose_zv,total_video_duration_sec_pose_zv,delta_pix_h_rel_median_pose_zv,walking_segmets_n_pose_zv,walking_segments_duration_mean_pose_zv,walking_segments_duration_median_pose_zv,stride_time_mean_sec_pose_zv,...,Riley_estimated_EDSS,EDSS_same_before_after_MM,demoEHR_Vitals_dateDiff,tc_Examinee_Education,edss_severity_num,edss_severity_cat,t25fw_group_num,t25fw_group_cat,race_ethnicity_clean,ms_dx_condensed
0,gait_vertical_FW_1_BW-0002_2022_09_12,BW-0002\2022_09_12,gait_vertical_FW_1,25,28.96,0.28,,,,,...,,,-0.454109,20.0,2.0,moderate,1.0,under_6,Hispanic or Latino,RRMS
1,gait_vertical_FW_1_BW-0002_2023_09_12,BW-0002\2023_09_12,gait_vertical_FW_1,30,36.5,0.25,3.0,6.71,7.23,1.015,...,,,-11.391412,20.0,2.0,moderate,2.0,6_to_8,Hispanic or Latino,RRMS
2,gait_vertical_FW_1_BW-0003_2022_10_24,BW-0003\2022_10_24,gait_vertical_FW_1,30,28.233333,0.35,1.0,2.33,2.33,,...,,,-7.471505,16.0,1.0,mild,1.0,under_6,Asian,"MS, Subtype Not Specified"
3,gait_vertical_FW_1_BW-0004_2022_09_19,BW-0004\2022_09_19,gait_vertical_FW_1,30,25.3,0.36,2.0,3.15,3.15,1.433,...,,,-0.453264,16.0,2.0,moderate,1.0,under_6,White Non Hispanic,RRMS
4,gait_vertical_FW_1_BW-0006_2022_09_26,BW-0006\2022_09_26,gait_vertical_FW_1,30,24.966667,0.27,2.0,4.63,4.63,0.967,...,,,-0.428669,18.0,2.0,moderate,1.0,under_6,White Non Hispanic,Progressive MS


In [5]:
# Number of videos = number of rows in the FW table
zv_fw_bw_df.shape[0]

232

In [6]:
# Number of participants = distinct values in the `id_video` column
n_participants = zv_fw_bw_df['id_video'].nunique()
print(n_participants)

153


# Filter to only include participants with video data at both the baseline and one-year follow-up visits (REDCap event "Year 2 Visit")

In [7]:
# Tally participants (by `bw_id`) that appear once versus more than once in the FW table
fw_bw_ids = zv_fw_bw_df['bw_id']
n_fw_single = (fw_bw_ids.value_counts() == 1).sum()        # ids seen exactly once
n_fw_multiple = fw_bw_ids[fw_bw_ids.duplicated()].nunique()  # distinct ids that repeat
pd.DataFrame(data = {"FW_single_visit" : [n_fw_single],
                     "FW_multiple_visits" : [n_fw_multiple]})

Unnamed: 0,FW_single_visit,FW_multiple_visits
0,80,73


In [8]:
# Keep only rows where both video metrics (delta pixel and stride time) are non-missing
required_metric_cols = ['delta_pix_h_rel_median_pose_zv',
                        'stride_time_median_sec_pose_zv']
zv_fw_bw_df_w_metrics = zv_fw_bw_df.dropna(subset = required_metric_cols)

# Re-tally participants with one versus several remaining rows
bw_ids_w_metrics = zv_fw_bw_df_w_metrics['bw_id']
n_single_w_metrics = (bw_ids_w_metrics.value_counts() == 1).sum()
n_multiple_w_metrics = bw_ids_w_metrics[bw_ids_w_metrics.duplicated()].nunique()
pd.DataFrame(data = {"FW_single_visit" : [n_single_w_metrics],
                     "FW_multiple_visits" : [n_multiple_w_metrics]})



Unnamed: 0,FW_single_visit,FW_multiple_visits
0,86,51


In [9]:
# REDCap event names to keep: the baseline visit and the Year 2 visit
base_yr_2 = ['Brainwalk: Baseline visit (Arm 1: Baseline visit)', 'Year 2 Visit (Arm 1: Baseline visit)']

# Restrict the metric-complete videos to those two events
is_base_or_yr2 = zv_fw_bw_df_w_metrics['redcap_event_name'].isin(base_yr_2)
fw_base_yr2_all_df = zv_fw_bw_df_w_metrics.loc[is_base_or_yr2]

# Report the total number of videos, then the breakdown per event
n_base_yr2_videos = len(fw_base_yr2_all_df)
print(f"number videos: {n_base_yr2_videos}")
fw_base_yr2_all_df['redcap_event_name'].value_counts()

number videos: 183


redcap_event_name
Brainwalk: Baseline visit (Arm 1: Baseline visit)    116
Year 2 Visit (Arm 1: Baseline visit)                  67
Name: count, dtype: int64

In [10]:
# Select participants that have BOTH a baseline and a Year 2 video.
# Testing `bw_id` for duplication alone would also keep a participant with two
# videos from the same event (e.g. two baseline videos and no Year 2 video),
# so instead count the distinct event names per participant and require all
# of the events listed in `base_yr_2`.
events_per_participant = (fw_base_yr2_all_df
                          .groupby('bw_id')['redcap_event_name']
                          .transform('nunique'))
fw_base_yr2_df = fw_base_yr2_all_df.loc[events_per_participant == len(base_yr_2)]

# Sanity check: with one video per participant per event the two counts match.
# NOTE(review): they can still differ if a participant has repeated videos within one event.
print('number redcap events - expect to be equal')
print(fw_base_yr2_df['redcap_event_name'].value_counts())

# Save the paired baseline / Year 2 rows for the follow-up statistics
fw_base_yr2_df.to_csv(os.path.join(out_path, 'fw_base_yr2_1_all.csv'))

number redcap events - expect to be equal
redcap_event_name
Brainwalk: Baseline visit (Arm 1: Baseline visit)    47
Year 2 Visit (Arm 1: Baseline visit)                 47
Name: count, dtype: int64


# next stats steps in R
Paired t-test, minimal significant difference, etc.