In [1]:
# summary metrics of videos included in analysis - video duration, fps, etc 

In [2]:
import os
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

## Load data and folders

In [3]:
analysis_version = '011'

In [4]:
# input paths - metrics merged with brainwalk data 

# zeno preferred walking speed 
zv_pws_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis', 
                                    analysis_version, 
                                    '000_merged_cleaned_data\zv_bw_merged_gait_vertical_PWS_1_clean.csv')
zv_pws_bw_clean_df = pd.read_csv(zv_pws_bw_clean_path, index_col = 0)

# fast walking speed 
zv_fw_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis', 
                                    analysis_version, 
                                   '000_merged_cleaned_data\zv_bw_merged_gait_vertical_FW_1_clean.csv') 
zv_fw_bw_clean_df = pd.read_csv(zv_fw_bw_clean_path, index_col = 0) 

# home video 
hv_bw_clean_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis', 
                                analysis_version, 
                                '000_merged_cleaned_data\hv_bw_merged_clean.csv') 

hv_bw_clean_df = pd.read_csv(hv_bw_clean_path, index_col = 0) 

In [5]:
# output path 
output_path = os.path.join(r'C:\Users\mmccu\Box\MM_Personal\5_Projects\BoveLab\3_Data_and_Code\gait_bw_zeno_home_analysis',
                           analysis_version,
                           '004b_video_segment_metric_summary')

if not os.path.exists(output_path): 
    os.makedirs(output_path)

In [6]:
# for all datasets, add 0 walking segments identified if walking segments n column = NaN
zv_pws_bw_clean_df['walking_segmets_n_pose_zv'] = zv_pws_bw_clean_df['walking_segmets_n_pose_zv'].fillna(0)
zv_fw_bw_clean_df['walking_segmets_n_pose_zv'] = zv_fw_bw_clean_df['walking_segmets_n_pose_zv'].fillna(0)
hv_bw_clean_df['walking_segmets_n_pose_hv'] = hv_bw_clean_df['walking_segmets_n_pose_hv'].fillna(0)

## Functions 

In [7]:
# function - summarize one column
def video_one_col_stats(df, column_name):
    col_count = round(df[column_name].count(), 1)
    col_mean = round(df[column_name].mean(), 1)
    col_median = round(df[column_name].median(), 1)
    col_max = round(df[column_name].max(), 1)
    col_min = round(df[column_name].min(), 1) 
    col_std = round(df[column_name].std(), 1) 
    col_q1 = df[column_name].quantile(0.25)
    col_q3 = df[column_name].quantile(0.75) 
    col_iqr = round(col_q3 - col_q1, 1) 

    col_stats = {'Value' : column_name,
                 'Count' : [col_count],
                 'Mean' : [col_mean],
                 'Median' : [col_median], 
                 'Maximum' : [col_max],
                 'Minimum' : [col_min],
                 'S.D.' : [col_std],
                 'IQR' : [col_iqr]
                 } 
    
    col_stats_df = pd.DataFrame(col_stats) 

    # plot histogram 
    plt.hist(df[column_name])
    plt.title(column_name)
 #   plt.show()
    plt.close()

    return col_stats_df 


In [8]:
# function - counts and % of each video 
def col_counts_and_percent(df, column_name): 
    df_n_value_counts = pd.DataFrame(df[column_name].value_counts()) 
    df_n_value_counts_per = pd.DataFrame(round(df[column_name].value_counts(normalize = True) * 100, 1)) 

    counts_per_df = pd.concat([df_n_value_counts, df_n_value_counts_per], axis = 1) 
    counts_per_df = counts_per_df.sort_index()

    return counts_per_df

In [9]:
# function - % of each video in which metrics were calculated 

def calculate_per_vid_w_metric(df, video_task): 
    all_cols_per = [] 
    
    # total number of videos included 
    df_len = len(df) 

    # number of videos included in analysis 
    if video_task == 'zeno': 
        df_w_segment_count = df['walking_segmets_n_pose_zv'].loc[df['walking_segmets_n_pose_zv'] > 0].count() 
    elif video_task == 'home': 
        df_w_segment_count = df['walking_segmets_n_pose_hv'].loc[df['walking_segmets_n_pose_hv'] > 0].count() 
    
    for col_i, current_col in enumerate(df.columns): 
        # number of values in this column 
        current_col_count = df[current_col].count()

        # % of all videos with metric calculate 
        current_col_per = (current_col_count / df_len) * 100 
        current_col_per = round(current_col_per, 0) 

        # of videos with segment identified, % with each metric 
        current_col_segment_per =  round((current_col_count / df_w_segment_count) * 100, 0) 

        output = {"column_name": current_col,
                  'num_videos' : df_len, 
                  'num_videos_w_segment' : df_w_segment_count, 
                  'current_col_count' : current_col_count,
                  "current_col_all_per": current_col_per, 
                  'current_col_segment_per': current_col_segment_per
                 }

        all_cols_per.append(output)

    all_cols_per_df = pd.DataFrame(all_cols_per)
    
    # prettier one for tables 
    filter_values = ['delta_pix_h_rel_median_pose', 
                     'stride_time_mean_sec_pose',
                     'mean_cadence_step_per_min_pose',
                     'stride_width_mean_cm_pose',
                     'gait_cycle_time_sec_mean_pose']

    clean_names = ['Pixel Height Per Second', 
                   'Stride Time',
                   'Cadence',
                   'Stride Width',
                   'Gait Cycle Phases: Stance, Swing, and Support']

    # Filter rows where 'column_name' matches any value in the list
    filtered_df = all_cols_per_df.loc[all_cols_per_df['column_name'].str.contains('|'.join(filter_values))]
#    filtered_df = filtered_df[['column_name', 'percent_with_data']]

    filtered_df['Metric'] = clean_names 
#    filtered_df.rename(columns={'percent_with_data': 'Videos with Metric Calculated (%)'}, 
#                                     inplace=True)
#    filtered_df = filtered_df[['Metric', 'Videos with Metric Calculated (%)', 'column_name']] 

    return all_cols_per_df, filtered_df
        

## split zeno vid dataframes by HC vs MS participants  

In [10]:
# zv all metrics 

# fast walk 
print('fast walk') 
zv_fw_MS_w_bw_df = zv_fw_bw_clean_df.loc[zv_fw_bw_clean_df['demographic_diagnosis'] == 'MS']
print('------') 
print(zv_fw_MS_w_bw_df['demographic_diagnosis'].value_counts())
print(zv_fw_MS_w_bw_df['task_pose_zv'].value_counts())

zv_fw_HC_w_bw_df = zv_fw_bw_clean_df.loc[zv_fw_bw_clean_df['demographic_diagnosis'] == 'HC']
print('------') 
print(zv_fw_HC_w_bw_df['demographic_diagnosis'].value_counts())
print(zv_fw_HC_w_bw_df['task_pose_zv'].value_counts())
    
# preferred walk 
print('preferred walking speed') 
zv_pws_MS_w_bw_df = zv_pws_bw_clean_df.loc[zv_pws_bw_clean_df['demographic_diagnosis'] == 'MS']
print('------') 
print(zv_pws_MS_w_bw_df['demographic_diagnosis'].value_counts())
print(zv_pws_MS_w_bw_df['task_pose_zv'].value_counts())

zv_pws_HC_w_bw_df = zv_pws_bw_clean_df.loc[zv_pws_bw_clean_df['demographic_diagnosis'] == 'HC']
print('------') 
print(zv_pws_HC_w_bw_df['demographic_diagnosis'].value_counts())
print(zv_pws_HC_w_bw_df['task_pose_zv'].value_counts())

fast walk
------
demographic_diagnosis
MS    176
Name: count, dtype: int64
task_pose_zv
gait_vertical_FW_1    176
Name: count, dtype: int64
------
Series([], Name: count, dtype: int64)
Series([], Name: count, dtype: int64)
preferred walking speed
------
demographic_diagnosis
MS    178
Name: count, dtype: int64
task_pose_zv
gait_vertical_PWS_1    178
Name: count, dtype: int64
------
Series([], Name: count, dtype: int64)
Series([], Name: count, dtype: int64)


## split home vid dataframes by task 

In [11]:
# home videos 
# right 
hv_r_df = hv_bw_clean_df.loc[hv_bw_clean_df['task_pose_hv'] == 'gait_vertical_right']
print(hv_r_df['task_pose_hv'].value_counts())
# left 
hv_l_df = hv_bw_clean_df.loc[hv_bw_clean_df['task_pose_hv'] == 'gait_vertical_left']
print(hv_l_df['task_pose_hv'].value_counts())

task_pose_hv
gait_vertical_right    40
Name: count, dtype: int64
task_pose_hv
gait_vertical_left    39
Name: count, dtype: int64


## Run Functions 

### Zeno Videos - Fast Walk 

In [12]:
# HC - Fast Walk 
# summary statistics ----------------------------------
# calculate 
#zv_fw_HC_duration_summary_df = video_one_col_stats(zv_fw_HC_w_bw_df, 'total_video_duration_sec_pose_zv')
#zv_fw_HC_fps_summary_df = video_one_col_stats(zv_fw_HC_w_bw_df, 'frames_per_second_pose_zv')
#zv_fw_HC_segment_n_summary_df = video_one_col_stats(zv_fw_HC_w_bw_df, 'walking_segmets_n_pose_zv') 
#zv_fw_HC_segment_duration_summary_df = video_one_col_stats(zv_fw_HC_w_bw_df, 'walking_segments_duration_mean_pose_zv')

# compile + save 
#zv_fw_HC_stats_df = pd.concat([zv_fw_HC_duration_summary_df, 
#                                 zv_fw_HC_fps_summary_df, 
#                                 zv_fw_HC_segment_n_summary_df, 
#                                 zv_fw_HC_segment_duration_summary_df]) 

#zv_fw_HC_stats_df.to_csv(os.path.join(output_path, 
#                                    'zv_fw_HC_video_and_segment_stats.csv'))                       

# categorical - counts and % -----------------------
#zv_fw_HC_fps_counts_df = col_counts_and_percent(zv_fw_HC_w_bw_df, 'frames_per_second_pose_zv')
#zv_fw_HC_segment_counts_df = col_counts_and_percent(zv_fw_HC_w_bw_df, 'walking_segmets_n_pose_zv')

# save 
#zv_fw_HC_fps_counts_df.to_csv(os.path.join(output_path, 
#                                         'zv_fw_HC_fps_counts.csv')) 

#zv_fw_HC_segment_counts_df.to_csv(os.path.join(output_path, 
#                                             'zv_fw_HC_segment_counts.csv'))  

# % of videos with each video metric calculated --------------------
#zv_fw_HC_w_bw_df_w_segment = zv_fw_HC_w_bw_df.loc[zv_fw_HC_w_bw_df['walking_segmets_n_pose_zv'].notnull()]
#zv_fw_HC_cols_per_df, zv_fw_HC_filt_cols_per_df = calculate_per_vid_w_metric(zv_fw_HC_w_bw_df_w_segment)
#zv_fw_HC_cols_per_df.to_csv(os.path.join(output_path, 
#                                       'zv_fw_HC_percent_w_data.csv'))
#zv_fw_HC_filt_cols_per_df.to_csv(os.path.join(output_path, 
#                                                'zv_fw_HC_percent_w_data_metricsSubset.csv'))

In [13]:
# MS - Fast Walk 
# summary statistics ----------------------------------
# calculate - all videos 
zv_fw_MS_duration_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df, 'total_video_duration_sec_pose_zv')
zv_fw_MS_fps_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df, 'frames_per_second_pose_zv')
zv_fw_MS_segment_n_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df, 'walking_segmets_n_pose_zv') 
zv_fw_MS_segment_duration_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df, 'walking_segments_duration_mean_pose_zv')

# compile + save 
zv_fw_MS_stats_df = pd.concat([zv_fw_MS_duration_summary_df, 
                             zv_fw_MS_fps_summary_df, 
                             zv_fw_MS_segment_n_summary_df, 
                             zv_fw_MS_segment_duration_summary_df]) 

zv_fw_MS_stats_df.to_csv(os.path.join(output_path, 
                                    'zv_fw_MS_video_and_segment_stats.csv'))                       

# categorical - counts and % from all videos  -----------------------
zv_fw_MS_fps_counts_df = col_counts_and_percent(zv_fw_MS_w_bw_df, 'frames_per_second_pose_zv')
zv_fw_MS_segment_counts_df = col_counts_and_percent(zv_fw_MS_w_bw_df, 'walking_segmets_n_pose_zv')

# save 
zv_fw_MS_fps_counts_df.to_csv(os.path.join(output_path, 
                                         'zv_fw_MS_fps_counts.csv')) 

zv_fw_MS_segment_counts_df.to_csv(os.path.join(output_path, 
                                             'zv_fw_MS_segment_counts.csv'))  

# of videos with at least 1 walking segment % each video metric calculated --------------------
#zv_fw_MS_w_bw_df_w_segment = zv_fw_MS_w_bw_df.loc[zv_fw_MS_w_bw_df['walking_segmets_n_pose_zv'].notnull()]
zv_fw_MS_cols_per_df, zv_fw_MS_filt_cols_per_df = calculate_per_vid_w_metric(zv_fw_MS_w_bw_df, 'zeno')
zv_fw_MS_cols_per_df.to_csv(os.path.join(output_path, 
                                       'zv_fw_MS_percent_w_data.csv'))
zv_fw_MS_filt_cols_per_df.to_csv(os.path.join(output_path, 
                                                'zv_fw_MS_percent_w_data_metricsSubset.csv'))


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Metric'] = clean_names


In [14]:
zv_fw_MS_filt_cols_per_df

Unnamed: 0,column_name,num_videos,num_videos_w_segment,current_col_count,current_col_all_per,current_col_segment_per,Metric
5,delta_pix_h_rel_median_pose_zv,176,162,174,99.0,107.0,Pixel Height Per Second
10,stride_time_mean_sec_pose_zv,176,162,157,89.0,97.0,Stride Time
16,mean_cadence_step_per_min_pose_zv,176,162,157,89.0,97.0,Cadence
17,stride_width_mean_cm_pose_zv,176,162,161,91.0,99.0,Stride Width
23,gait_cycle_time_sec_mean_pose_zv,176,162,119,68.0,73.0,"Gait Cycle Phases: Stance, Swing, and Support"


### Zeno Videos - Preferred Walk 

In [15]:
# HC - preferred walk 
# summary statistics ----------------------------------
# calculate 
#zv_pws_HC_duration_summary_df = video_one_col_stats(zv_pws_HC_w_bw_df, 'total_video_duration_sec_pose_zv')
#zv_pws_HC_fps_summary_df = video_one_col_stats(zv_pws_HC_w_bw_df, 'frames_per_second_pose_zv')
#zv_pws_HC_segment_n_summary_df = video_one_col_stats(zv_pws_HC_w_bw_df, 'walking_segmets_n_pose_zv') 
#zv_pws_HC_segment_duration_summary_df = video_one_col_stats(zv_pws_HC_w_bw_df, 'walking_segments_duration_mean_pose_zv')

# compile + save 
#zv_pws_HC_stats_df = pd.concat([zv_pws_HC_duration_summary_df, 
#                             zv_pws_HC_fps_summary_df, 
#                             zv_pws_HC_segment_n_summary_df, 
#                             zv_pws_HC_segment_duration_summary_df]) 

#zv_pws_HC_stats_df.to_csv(os.path.join(output_path, 
#                                    'zv_pws_HC_video_segment_stats.csv'))                       

# categorical - counts and % -----------------------
#zv_pws_HC_fps_counts_df = col_counts_and_percent(zv_pws_HC_w_bw_df, 'frames_per_second_pose_zv')
#zv_pws_HC_segment_counts_df = col_counts_and_percent(zv_pws_HC_w_bw_df, 'walking_segmets_n_pose_zv')

# save 
#zv_pws_HC_fps_counts_df.to_csv(os.path.join(output_path, 
#                                         'zv_pws_HC_fps_counts.csv')) 

#zv_pws_HC_segment_counts_df.to_csv(os.path.join(output_path, 
#                                             'zv_pws_HC_segment_counts.csv'))  

# % of videos with each video metric calculated --------------------

#zv_pws_HC_cols_per_df, zv_pws_HC_filt_cols_per_df = calculate_per_vid_w_metric(zv_pws_HC_w_bw_df)
#zv_pws_HC_cols_per_df.to_csv(os.path.join(output_path, 
#                                       'zv_pws_HC_percent_w_data.csv'))
#zv_pws_HC_filt_cols_per_df.to_csv(os.path.join(output_path, 
#                                                'zv_pws_HC_percent_w_data_metricsSubset.csv'))

In [16]:
# MS - preferred walk 
# summary statistics ----------------------------------
# calculate 
zv_pws_MS_duration_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df, 'total_video_duration_sec_pose_zv')
zv_pws_MS_fps_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df, 'frames_per_second_pose_zv')
zv_pws_MS_segment_n_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df, 'walking_segmets_n_pose_zv') 
zv_pws_MS_segment_duration_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df, 'walking_segments_duration_mean_pose_zv')

# compile + save 
zv_pws_MS_stats_df = pd.concat([zv_pws_MS_duration_summary_df, 
                             zv_pws_MS_fps_summary_df, 
                             zv_pws_MS_segment_n_summary_df, 
                             zv_pws_MS_segment_duration_summary_df]) 

zv_pws_MS_stats_df.to_csv(os.path.join(output_path, 
                                    'zv_pws_MS_video_and_segment_stats.csv'))                       

# categorical - counts and % -----------------------
zv_pws_MS_fps_counts_df = col_counts_and_percent(zv_pws_MS_w_bw_df, 'frames_per_second_pose_zv')
zv_pws_MS_segment_counts_df = col_counts_and_percent(zv_pws_MS_w_bw_df, 'walking_segmets_n_pose_zv')

# save 
zv_pws_MS_fps_counts_df.to_csv(os.path.join(output_path, 
                                         'zv_pws_MS_fps_counts.csv')) 

zv_pws_MS_segment_counts_df.to_csv(os.path.join(output_path, 
                                             'zv_pws_MS_segment_counts.csv'))  

# % of videos with each video metric calculated --------------------
#zv_pws_MS_w_bw_df_w_segment = zv_pws_MS_w_bw_df.loc[zv_pws_MS_w_bw_df['walking_segmets_n_pose_zv'].notnull()]
zv_pws_MS_cols_per_df, zv_pws_MS_filt_cols_per_df = calculate_per_vid_w_metric(zv_pws_MS_w_bw_df, 'zeno')
zv_pws_MS_cols_per_df.to_csv(os.path.join(output_path, 
                                       'zv_pws_MS_percent_w_data.csv'))
zv_pws_MS_filt_cols_per_df.to_csv(os.path.join(output_path, 
                                                'zv_pws_MS_percent_w_data_metricsSubset.csv'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Metric'] = clean_names


In [17]:
zv_pws_MS_filt_cols_per_df

Unnamed: 0,column_name,num_videos,num_videos_w_segment,current_col_count,current_col_all_per,current_col_segment_per,Metric
5,delta_pix_h_rel_median_pose_zv,178,166,175,98.0,105.0,Pixel Height Per Second
10,stride_time_mean_sec_pose_zv,178,166,161,90.0,97.0,Stride Time
16,mean_cadence_step_per_min_pose_zv,178,166,161,90.0,97.0,Cadence
17,stride_width_mean_cm_pose_zv,178,166,165,93.0,99.0,Stride Width
23,gait_cycle_time_sec_mean_pose_zv,178,166,138,78.0,83.0,"Gait Cycle Phases: Stance, Swing, and Support"


### Home Videos 

In [18]:
# all home videos 
# summary statistics ----------------------------------
# calculate 
hv_all_duration_summary_df = video_one_col_stats(hv_bw_clean_df, 'total_video_duration_sec_pose_hv')
hv_all_fps_summary_df = video_one_col_stats(hv_bw_clean_df, 'frames_per_second_pose_hv')
hv_all_segment_n_summary_df = video_one_col_stats(hv_bw_clean_df, 'walking_segmets_n_pose_hv') 
hv_all_segment_duration_summary_df = video_one_col_stats(hv_bw_clean_df, 'walking_segments_duration_mean_pose_hv')

# compile + save 
hv_all_stats_df = pd.concat([hv_all_duration_summary_df, 
                             hv_all_fps_summary_df, 
                             hv_all_segment_n_summary_df, 
                             hv_all_segment_duration_summary_df]) 

hv_all_stats_df.to_csv(os.path.join(output_path, 
                                    'hv_all_video_and_segment_stats.csv'))                       

# categorical - counts and % -----------------------
hv_all_fps_counts_df = col_counts_and_percent(hv_bw_clean_df, 'frames_per_second_pose_hv')
hv_all_segment_counts_df = col_counts_and_percent(hv_bw_clean_df, 'walking_segmets_n_pose_hv')

# save 
hv_all_fps_counts_df.to_csv(os.path.join(output_path, 
                                         'hv_all_fps_counts.csv')) 

hv_all_segment_counts_df.to_csv(os.path.join(output_path, 
                                             'hv_all_segment_counts.csv'))  

# % of videos with each video metric calculated --------------------
# hv_bw_clean_df_w_segment = hv_bw_clean_df.loc[hv_bw_clean_df['walking_segmets_n_pose_hv'].notnull()]
hv_all_cols_per_df, hv_all_filt_cols_per_df = calculate_per_vid_w_metric(hv_bw_clean_df, 'home')
hv_all_cols_per_df.to_csv(os.path.join(output_path, 
                                       'hv_all_percent_w_data.csv'))

hv_all_filt_cols_per_df.to_csv(os.path.join(output_path, 
                                                'hv_all_percent_w_data_metricsSubset.csv'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Metric'] = clean_names


In [19]:
hv_all_filt_cols_per_df

Unnamed: 0,column_name,num_videos,num_videos_w_segment,current_col_count,current_col_all_per,current_col_segment_per,Metric
5,delta_pix_h_rel_median_pose_hv,81,72,76,94.0,106.0,Pixel Height Per Second
10,stride_time_mean_sec_pose_hv,81,72,70,86.0,97.0,Stride Time
16,mean_cadence_step_per_min_pose_hv,81,72,70,86.0,97.0,Cadence
17,stride_width_mean_cm_pose_hv,81,72,72,89.0,100.0,Stride Width
23,gait_cycle_time_sec_mean_pose_hv,81,72,54,67.0,75.0,"Gait Cycle Phases: Stance, Swing, and Support"


# --------------------------------------------

In [20]:
# right turn only 
# summary statistics ----------------------------------
# calculate 
hv_r_duration_summary_df = video_one_col_stats(hv_r_df, 'total_video_duration_sec_pose_hv')
hv_r_fps_summary_df = video_one_col_stats(hv_r_df, 'frames_per_second_pose_hv')
hv_r_segment_n_summary_df = video_one_col_stats(hv_r_df, 'walking_segmets_n_pose_hv') 
hv_r_segment_duration_summary_df = video_one_col_stats(hv_r_df, 'walking_segments_duration_mean_pose_hv')

# compile + save 
hv_r_stats_df = pd.concat([hv_r_duration_summary_df, 
                             hv_r_fps_summary_df, 
                             hv_r_segment_n_summary_df, 
                             hv_r_segment_duration_summary_df]) 

hv_r_stats_df.to_csv(os.path.join(output_path, 
                                    'hv_r_video_and_segment_stats.csv'))                       

# categorical - counts and % -----------------------
hv_r_fps_counts_df = col_counts_and_percent(hv_r_df, 'frames_per_second_pose_hv')
hv_r_segment_counts_df = col_counts_and_percent(hv_r_df, 'walking_segmets_n_pose_hv')

# save 
hv_r_fps_counts_df.to_csv(os.path.join(output_path, 
                                         'hv_r_fps_counts.csv')) 

hv_r_segment_counts_df.to_csv(os.path.join(output_path, 
                                             'hv_r_segment_counts.csv'))  

# % of videos with each video metric calculated --------------------
hv_r_cols_per_df, hv_r_filt_cols_per_df = calculate_per_vid_w_metric(hv_r_df, 'home')
hv_r_cols_per_df.to_csv(os.path.join(output_path, 
                                       'hv_r_percent_w_data.csv'))

hv_r_filt_cols_per_df.to_csv(os.path.join(output_path, 
                                                'hv_r_percent_w_data_metricsSubset.csv'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Metric'] = clean_names


In [21]:
# left turn only 
# summary statistics ----------------------------------
# calculate 
hv_l_duration_summary_df = video_one_col_stats(hv_l_df, 'total_video_duration_sec_pose_hv')
hv_l_fps_summary_df = video_one_col_stats(hv_l_df, 'frames_per_second_pose_hv')
hv_l_segment_n_summary_df = video_one_col_stats(hv_l_df, 'walking_segmets_n_pose_hv') 
hv_l_segment_duration_summary_df = video_one_col_stats(hv_l_df, 'walking_segments_duration_mean_pose_hv')

# compile + save 
hv_l_stats_df = pd.concat([hv_l_duration_summary_df, 
                             hv_l_fps_summary_df, 
                             hv_l_segment_n_summary_df, 
                             hv_l_segment_duration_summary_df]) 

hv_l_stats_df.to_csv(os.path.join(output_path, 
                                    'hv_l_video_and_segment_stats.csv'))                       

# categorical - counts and % -----------------------
hv_l_fps_counts_df = col_counts_and_percent(hv_l_df, 'frames_per_second_pose_hv')
hv_l_segment_counts_df = col_counts_and_percent(hv_l_df, 'walking_segmets_n_pose_hv')

# save 
hv_l_fps_counts_df.to_csv(os.path.join(output_path, 
                                         'hv_l_fps_counts.csv')) 

hv_l_segment_counts_df.to_csv(os.path.join(output_path, 
                                             'hv_l_segment_counts.csv'))  

# % of videos with each video metric calculated --------------------
hv_l_cols_per_df, hv_l_filt_cols_per_df = calculate_per_vid_w_metric(hv_l_df, 'home')
hv_l_cols_per_df.to_csv(os.path.join(output_path, 
                                       'hv_l_percent_w_data.csv'))

hv_l_filt_cols_per_df.to_csv(os.path.join(output_path, 
                                                'hv_l_percent_w_data_metricsSubset.csv'))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['Metric'] = clean_names


## Average video segment of those with at least one walking segment identified 

In [22]:
# PWS 

zv_pws_MS_w_bw_df_wWalking = zv_pws_MS_w_bw_df.loc[zv_pws_MS_w_bw_df['walking_segmets_n_pose_zv'] > 0]
print(len(zv_pws_MS_w_bw_df_wWalking))

# calculate 
pws_wWalking_duration_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df_wWalking, 'total_video_duration_sec_pose_zv')
pws_wWalking_fps_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df_wWalking, 'frames_per_second_pose_zv')
pws_wWalking_segment_n_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df_wWalking, 'walking_segmets_n_pose_zv') 
pws_wWalking_segment_duration_summary_df = video_one_col_stats(zv_pws_MS_w_bw_df_wWalking, 'walking_segments_duration_mean_pose_zv')

pws_wWalking_stats_df = pd.concat([pws_wWalking_duration_summary_df, 
                                   pws_wWalking_fps_summary_df, 
                                   pws_wWalking_segment_n_summary_df, 
                                   pws_wWalking_segment_duration_summary_df])

pws_wWalking_stats_df.to_csv(os.path.join(output_path, 
                                          'zv_pws_wWalking_video_and_segment_stats.csv')) 

pws_wWalking_stats_df

166


Unnamed: 0,Value,Count,Mean,Median,Maximum,Minimum,S.D.,IQR
0,total_video_duration_sec_pose_zv,166,32.5,28.6,115.8,14.6,14.5,7.1
0,frames_per_second_pose_zv,166,30.0,30.0,30.0,30.0,0.0,0.0
0,walking_segmets_n_pose_zv,166,3.3,3.0,9.0,1.0,1.4,2.0
0,walking_segments_duration_mean_pose_zv,166,4.5,4.5,7.2,2.3,1.1,1.5


In [23]:
# FW 
zv_fw_MS_w_bw_df_wWalking = zv_fw_MS_w_bw_df.loc[zv_fw_MS_w_bw_df['walking_segmets_n_pose_zv'] > 0]
print(len(zv_fw_MS_w_bw_df_wWalking))

# calculate 
fw_wWalking_duration_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df_wWalking, 'total_video_duration_sec_pose_zv')
fw_wWalking_fps_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df_wWalking, 'frames_per_second_pose_zv')
fw_wWalking_segment_n_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df_wWalking, 'walking_segmets_n_pose_zv') 
fw_wWalking_segment_duration_summary_df = video_one_col_stats(zv_fw_MS_w_bw_df_wWalking, 'walking_segments_duration_mean_pose_zv')

fw_wWalking_stats_df = pd.concat([fw_wWalking_duration_summary_df, 
                                   fw_wWalking_fps_summary_df, 
                                   fw_wWalking_segment_n_summary_df, 
                                   fw_wWalking_segment_duration_summary_df])

fw_wWalking_stats_df.to_csv(os.path.join(output_path, 
                                    'zv_fw_wWalking_video_and_segment_stats.csv')) 

fw_wWalking_stats_df

162


Unnamed: 0,Value,Count,Mean,Median,Maximum,Minimum,S.D.,IQR
0,total_video_duration_sec_pose_zv,162,26.0,22.5,254.4,7.6,20.1,8.7
0,frames_per_second_pose_zv,162,30.0,30.0,30.0,30.0,0.0,0.0
0,walking_segmets_n_pose_zv,162,2.8,3.0,7.0,1.0,1.2,2.0
0,walking_segments_duration_mean_pose_zv,162,3.7,3.5,12.1,2.2,1.0,1.0


In [24]:
## Home videos with at least one walking segment identified 
hv_bw_clean_df_wWalking = hv_bw_clean_df.loc[hv_bw_clean_df['walking_segmets_n_pose_hv'] > 0] 
print(len(hv_bw_clean_df_wWalking))

# calculate 
hv_wWalking_duration_summary_df = video_one_col_stats(hv_bw_clean_df_wWalking, 'total_video_duration_sec_pose_hv')
hv_wWalking_fps_summary_df = video_one_col_stats(hv_bw_clean_df_wWalking, 'frames_per_second_pose_hv')
hv_wWalking_segment_n_summary_df = video_one_col_stats(hv_bw_clean_df_wWalking, 'walking_segmets_n_pose_hv') 
hv_wWalking_segment_duration_summary_df = video_one_col_stats(hv_bw_clean_df_wWalking, 'walking_segments_duration_mean_pose_hv')

# compile + save 
hv_wWalking_stats_df = pd.concat([hv_wWalking_duration_summary_df, 
                                  hv_wWalking_fps_summary_df, 
                                   hv_wWalking_segment_n_summary_df, 
                                   hv_wWalking_segment_duration_summary_df]) 

hv_wWalking_stats_df.to_csv(os.path.join(output_path, 
                                    'hv_wWalking_video_and_segment_stats.csv'))                       
hv_wWalking_stats_df

72


Unnamed: 0,Value,Count,Mean,Median,Maximum,Minimum,S.D.,IQR
0,total_video_duration_sec_pose_hv,72,33.1,30.9,79.1,14.5,12.3,11.1
0,frames_per_second_pose_hv,72,33.0,30.0,60.0,24.0,10.4,0.0
0,walking_segmets_n_pose_hv,72,4.4,4.0,12.0,1.0,1.9,3.0
0,walking_segments_duration_mean_pose_hv,72,3.6,3.5,7.0,2.1,1.1,1.6
