In [1]:
# this script will plot the visibility scores of each landmark from the run.py output in the home video analysis pipeline 
# goal: determine if horizontal vs vertical videos can be used, which pose landmarks to focus on for metrics, etc? 

# reference for plots using seaborn - https://python-graph-gallery.com/36-add-jitter-over-boxplot-seaborn/ 
    # start with this - violin if messy 


In [2]:
# import packages 
import matplotlib.pyplot as plt 
import pandas as pd 
import seaborn as sns
import os

In [3]:
# practice walking through all files in a folder 
# current folder organization - all run.py files in same folder 
    # 3 total files per trial - pose_landmrks, world_landmarks, yolo 

#dir in path = run.py outputs 
dir_in_path = r'..\..\temp\main_branch_outputs\000_run'

# dir out path 
dir_out_path = r'..\..\temp\main_branch_outputs\003_plot_vis_scores'

# create blank data frame 
df = pd.DataFrame(columns = ['X', 'Y', 'Z',	'vis', 'pres', 'frame', 'label',  'filename'])

# read all data from run.py outputs, add column with filename, append to create one data frame 
for (dir_path, dir_names, file_names) in os.walk(dir_in_path):
    print('dir_path: ' + dir_path)

    # loop through each file in dir_in_path
    for file_name in file_names: 
        if 'world' not in file_name and 'yolo' not in file_name: 
            # save file name and save extension 
            name, ext = os.path.splitext(file_name)

            # create full path to input file to read 
            file_in_path = os.path.normpath(os.path.join(dir_path, file_name))
            print('file_in_path: ' + file_in_path)

            # read current df 
            current_df = pd.read_csv(file_in_path, index_col = 0)
            
            # add file name to last column 
            current_df['filename'] = name

            # append current_df to df 
            df = pd.concat([df, current_df]) 

# drop rows with all missing data 
df = df.dropna(how='all')

dir_path: ..\..\temp\main_branch_outputs\000_run
file_in_path: ..\..\temp\main_branch_outputs\000_run\DS_HC_gait_horizontal_left_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\DS_HC_gait_horizontal_right_mediapipe.csv


  df = pd.concat([df, current_df])


file_in_path: ..\..\temp\main_branch_outputs\000_run\DS_HC_gait_vertical_left_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\DS_HC_gait_vertical_right_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_10steps_away_0_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_10steps_away_1_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_10steps_towards_0_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_10steps_towards_1_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_17ft_walk_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_vertical_0_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_vertical_1_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\MM_HC_vertical_2_mediapipe.csv
file_in_path: ..\..\temp\main_branch_outputs\000_run\NW_HC_gait_vertical_left_mediapipe.csv
file_in_path: ..\..\temp\main_

In [4]:
# add new column with horizontal vs vertical video based on task 
task = df['filename'].apply(
    lambda x: 'horizontal' if 'horizontal' in x else ('vertical' if 'vertical' in x else None)
)

df['task'] = task 

In [5]:
# filter out MM files - I did different task and wasn't recorded at home 
filtered_df = df[~df['filename'].str.contains('MM')]

# check filtered correctly 
print(pd.unique(filtered_df['filename']))

['DS_HC_gait_horizontal_left_mediapipe'
 'DS_HC_gait_horizontal_right_mediapipe'
 'DS_HC_gait_vertical_left_mediapipe'
 'DS_HC_gait_vertical_right_mediapipe'
 'NW_HC_gait_vertical_left_mediapipe'
 'NW_HC_gait_vertical_right_mediapipe'
 'RB_HC_gait_horizontal_left_head_mediapipe'
 'RB_HC_gait_horizontal_left_mediapipe'
 'RB_HC_gait_horizontal_right_head_mediapipe'
 'RB_HC_gait_horizontal_right_mediapipe'
 'RB_HC_gait_vertical_left_mediapipe'
 'RB_HC_gait_vertical_right_mediapipe']


In [6]:
# save mean, median, and standard deviation for each marker
def mean_med_std_vis(df):

    # horizontal 
    h_df = df.loc[df['task'] == 'horizontal']
    
    h_mean_vis = h_df['vis'].mean()
    h_median_vis = h_df['vis'].median()
    h_std_vis = h_df['vis'].std()
    
    # vertical 
    v_df = df.loc[df['task'] == 'vertical']
    
    v_mean_vis = v_df['vis'].mean()
    v_median_vis = v_df['vis'].median()
    v_std_vis = v_df['vis'].std()
    
    return[h_mean_vis, h_median_vis, h_std_vis, v_mean_vis, v_median_vis, v_std_vis]


In [7]:
# all trials on single boxplot, separated by horizontal vs vertical tasks 

def single_label_bxplt_all(df, label): 
    # all values -  boxplot 
    ax = sns.boxplot(x='task', y='vis', data=df)
    plt.title(label)

    #save fig 
    output_file_name = 'all_' + label + '_boxplot.png'
    out_path = os.path.normpath(os.path.join(dir_out_path, 'all', output_file_name))
    print('saving: ' + out_path)
    plt.savefig(out_path)
    plt.close() 


In [8]:
# all trials on single histogram, separated by horizontal vs vertical tasks 

def single_label_hist_all(df, label): 
    fig, axs = plt.subplots(nrows=2)
    
    axs[0].hist(df['vis'].loc[df['task'] == 'horizontal'])
    axs[0].set_title(label + ' horizontal') 
    
    axs[1].hist(df['vis'].loc[df['task'] == 'vertical'])
    axs[1].set_title(label + 'vertical') 
    
    #save fig 
    output_file_name = 'all_' + label + '_histogram.png'
    out_path = os.path.normpath(os.path.join(dir_out_path, 'all', output_file_name))
    print('saving: ' + out_path)
    plt.savefig(out_path)
    plt.close() 

In [9]:
# all trials on single lineplot, separated by horizontal vs vertical tasks 

def single_label_line_all(df, label): 
    
    # Create the line plot
        # x = frame, y = visibility score 
        # colored by task: vertical or horizontl 
        # each video = 1 line 
    plt.figure(figsize=(10, 6))
    sns.lineplot(data=df, x='frame', y='vis', hue='task', units='filename', markers=True, dashes=False, estimator = None)

    # Add labels and title
    plt.xlabel('Frame')
    plt.ylabel('Visibility (vis)')
    plt.title(label + ': Visibility per video')

    # Show the plot
    plt.legend(title='Task')
    

    #save fig 
    output_file_name = 'all_' + label + '_line.png'
    out_path = os.path.normpath(os.path.join(dir_out_path, 'all', output_file_name))
    print('saving: ' + out_path)
    plt.savefig(out_path)
    plt.close() 

In [None]:
# create for loop to loop through all landmarks 

vis_stats_all_df = pd.DataFrame(columns = ['label', 
                                           'h_mean_vis',
                                           'h_median_vis',
                                           'h_std_vis',
                                           'v_mean_vis', 
                                           'v_median_vis',
                                           'v_std_vis'])

# drop all na 
filtered_df = filtered_df.dropna(how='all')
                                                                           
for label in pd.unique(filtered_df['label']):
    if pd.isna(label): 
        print('is na: skipped')
    else: 
        print('label: ' + label)

        # filter to data frame that only includes one mediapose landmark (nose, right foot, etc) 
        current_label_df = filtered_df.loc[filtered_df['label'] == label]
    
        # boxplot all values 
        single_label_bxplt_all(current_label_df, label)

        #histogram of all values 
        single_label_hist_all(current_label_df, label)

        # line plot of all values, colored by task 
        single_label_line_all(current_label_df, label)

        # save mean, median, and standard deviation: horizontal and vertical  
        current_vis_stats = mean_med_std_vis(current_label_df)
    
        row_current_vis_stats = pd.DataFrame([label, 
                                              current_vis_stats[0],
                                              current_vis_stats[1], 
                                              current_vis_stats[2],
                                              current_vis_stats[3],
                                              current_vis_stats[4], 
                                              current_vis_stats[5],
                                             ])

        row_current_vis_stats = row_current_vis_stats.transpose()
        row_current_vis_stats = row_current_vis_stats.rename(columns = {0:'label',
                                                                        1:'h_mean_vis',
                                                                        2: 'h_median_vis',
                                                                        3: 'h_std_vis',
                                                                        4: 'v_mean_vis',
                                                                        5: 'v_median_vis',
                                                                        6: 'v_std_vis'}
                                                                   )

    
    
        vis_stats_all_df = pd.concat([vis_stats_all_df, row_current_vis_stats]) 

label: nose
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_nose_boxplot.png
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_nose_histogram.png
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_nose_line.png
label: left_eye_inner
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_left_eye_inner_boxplot.png
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_left_eye_inner_histogram.png
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_left_eye_inner_line.png
label: left_eye
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_left_eye_boxplot.png
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_left_eye_histogram.png
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_left_eye_line.png
label: left_eye_outer
saving: ..\..\temp\main_branch_outputs\003_plot_vis_scores\all\all_left_eye_outer_boxplot.png
saving: ..\..\temp\main_branch_outputs\

In [None]:
# add columns for difference between median and mean vis scores between vertical and horizontal videos 

# mean diff 
v_minus_h_mean = vis_stats_all_df['v_mean_vis'] - vis_stats_all_df['h_mean_vis']
v_minus_h_mean.head()
vis_stats_all_df['v_minus_h_mean'] = v_minus_h_mean

# median diff 
v_minus_h_median = vis_stats_all_df['v_median_vis'] - vis_stats_all_df['h_median_vis']
v_minus_h_median.head()
vis_stats_all_df['v_minus_h_median'] = v_minus_h_median

# save vis_stats_all_df as .csv file 
output_stats_file_name = 'vis_stats_by_label.csv'
out_stats_path = os.path.normpath(os.path.join(dir_out_path, 'all', output_stats_file_name))
print('saving: ' + out_stats_path)
vis_stats_all_df.to_csv(out_stats_path)

In [None]:
if pd.isna(label): 
        print('is na: skipped')