In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gaitFunctions
import combineClips
import os
import glob
import seaborn as sns
import cv2

In [2]:
# navigate to directory that contains the data
os.chdir('/Users/iwoods/OneDrive - Ithaca College/movement_disorder_paper_2023/videos_exemplaris/')
# list directories within this directory
dirs = []
for dirname, dirnames, filenames in os.walk('.'):
    # print path to all subdirectories first.
    for subdirname in dirnames:
        folder = os.path.join(dirname, subdirname).split('/')[1]
        if folder not in dirs:
            dirs.append(folder)
dirs = np.sort(np.unique(np.array(dirs)))
for folder in dirs:
    if 'exemplaris' in folder:
        print(folder)

iw_11Dec23_exemplaris_inverted
iw_12Dec22_exemplaris_inverted
iw_14Dec22_exemplaris_inverted
iw_1Feb23_exemplaris_inverted
iw_21Dec23_exemplaris_inverted
iw_30Jan23_exemplaris_inverted
iw_6Dec22_exemplaris_olympus
iw_9Nov23_exemplaris_inverted
jb_1Feb23_exemplaris_inverted
kt_8Feb23_exemplaris_inverted


In [3]:
folders_in_analysis = ['iw_9Nov23_exemplaris_inverted',
                       'iw_11Dec23_exemplaris_inverted',
                       'iw_21Dec23_exemplaris_inverted']

In [4]:
# get durations of videos (in seconds)
# if there is a 'vid_info.csv' file already, then great!
# if not, we need to make this file ... and the videos need to be downloaded on the hard drive
# (if they are in the cloud, this will freeze)

def meanstdstr(vals):
    return str(np.round(np.mean(vals),2)) + ' ± ' + str(np.round(np.std(vals),2))

vids = []
video_folder = 'original_videos'
vid_info_file = 'vid_info.csv'
vid_df = pd.DataFrame()

for folder in folders_in_analysis:
    vids_in_folder = []
    have_vid_info = False
    
    # collect names of videos in this folder
    for extension in ['*mp4','*mov']:
        vids_in_folder.extend(glob.glob(os.path.join(folder,video_folder,extension)))
    
    # add names of videos in this folder to overall list
    vids_in_folder = sorted(vids_in_folder)
    vids.extend(vids_in_folder)
    
    # do we already have vid info for this folder?
    vid_info_path = os.path.join(folder,video_folder,vid_info_file)
    if len(glob.glob(vid_info_path)) > 0:
        these_vids_df = pd.read_csv(vid_info_path)
        have_vid_info = True
        print('Found vid info in ' + folder)
        if len(vid_df) > 0:
            vid_df = pd.concat([vid_df, these_vids_df])
        else:
            vid_df = these_vids_df
            
    if have_vid_info == False:
        print('We need vid info for ' + folder)
        vid_names = []
        fps = []
        width = []
        height = []
        frames = []
        duration = []
        for vid in vids_in_folder:
            parent_dir, original_video_dir, vid_name = vid.split('/')
            vid_names.append(vid_name)
#             print(vid_name) # testing OK
            cap = cv2.VideoCapture(vid)
            frames_per_sec = cap.get(cv2.CAP_PROP_FPS)
            fps.append(frames_per_sec)
            width.append(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
            height.append(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
            totalNoFrames = cap.get(cv2.CAP_PROP_FRAME_COUNT)
            frames.append(totalNoFrames)
            duration.append(totalNoFrames / frames_per_sec)
            cap.release()

        vid_info_dict = {
            'Video File':vid_names,
            'fps':fps,
            'width':width,
            'height':height,
            'frames':frames,
            'duration':duration
        }
        these_vids_df = pd.DataFrame(vid_info_dict)
        
        if len(vid_df) > 0:
            vid_df = pd.concat([vid_df, these_vids_df])
        else:
            vid_df = these_vids_df

        # save dataFrame to csv
        these_vids_df.to_csv(os.path.join(folder,video_folder,vid_info_file), index=False)

num_videos = len(vids)
vid_files = sorted(vids)
print()
print('Total number of videos: ' + str(num_videos)) 
print('Mean video length: ' + meanstdstr(vid_df.duration.values) + ' seconds')
vid_df.head(3)

Found vid info in iw_9Nov23_exemplaris_inverted
Found vid info in iw_11Dec23_exemplaris_inverted
Found vid info in iw_21Dec23_exemplaris_inverted
Total number of videos: 38
Mean video length: 138.42 ± 21.92 seconds


Unnamed: 0,Video File,fps,width,height,frames,duration
0,iw_9Nov_tardigrade01_exemplaris.mp4,33.0,1024.0,768.0,4432.0,134.30303
1,iw_9Nov_tardigrade02_exemplaris.mp4,33.0,1024.0,768.0,4779.0,144.818182
2,iw_9Nov_tardigrade03_exemplaris.mp4,33.0,1024.0,768.0,4650.0,140.909091


In [5]:
# combine data from these folders into dataframes ... takes a bit
this_treatment = 'exemplaris'

current_dir = os.getcwd()

path_summaries_df = pd.DataFrame()
step_timing_combined_df = pd.DataFrame()
step_summaries_df = pd.DataFrame()
gait_summaries_df = pd.DataFrame()
gait_styles_speeds_df = pd.DataFrame()

def combineDF(original,new):
    if len(original) == 0:
        original = new
    else:
        new = pd.concat([original, new])
    return new

for folder in folders_in_analysis:
    os.chdir(folder)
    print('Getting data from ' + folder)
    pdf, stcdf, sdf, gdf, gssdf = combineClips.main(False)
    os.chdir(current_dir)
    path_summaries_df = combineDF(path_summaries_df, pdf)
    step_timing_combined_df = combineDF(step_timing_combined_df, stcdf)
    step_summaries_df = combineDF(step_summaries_df, sdf)
    gait_summaries_df = combineDF(gait_summaries_df, gdf)
    gait_styles_speeds_df = combineDF(gait_styles_speeds_df, gssdf)
    
path_summaries_df['treatment'] = this_treatment * len(path_summaries_df)
step_summaries_df['treatment'] = this_treatment * len(step_summaries_df)
gait_summaries_df['treatment'] = this_treatment * len(gait_summaries_df)

Getting data from iw_9Nov23_exemplaris_inverted
... loading data for clips in this folder
Getting data from iw_11Dec23_exemplaris_inverted
... loading data for clips in this folder
Getting data from iw_21Dec23_exemplaris_inverted
... loading data for clips in this folder


In [9]:
print('Mean duration analyzed: ' + meanstdstr(path_summaries_df['Duration analyzed (sec)'].values) + ' seconds')
path_summaries_df.head(3)

Mean duration analyzed: 102.3 ± 12.0 seconds


Unnamed: 0,Identifier,treatment,individual,date,Scale (pixels in 1mm),Body Size (mm^2),Body Length (mm),Body Width (mm),Body Width/Length Ratio,Duration analyzed (sec),...,Percentage of time cruising,Speed (mm/sec cruising),Speed (body lengths / sec cruising),Total bearing change (deg),Bearing change (deg) / sec,Bearing change (deg) / sec cruising,Number of stops,Stops / sec,Number of turns,Turns / sec
0,iw_9nov_tardigrade01_exemplaris,exemplarisexemplarisexemplarisexemplarisexempl...,tardigrade01,9nov,956.024,0.006016,0.153164,0.05035,0.328734,118.037,...,94.716594,0.026622,0.173814,2310.6042,19.575254,17.372546,7,0.059303,5,0.04236
1,iw_9nov_tardigrade02_exemplaris,exemplarisexemplarisexemplarisexemplarisexempl...,tardigrade02,9nov,956.03,0.01442,0.253292,0.07305,0.288401,90.5143,...,96.829105,0.09232,0.36448,1158.160667,12.795334,9.039503,3,0.033144,3,0.033144
2,iw_9nov_tardigrade03_exemplaris,exemplarisexemplarisexemplarisexemplarisexempl...,tardigrade03,9nov,956.03,0.01124,0.215429,0.067348,0.312623,106.3914,...,97.894737,0.066134,0.30699,1335.798067,12.555508,10.159832,3,0.028198,2,0.018799
