In [1]:
# Crawl the dataset root directory and record the duration and frame count of every *encoded_trimmed.mp4 video
import os
import os.path
import re
import subprocess
import json
import pandas as pd

# Dataset roots from earlier collection rounds, kept for reference (uncomment one to scan it instead)
# root_dir = '/standard/storage/CognitiveEMS_Datasets/EMS_Interventions/videos/' # 2023 August
# root_dir = '/standard/storage/CognitiveEMS_Datasets/North_Garden/May_2024/May24_updated_structure/ego_camera/' # 2024 May
# root_dir = '/standard/storage/CognitiveEMS_Datasets/North_Garden/Sep_2024/Raw/' # 2024 September

# Directory tree that get_stats() walks recursively looking for *encoded_trimmed.mp4 files
root_dir = '/standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/'

# CSV written by get_stats() (one row per video) and read back by the analysis cells below
output_csv = './final_video_stats.csv'


In [6]:
import os
import csv
import subprocess
import json
import re

def get_stats(root_dir, output_csv):
    """Walk ``root_dir`` and write duration/frame statistics of every trimmed video to a CSV.

    A file is treated as a video of interest when its name ends with
    ``encoded_trimmed.mp4`` (case-insensitive). Each such file is probed with
    ``get_video_info``; files for which both values could be retrieved are
    written as one CSV row (``filename``, ``duration``, ``total_frames``).

    Directories and files are visited in sorted order so that the CSV rows come
    out in the same order on every run, independent of the filesystem.

    Args:
        root_dir: Directory tree to scan recursively.
        output_csv: Path of the CSV file to create (overwritten if it exists).

    Returns:
        int: Number of matching files found. This includes files for which the
        video information could not be retrieved (those produce no CSV row).
    """
    # Compile once instead of re-parsing the pattern for every file name
    video_pattern = re.compile(r'.*encoded_trimmed\.mp4$', re.IGNORECASE)
    count = 0

    # newline='' is what the csv module expects; explicit UTF-8 keeps the file
    # readable by pandas (which defaults to UTF-8) regardless of the locale.
    with open(output_csv, mode='w', newline='', encoding='utf-8') as csv_file:
        fieldnames = ['filename', 'duration', 'total_frames']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        for root, dirs, files in os.walk(root_dir):
            # Sorting the list in place makes os.walk descend in a stable order
            dirs.sort()
            for file in sorted(files):
                if not video_pattern.match(file):
                    continue

                file_path = os.path.join(root, file)
                duration, num_frames = get_video_info(file_path)

                if duration is not None and num_frames is not None:
                    print(f"Processing {file_path}: Duration = {duration} seconds, Frames = {num_frames}")
                    writer.writerow({
                        'filename': file_path,
                        'duration': duration,
                        'total_frames': num_frames
                    })
                else:
                    print(f"Could not retrieve video information for {file_path}")

                # Counted whether or not the probe succeeded
                count += 1

    return count

def _parse_stream_number(value, cast):
    """Convert an ffprobe field to a number; return None if it is absent or not numeric (e.g. "N/A")."""
    if value is None:
        return None
    try:
        return cast(value)
    except (TypeError, ValueError):
        return None


def get_video_info(file_path):
    """Return ``(duration_seconds, frame_count)`` for the first video stream of a file.

    Runs ``ffprobe`` on ``file_path`` and parses its JSON output. The frame
    count is taken from the stream's ``nb_frames`` field; when that field is
    missing, the packet count gathered by ``-count_packets``
    (``nb_read_packets``) is used instead, which for typical video streams
    equals the number of frames.

    Args:
        file_path: Path of the video file to probe.

    Returns:
        tuple: ``(duration, num_frames)`` where ``duration`` is a float and
        ``num_frames`` an int. An element is ``None`` when ffprobe did not
        report a usable value for it. ``(None, None)`` is returned when
        ffprobe could not be run, exited with an error, or reported no video
        stream. The function never raises; problems are printed instead.
    """
    # Use ffprobe to get video information
    command = [
        'ffprobe',
        '-v', 'error',
        '-select_streams', 'v:0',
        '-count_packets',
        '-show_entries', 'stream=duration,nb_frames,nb_read_packets',
        '-of', 'json',
        file_path
    ]
    try:
        print(f"Fetching info for {file_path}")
        result = subprocess.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        # Surface ffprobe's own diagnostics instead of a JSON decode error on empty stdout
        if result.returncode != 0:
            print(
                "Error while fetching video info: "
                f"ffprobe exited with code {result.returncode}: {result.stderr.strip()}"
            )
            return None, None

        video_info = json.loads(result.stdout)
        streams = video_info.get('streams') or []
        if not streams:
            return None, None

        stream = streams[0]
        # Missing or non-numeric fields become None rather than a misleading 0
        duration = _parse_stream_number(stream.get('duration'), float)
        num_frames = _parse_stream_number(stream.get('nb_frames'), int)
        if num_frames is None:
            num_frames = _parse_stream_number(stream.get('nb_read_packets'), int)
        return duration, num_frames
    except Exception as e:
        # Best-effort probe: e.g. ffprobe not installed or malformed JSON
        print(f"Error while fetching video info: {e}")
        return None, None




In [7]:
# Scan root_dir, write per-video stats to output_csv, and report how many matching
# video files were found (the count also includes files whose info could not be read)
file_count = get_stats(root_dir, output_csv)
print(f"Processed {file_count} video files. Stats saved to {output_csv}.")


Fetching info for /standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/P4/cardiac_arrest/s5/gopro/GH010055_encoded_trimmed.mp4
Processing /standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/P4/cardiac_arrest/s5/gopro/GH010055_encoded_trimmed.mp4: Duration = 67.133333 seconds, Frames = 2014
Fetching info for /standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/P4/cardiac_arrest/s8/gopro/GH010058_encoded_trimmed.mp4
Processing /standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/P4/cardiac_arrest/s8/gopro/GH010058_encoded_trimmed.mp4: Duration = 68.633333 seconds, Frames = 2059
Fetching info for /standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/P4/cardiac_arrest/s7/gopro/GH010057_encoded_trimmed.mp4
Processing /standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/P4/cardiac_arrest/s7/gopro/GH010057_encoded_trimmed.mp4: Duration = 69.866667 seconds, Frames = 2096
Fetching info for /standard/storage/EgoExoEMS_CVPR2025/Dataset/Final/P4/cardiac_arrest/s4/gopro/GH010054_encoded_trimmed.mp4
Processing 

# Find total video length per scenario

In [4]:
# Load the per-video stats CSV at output_csv (get_stats writes the columns filename, duration, total_frames)
final_video_stats = pd.read_csv(output_csv)


# Function to extract subject, trial, and scenario from filename
def extract_metadata(row):
    """Pull the subject, trial and scenario out of a row's ``filename`` path.

    The path is split on '/' and its components are read relative to the end:
    fifth-from-last is the subject, fourth-from-last the scenario and
    third-from-last the trial.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) with a ``'filename'`` string.

    Returns:
        pandas.Series: ``[subject, trial, scenario]`` in that order.

    Raises:
        IndexError: If the path has fewer than five '/'-separated components.
    """
    segments = row['filename'].split('/')
    # e.g. .../P4/cardiac_arrest/s5/gopro/<file>.mp4 -> 'P4', 'cardiac_arrest', 's5'
    subject, scenario, trial = (segments[offset] for offset in (-5, -4, -3))
    return pd.Series([subject, trial, scenario])

# Run extract_metadata on every row (axis=1) and store the three returned values
# as the new columns subject, trial and scenario
final_video_stats[['subject', 'trial', 'scenario']] = final_video_stats.apply(extract_metadata, axis=1)

# Save the enriched table to a new CSV in the current working directory, without the index column
final_video_stats.to_csv('updated_final_video_stats.csv', index=False)

In [8]:
import pandas as pd


# Function to classify subjects
def classify_subject(subject):
    """Map a subject identifier to its participant group.

    Args:
        subject: Subject identifier string (e.g. ``'P4'`` or ``'ng7'``).

    Returns:
        str: ``"General Public"`` for identifiers starting with an upper-case
        ``P``; ``"Members"`` for ``ng7``, ``ng8`` and ``ng9``; ``"EMTs"`` for
        everything else.
    """
    # The prefix check comes first, so it wins over the member list
    if subject.startswith("P"):
        return "General Public"

    member_ids = ("ng7", "ng8", "ng9")
    return "Members" if subject in member_ids else "EMTs"

# Tag every video with the participant group of its subject
final_video_stats['subject_type'] = final_video_stats['subject'].apply(classify_subject)

# Total duration (seconds) per subject type and scenario, plus the same total
# expressed in minutes and in hours
duration_summary = (
    final_video_stats
    .groupby(['subject_type', 'scenario'])['duration']
    .sum()
    .reset_index()
    .assign(
        duration_min=lambda summary: summary['duration'] / 60,
        duration_hr=lambda summary: summary['duration_min'] / 60,
    )
)

# Keep a copy of the summary on disk (optional)
duration_summary.to_csv('duration_summary_by_subject_type_and_scenario.csv', index=False)

# Overall footage length in hours, followed by the per-group breakdown
total_duration = duration_summary['duration_hr'].sum()
print(f"Total duration of all videos: {total_duration:.2f} hours")
print(duration_summary)


Total duration of all videos: 5.96 hours
     subject_type        scenario     duration  duration_min  duration_hr
0            EMTs  cardiac_arrest  9170.961794    152.849363     2.547489
1            EMTs      chest_pain  3474.404266     57.906738     0.965112
2  General Public  cardiac_arrest  6965.866669    116.097778     1.934963
3         Members  cardiac_arrest  1847.712533     30.795209     0.513253
