In [5]:
import os
import sys

import pandas as pd

# base directory for organized data
BASE_DIR = "/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/"

In [20]:

# create a csv file for each subject, scenario, and trial and paths

data_csv = pd.DataFrame(columns=["Subject", "Scenario", "Trial"])

# walk through the directory structure
for root, dirs, files in os.walk(BASE_DIR):
    for file in files:
        # check if the file is a .csv file
        if file.endswith(".csv"):
            # construct the full file path
            file_path = os.path.join(root, file)
            # read the file and print its contents
            print(f"file path: {file_path}")
            
            # extract subject, scenario, trial from the file path
            parts = file_path.split(os.sep)
            
            # extract trial base path from the file path 
            # example: "/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/opvrs_7/chest_pain_noncardiac/0/GoPro/GX010029.csv"
            # trial_base = "/standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/opvrs_7/chest_pain_noncardiac/0"
            
            trial_base = os.path.join(*parts[:-2])
            trial_base = f"/{trial_base}"
            
            if len(parts) >= 6:
                subject = parts[-5]
                scenario = parts[-4]
                trial = parts[-3].split('.')[0]
                print(f"Subject: {subject}, Scenario: {scenario}, Trial: {trial}, Path: {trial_base}")
                
                # get gopro file path 
                gopro_path = os.path.join(trial_base, "GoPro")
                # find the gopro file inside the gopro path
                gopro_file = [f for f in os.listdir(gopro_path) if f.endswith(".MP4")]
                if gopro_file:
                    gopro_file_path = os.path.join(gopro_path, gopro_file[0])
                    print(f"GoPro file path: {gopro_file_path}")
                else:
                    print("No GoPro file found")
                    
                # get the smartwatch csv file path
                smartwatch_path = os.path.join(trial_base, "smartwatch_data")
                # find the smartwatch csv file inside the smartwatch path
                smartwatch_file = [f for f in os.listdir(smartwatch_path) if f.endswith(".csv")]
                if smartwatch_file:
                    smartwatch_file_path = os.path.join(smartwatch_path, smartwatch_file[0])
                    print(f"Smartwatch file path: {smartwatch_file_path}")
                else:
                    print("No smartwatch file found")
                    
                
                
                # # append the data to the DataFrame
                new_row = pd.DataFrame([{
                    "Subject": subject,
                    "Scenario": scenario,
                    "Trial": trial,
                    "Base_Path": trial_base,
                    "GoPro_Path": gopro_file_path if gopro_file else None,
                    "Smartwatch_Path": smartwatch_file_path if smartwatch_file else None
                }])
                data_csv = pd.concat([data_csv, new_row], ignore_index=True)


            print("--" * 20)

# remove duplicate rows
data_csv = data_csv.drop_duplicates()
# save the DataFrame to a CSV file
data_csv.to_csv("organized_data.csv", index=False)
print("Data saved to organized_data.csv")

file path: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/opvrs_15/stroke_unresponsive/0/smartwatch_data/synchronized_smartwatch_03.csv
Subject: opvrs_15, Scenario: stroke_unresponsive, Trial: 0, Path: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/opvrs_15/stroke_unresponsive/0
GoPro file path: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/opvrs_15/stroke_unresponsive/0/GoPro/GX010464.MP4
Smartwatch file path: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/opvrs_15/stroke_unresponsive/0/smartwatch_data/synchronized_smartwatch_03.csv
----------------------------------------
file path: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_2025/OPVRS/organized/opvrs_15/stroke_unresponsive/0/GoPro/GX010464.csv
Subject: opvrs_15, Scenario: stroke_unresponsive, Trial: 0, Path: /standard/UVA-DSA/NIST EMS Project Data/DataCollection_Spring_20

In [22]:
import pandas as pd

# Load the data from CSV
df = pd.read_csv("../../file_handling/opvrs_data_mappings.csv")  # Replace with actual file name

# Strip any whitespace in 'Smartwatch' values
df['Smartwatch'] = df['Smartwatch'].astype(str).str.strip()

# Create a unique trial ID per unique Smartwatch file
smartwatch_to_trial = {sw: idx for idx, sw in enumerate(df['Smartwatch'].unique())}
df['Trial'] = df['Smartwatch'].map(smartwatch_to_trial)

print(df)

# Save the grouped output to a new CSV
df.to_csv("grouped_by_smartwatch.csv", index=False)

print("✅ Trial numbers assigned and saved to 'grouped_by_smartwatch.csv'")


                                             Filepath           Video ID  \
0   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010017.MP4   
1   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010018.MP4   
2   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010019.MP4   
3   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010021.MP4   
4   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010022.MP4   
5   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010023.MP4   
6   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010024.MP4   
7   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010025.MP4   
8   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010026.MP4   
9   /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010028.MP4   
10  /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010029.MP4   
11  /standard/UVA-DSA/NIST EMS Project Data/DataCo...  GP01/GX010032.MP4   
12  /standar

In [None]:
import pandas as pd

# Load both files
mappings_df = pd.read_csv("../../file_handling/opvrs_data_mappings.csv")       # contains 'Video ID' or filepath
organized_df = pd.read_csv("./organized_data.csv")           # contains 'Video ID' or similar column

# Preview columns to confirm match key
print("Mappings Columns:", mappings_df.columns)
print("Organized Columns:", organized_df.columns)

# Extract video ID from GoPro path, e.g., GX010017.MP4
organized_df["Video ID"] = organized_df["GoPro_Path"].apply(lambda x: os.path.basename(str(x)) if pd.notna(x) else None)

# Strip and standardize both sides
organized_df["Video ID"] = organized_df["Video ID"].str.strip()
mappings_df["Video ID"] = mappings_df["Video ID"].astype(str).str.strip()

# Merge them on Video ID
merged_df = pd.merge(organized_df, mappings_df, on="Video ID", how="left")

# Save merged result
merged_df.to_csv("merged_organized_data.csv", index=False)
print("✅ Merged CSV written to merged_organized_data.csv")

Mappings Columns: Index(['Filepath', 'Video ID', 'Scenario', 'Subject', 'Unique_ID',
       'Duration_minutes', 'Annotation Time (estimated)', 'Smartwatch'],
      dtype='object')
Organized Columns: Index(['Subject', 'Scenario', 'Trial', 'Base_Path', 'GoPro_Path',
       'Smartwatch_Path'],
      dtype='object')
✅ Merged CSV written to merged_organized_data.csv


In [26]:
import pandas as pd

# Load the CSV file
df = pd.read_csv("../../file_handling/opvrs_data_mappings.csv")  # Replace with your actual CSV path

# Drop rows missing required info
df = df[pd.notna(df["Main_Trial"]) & pd.notna(df["GoPro_Path"])]

# Group GoPro paths by Main_Trial
grouped = df.groupby("Main_Trial")["GoPro_Path"].apply(list).reset_index()

# Expand the list into separate columns
max_len = grouped["GoPro_Path"].apply(len).max()
columns = [f"GoPro_{i+1:02d}_path" for i in range(max_len)]
expanded = pd.DataFrame(grouped["GoPro_Path"].to_list(), columns=columns)
expanded.insert(0, "Main_Trial", grouped["Main_Trial"])

# Save the output
expanded.to_csv("grouped_gopro_paths.csv", index=False)
print("✅ Grouped GoPro paths written to 'grouped_gopro_paths.csv'")


✅ Grouped GoPro paths written to 'grouped_gopro_paths.csv'
