In [2]:
import os
import pandas as pd

# Expected CSV files for each modality sub-directory, in the order they are reported.
# "{pid}" is replaced by the participant ID derived from the participant directory name.
_MODALITY_FILES = (
    ("text", ("{pid}_Transcript_processed_scaled.csv",)),
    ("clinical", ("{pid}_Transcript_processed_scaled_biobert_features.csv",)),
    ("audio", (
        "{pid}_vgg16.csv",
        "{pid}_BoAW_openSMILE_2.3.0_MFCC.csv",
        "{pid}_OpenSMILE2.3.0_mfcc.csv",
        "{pid}_OpenSMILE2.3.0_egemaps.csv",
        "{pid}_densenet201.csv",
        "{pid}_BoAW_openSMILE_2.3.0_eGeMAPS.csv",
    )),
    ("video", ("{pid}_BoVW_openFace_2.1.0_Pose_Gaze_AUs.csv",)),
)


def _print_csv_head(participant, modality, file_name, file_path):
    """Print the first 2 rows of one CSV file, or a one-line status if it cannot be shown.

    Args:
        participant: Participant directory name, used only in status messages.
        modality: Lower-case modality name ("text", "clinical", "audio", "video");
            it is upper-cased for the section header.
        file_name: Base name of the CSV file, used in the printed messages.
        file_path: Full path of the CSV file to read.
    """
    if not os.path.exists(file_path):
        print(f"[Missing File] {participant} - {modality} - {file_name}")
        return

    try:
        frame = pd.read_csv(file_path)
        print(f"\n[{modality.upper()}] {file_name} - First 2 Rows:")
        print(frame.head(2))
    except Exception as e:
        # Best effort: one unreadable file must not abort the report for the others.
        print(f"[Error Reading] {participant} - {modality} - {file_name}: {e}")


# Function to display the first 2 rows of CSV files for each participant
def display_first_2_rows(data_dir):
    """Print the first 2 rows of every expected CSV file for each participant.

    Every non-hidden sub-directory of ``data_dir`` is treated as a participant.
    For each one, the expected files under its ``text``, ``clinical``, ``audio``
    and ``video`` sub-directories are read with pandas and their first two rows
    are printed. A missing file yields a ``[Missing File]`` line and a file that
    cannot be read yields an ``[Error Reading]`` line; neither stops the run.

    Args:
        data_dir: Path of the directory that contains the participant directories.

    Returns:
        None. All results are written to standard output.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``data_dir`` cannot be listed
            (for example, when it does not exist).
    """
    # os.listdir returns entries in arbitrary order; sort so that the report is
    # identical from run to run and from machine to machine.
    participants = sorted(
        entry for entry in os.listdir(data_dir)
        if not entry.startswith('.') and os.path.isdir(os.path.join(data_dir, entry))
    )

    for participant in participants:
        participant_path = os.path.join(data_dir, participant)

        # Extract participant ID (assuming the directory is named 'PID_P')
        pid = participant.split("_P")[0]

        print(f"Displaying first 2 rows for participant: {participant}")

        for modality, templates in _MODALITY_FILES:
            for template in templates:
                file_name = template.format(pid=pid)
                file_path = os.path.join(participant_path, modality, file_name)
                _print_csv_head(participant, modality, file_name, file_path)

# Location of the data directory that is scanned for participant folders
data_dir = '../data'

# Print the first 2 rows of every expected CSV file found under data_dir
display_first_2_rows(data_dir=data_dir)


Displaying first 2 rows for participant: 300_P

[TEXT] 300_Transcript_processed_scaled.csv - First 2 Rows:
          0         1         2         3         4         5         6  \
0 -0.056392  0.138372 -0.126985 -0.167206 -0.115812 -0.129136 -0.121044   
1 -0.201225 -0.001245 -0.116618 -0.165399 -0.149390 -0.129316 -0.120709   

          7         8         9  ...        40        41        42        43  \
0 -0.235486  0.657910 -0.088610  ...  0.016926 -0.084856 -0.116798  0.064243   
1 -0.234631 -0.139504 -0.117499  ... -0.018964 -0.105576 -0.108521  0.093315   

         44        45        46        47        48        49  
0 -0.194753 -0.064119 -0.282139 -0.247824 -0.092130 -0.149415  
1 -0.199672 -0.076216 -0.302802 -0.213096 -0.095855 -0.208828  

[2 rows x 50 columns]
[Missing File] 300_P - clinical - 300_Transcript_processed_scaled_biobert_features.csv

[AUDIO] 300_vgg16.csv - First 2 Rows:
            name  timeStamp  neuron_0  neuron_1  neuron_2  neuron_3  neuron_4  \
0  3