In [6]:
import pandas as pd

# Read Excel file
df = pd.read_excel(r"..\raw_dataset\DAPPER\Psychol_Rec\ESM.xlsx")

# Create category labels for Valence and Arousal
df['ESM_Valence'] = 'Class_' + df['Valence'].astype(str)
df['ESM_Arousal'] = 'Class_' + df['Arousal'].astype(str)

# Calculate counts for Valence and Arousal
valence_counts = df['ESM_Valence'].value_counts().sort_index()
arousal_counts = df['ESM_Arousal'].value_counts().sort_index()

# Create columns for summary DataFrame
columns = [f'Class {i}' for i in range(1, 6)]  # Assuming 5 classes
summary_df = pd.DataFrame(columns=columns, index=['ESM_Valence', 'ESM_Arousal'])

# Calculate and fill Valence percentages
valence_total = valence_counts.sum()
for i, (index, count) in enumerate(valence_counts.items(), 1):
    summary_df.loc['ESM_Valence', f'Class {i}'] = f"{count} ({(count/valence_total*100):.1f}%)"

# Calculate and fill Arousal percentages
arousal_total = arousal_counts.sum()
for i, (index, count) in enumerate(arousal_counts.items(), 1):
    summary_df.loc['ESM_Arousal', f'Class {i}'] = f"{count} ({(count/arousal_total*100):.1f}%)"

# Display the table
print(summary_df)

                Class 1       Class 2       Class 3       Class 4     Class 5
ESM_Valence   83 (2.2%)   613 (16.2%)  1110 (29.3%)  1612 (42.5%)  371 (9.8%)
ESM_Arousal  318 (8.4%)  1236 (32.6%)   998 (26.3%)  1030 (27.2%)  207 (5.5%)


In [8]:
import pandas as pd
import os

# Get participant IDs from the Physiol_Rec directory
physio_path = r"..\raw_dataset\DAPPER\Physiol_Rec"
participant_ids = [folder for folder in os.listdir(physio_path) 
                  if os.path.isdir(os.path.join(physio_path, folder))]

# Read ESM data
df = pd.read_excel(r"..\raw_dataset\DAPPER\Psychol_Rec\ESM.xlsx")

# Convert participant IDs to integers for matching
participant_ids = [int(pid) for pid in participant_ids]

# Create a copy of filtered data to avoid the warning
filtered_df = df[df['Participant ID'].isin(participant_ids)].copy()

# Create category labels for Valence and Arousal using .loc
filtered_df.loc[:, 'ESM_Valence'] = 'Class_' + filtered_df['Valence'].astype(str)
filtered_df.loc[:, 'ESM_Arousal'] = 'Class_' + filtered_df['Arousal'].astype(str)

# Calculate counts for Valence and Arousal
valence_counts = filtered_df['ESM_Valence'].value_counts().sort_index()
arousal_counts = filtered_df['ESM_Arousal'].value_counts().sort_index()

# Create summary DataFrame
columns = [f'Class {i}' for i in range(1, 6)]
summary_df = pd.DataFrame(columns=columns, index=['ESM_Valence', 'ESM_Arousal'])

# Calculate and fill Valence percentages
valence_total = valence_counts.sum()
for i, (index, count) in enumerate(valence_counts.items(), 1):
    summary_df.loc['ESM_Valence', f'Class {i}'] = f"{count} ({(count/valence_total*100):.1f}%)"

# Calculate and fill Arousal percentages
arousal_total = arousal_counts.sum()
for i, (index, count) in enumerate(arousal_counts.items(), 1):
    summary_df.loc['ESM_Arousal', f'Class {i}'] = f"{count} ({(count/arousal_total*100):.1f}%)"

# Display the table
print(f"Total number of participants with physiological data: {len(participant_ids)}")
print(f"Total number of ESM records for these participants: {len(filtered_df)}\n")
print(summary_df)

Total number of participants with physiological data: 88
Total number of ESM records for these participants: 2400

                Class 1      Class 2      Class 3      Class 4     Class 5
ESM_Valence   53 (2.2%)  385 (16.0%)  756 (31.5%)  997 (41.5%)  209 (8.7%)
ESM_Arousal  181 (7.5%)  748 (31.2%)  690 (28.7%)  659 (27.5%)  122 (5.1%)


In [9]:
import pandas as pd
import os

# Read ESM data
df = pd.read_excel(r"..\raw_dataset\DAPPER\Psychol_Rec\ESM.xlsx")

# Get unique participants from ESM data
esm_participants = df['Participant ID'].unique()
n_esm_participants = len(esm_participants)

# Get participant IDs from the Physiol_Rec directory
physio_path = r"..\raw_dataset\DAPPER\Physiol_Rec"
physio_participants = [int(folder) for folder in os.listdir(physio_path) 
                      if os.path.isdir(os.path.join(physio_path, folder))]
n_physio_participants = len(physio_participants)

# Find participants that have both ESM and physiological data
participants_both = set(esm_participants) & set(physio_participants)
n_participants_both = len(participants_both)

# Print summary
print(f"Total participants in ESM data: {n_esm_participants}")
print(f"Total participants with physiological data: {n_physio_participants}")
print(f"Participants with both ESM and physiological data: {n_participants_both}")

# Create filtered dataset for participants with both data types
filtered_df = df[df['Participant ID'].isin(participants_both)].copy()

# Create category labels for Valence and Arousal using .loc
filtered_df.loc[:, 'ESM_Valence'] = 'Class_' + filtered_df['Valence'].astype(str)
filtered_df.loc[:, 'ESM_Arousal'] = 'Class_' + filtered_df['Arousal'].astype(str)

# Calculate counts for Valence and Arousal
valence_counts = filtered_df['ESM_Valence'].value_counts().sort_index()
arousal_counts = filtered_df['ESM_Arousal'].value_counts().sort_index()

# Create summary DataFrame
columns = [f'Class {i}' for i in range(1, 6)]
summary_df = pd.DataFrame(columns=columns, index=['ESM_Valence', 'ESM_Arousal'])

# Calculate and fill Valence percentages
valence_total = valence_counts.sum()
for i, (index, count) in enumerate(valence_counts.items(), 1):
    summary_df.loc['ESM_Valence', f'Class {i}'] = f"{count} ({(count/valence_total*100):.1f}%)"

# Calculate and fill Arousal percentages
arousal_total = arousal_counts.sum()
for i, (index, count) in enumerate(arousal_counts.items(), 1):
    summary_df.loc['ESM_Arousal', f'Class {i}'] = f"{count} ({(count/arousal_total*100):.1f}%)"

print("\nStatistics for participants with both ESM and physiological data:")
print(f"Total number of ESM records: {len(filtered_df)}\n")
print(summary_df)

Total participants in ESM data: 142
Total participants with physiological data: 88
Participants with both ESM and physiological data: 88

Statistics for participants with both ESM and physiological data:
Total number of ESM records: 2400

                Class 1      Class 2      Class 3      Class 4     Class 5
ESM_Valence   53 (2.2%)  385 (16.0%)  756 (31.5%)  997 (41.5%)  209 (8.7%)
ESM_Arousal  181 (7.5%)  748 (31.2%)  690 (28.7%)  659 (27.5%)  122 (5.1%)


In [10]:
import pandas as pd
import os

# Define emotion mappings
emotion_map = {
    'PANAS_1': 'upset',
    'PANAS_2': 'hostile',
    'PANAS_3': 'alert',
    'PANAS_4': 'ashamed',
    'PANAS_5': 'inspired',
    'PANAS_6': 'nervous',
    'PANAS_7': 'determined',
    'PANAS_8': 'attentive',
    'PANAS_9': 'afraid',
    'PANAS_10': 'active',
}

# Define positive and negative items
positive_items = ['PANAS_5', 'PANAS_7', 'PANAS_8', 'PANAS_10']  # inspired, determined, attentive, active
negative_items = ['PANAS_1', 'PANAS_2', 'PANAS_3', 'PANAS_4', 'PANAS_6', 'PANAS_9']  # upset, hostile, alert, ashamed, nervous, afraid

# Read ESM data
df = pd.read_excel(r"..\raw_dataset\DAPPER\Psychol_Rec\ESM.xlsx")

# Calculate positive and negative scores
df['positive_score'] = df[positive_items].sum(axis=1)
df['negative_score'] = df[negative_items].sum(axis=1)

# Classify PANAS category (1 for positive, 0 for negative)
df['PANAS_category'] = (df['positive_score'] >= df['negative_score']).astype(int)

# Get physiological participants
physio_path = r"..\raw_dataset\DAPPER\Physiol_Rec"
physio_participants = [int(folder) for folder in os.listdir(physio_path) 
                      if os.path.isdir(os.path.join(physio_path, folder))]

# Create filtered dataset for participants with physiological data
filtered_df = df[df['Participant ID'].isin(physio_participants)].copy()

# Function to calculate and format statistics
def get_panas_stats(data):
    total = len(data)
    counts = data['PANAS_category'].value_counts().sort_index()
    return pd.Series({
        'Class 0 (Negative)': f"{counts[0]} ({(counts[0]/total*100):.1f}%)",
        'Class 1 (Positive)': f"{counts[1]} ({(counts[1]/total*100):.1f}%)",
        'Total Records': total
    })

# Calculate statistics for both groups
all_stats = get_panas_stats(df)
physio_stats = get_panas_stats(filtered_df)

# Create summary DataFrame
summary_df = pd.DataFrame({
    'All Participants': all_stats,
    'Participants with Physiological Data': physio_stats
})

# Display results
print("PANAS Category Distribution:")
print(summary_df)

# Additional participant count information
print("\nParticipant Counts:")
print(f"Total unique participants: {df['Participant ID'].nunique()}")
print(f"Participants with physiological data: {len(physio_participants)}")

PANAS Category Distribution:
                   All Participants Participants with Physiological Data
Class 0 (Negative)     1508 (39.8%)                          959 (40.0%)
Class 1 (Positive)     2281 (60.2%)                         1441 (60.0%)
Total Records                  3789                                 2400

Participant Counts:
Total unique participants: 142
Participants with physiological data: 88
