In [4]:
import pandas as pd

# Derive a binary depression label from the textual 'scale' column of the
# original depression scale file and save it as a two-column TSV.

# Define the mapping for the binary label
mapping = {
    'moderate depression': 1,
    'severe depression': 1,
    'mild depression': 0,
    'normal/symptom absent': 0,
}


def make_binary_labels(df, label_map):
    """Return a (participant_id, depression_binary) frame with integer labels.

    Adds a 'depression_binary' column to *df* in place (values of 'scale'
    that are not keys of *label_map* become NaN there), then returns a new
    frame holding only 'participant_id' and 'depression_binary'. Rows with a
    missing value in either of those two columns are dropped.

    Args:
        df: DataFrame with 'participant_id' and 'scale' columns.
        label_map: dict mapping 'scale' values to 0/1.

    Returns:
        A new DataFrame whose 'depression_binary' column is int64.

    Raises:
        ValueError: if a participant ID occurs more than once in the result.
    """
    # Apply the mapping; unmapped values become NaN
    df['depression_binary'] = df['scale'].map(label_map)

    # Select relevant columns and drop rows with a NaN in either column
    labelled = df[['participant_id', 'depression_binary']].dropna()

    # NaNs force the mapped column to float; once they are gone, cast back so
    # the file contains 1/0 rather than 1.0/0.0.
    labelled = labelled.astype({'depression_binary': 'int64'})

    # Sanity check: ensure one row per participant. Raised explicitly because
    # `assert` statements are removed when Python runs with -O.
    if not labelled['participant_id'].is_unique:
        raise ValueError("Duplicate participant IDs found after mapping.")

    return labelled


# True both in a notebook kernel and when run as a script, so the cell still
# executes as before; the names below stay module-level for later cells.
if __name__ == "__main__":
    # Read the original depression scale file
    df = pd.read_csv('depression_factors.tsv', sep='\t')

    binary_df = make_binary_labels(df, mapping)

    # Write to a new TSV file
    binary_df.to_csv('depression_factors_binary.tsv', sep='\t', index=False)
    print('Saved binary labels for', len(binary_df), 'participants.')


Saved binary labels for 226 participants.
