### EATD-Corpus organizer

 SDS scores are classified as:
 normal (<50),
 mild depression (50 to 59),
 moderate to marked major depression (60 to 69),
 severe to extreme major depression (70 and above). The raw score can be converted to an SDS Index score by multiplying it by 1.25.

In [1]:
import os
import pandas as pd

In [2]:
# List every entry in the datasets/EATD-Corpus folder. os.listdir() returns
# names in arbitrary order, so sort them to keep the row order of the
# resulting table reproducible across runs and machines.
BASE_PATH = "datasets/EATD-Corpus"
folders = sorted(os.listdir(BASE_PATH))


In [None]:
def _read_label(path):
    """Return the stripped text of the label file at `path`, or None if it is missing."""
    if not os.path.exists(path):
        return None
    with open(path, "r") as file:
        return file.read().strip()


# Collect one [folder, raw score, index score] row per folder that contains
# both label.txt and new_label.txt; anything else is skipped.
data = []
for folder in folders:
    folder_path = os.path.join(BASE_PATH, folder)
    # Ignore plain files that may sit next to the sample folders
    if not os.path.isdir(folder_path):
        continue
    label = _read_label(os.path.join(folder_path, "label.txt"))
    new_label = _read_label(os.path.join(folder_path, "new_label.txt"))
    if label is None or new_label is None:
        continue
    data.append([folder, label, new_label])

# label.txt is stored as SDS_raw and new_label.txt as SDS_index
df = pd.DataFrame(data, columns=["folder", "SDS_raw", "SDS_index"])
# The label files are read as text, so convert both scores to float
df = df.astype({"SDS_raw": float, "SDS_index": float})

# Categorize the depression level from SDS_index:
#   normal   (<50)
#   mild     (50 to 59)
#   moderate (60 to 69)  - moderate to marked major depression
#   severe   (70 and above) - severe to extreme major depression
# The intervals are closed on the left (right=False) so that the boundary
# values 50, 60 and 70 land in the higher category; with pd.cut's default
# right-closed intervals a score of exactly 50 would be labelled "normal".
# The last edge is infinity so the maximum index (100) is still "severe".
df["depression"] = pd.cut(
    df["SDS_index"],
    bins=[0, 50, 60, 70, float("inf")],
    labels=["normal", "mild", "moderate", "severe"],
    right=False,
)

# Add the split type from the folder-name prefix: "t..." is training,
# everything else is validation. Testing the prefix (rather than "t" anywhere
# in the name) avoids mislabelling a validation folder that contains a "t".
df["type"] = df["folder"].apply(
    lambda name: "training" if name.startswith("t") else "validation"
)

# Save the dataframe to a csv file
df.to_csv("datasets/depression_categories_EATD-Corpus.csv", index=False)

In [12]:
# Display the assembled DataFrame as the cell output
df

Unnamed: 0,folder,SDS_raw,SDS_index,depression,type
0,t_1,37.0,46.25,normal,training
1,t_10,30.0,37.50,normal,training
2,t_101,42.0,52.50,mild,training
3,t_102,66.0,82.50,severe,training
4,t_103,41.0,51.25,mild,training
...,...,...,...,...,...
157,v_94,41.0,51.25,mild,validation
158,v_96,43.0,53.75,mild,validation
159,v_97,39.0,48.75,normal,validation
160,v_98,52.0,65.00,moderate,validation
