In [8]:
# This script moves all folders ending with '_P' into a 'data' directory after user confirmation.

import os
import shutil

user_input = input("Do you want to set up the folder structure? ").strip().lower()

if user_input == 'yes':
    base_dir = "."
    destination_dir = os.path.join(base_dir, "data")
    os.makedirs(destination_dir, exist_ok=True)


    for parent_folder in os.listdir(base_dir):
        if parent_folder.endswith('_P') and os.path.isdir(os.path.join(base_dir, parent_folder)):
            # Define the source and destination subfolders
            source_subfolder = os.path.join(base_dir, parent_folder)
            destination_subfolder = os.path.join(destination_dir, parent_folder)

            # Step 2: Move the subfolder to the new location
            if os.path.exists(source_subfolder):
                shutil.move(source_subfolder, destination_subfolder)
                print(f"📂 Moved {source_subfolder} → {destination_subfolder}")
            else:
                print(f"❌ Subfolder not found: {source_subfolder}")
                continue

            # Step 3: Delete the now-empty parent folder
            parent_path = os.path.join(base_dir, parent_folder)
            try:
                shutil.rmtree(parent_path)
                print(f"🗑️ Deleted folder: {parent_path}")
            except Exception as e:
                print(f"⚠️ Failed to delete {parent_path}: {e}")
else:
    print("❌ Operation cancelled. Folder structure setup was not performed.")


Do you want to set up the folder structure?  yes


In [9]:
# This script organizes features and transcript files into 'text', 'audio', 'video', and 'clinical' folders inside each *_P subject folder, after user confirmation.

import os
import shutil

# Ask user for confirmation
user_input = input("Do you want to restructure the subject folders? ").strip().lower()

if user_input == 'yes':
    # Set your base directory (adjust if needed)
    base_dir = "./data"

    # Keywords for classification
    category_keywords = {
        "text": ["Transcript.csv"],
        "audio": [
            "BoAW", "OpenSMILE", "mfcc", "egemaps", "densenet", "vgg16"
        ],
        "video": [
            "BoVW", "OpenFace", "Pose", "Gaze", "AUs", "CNN_ResNet", "CNN_VGG"
        ],
        "clinical": ["Transcript.csv"]
    }

    # Traverse subject folders
    for folder_name in os.listdir(base_dir):
        subject_path = os.path.join(base_dir, folder_name)
        print(subject_path)

        if not os.path.isdir(subject_path):
            print(f"❌ Skipping non-folder: {subject_path}")
            continue

        if not folder_name.endswith("_P"):
            print(f"❌ Skipping non-subject folder: {subject_path}")
            continue

        print(f"\n🔄 Processing: {folder_name}")
        subject_id = folder_name.split("_")[0]

        # Create subfolders for each category (text, audio, video, clinical)
        for cat in category_keywords:
            os.makedirs(os.path.join(subject_path, cat), exist_ok=True)

        # --- Move transcript ---
        transcript_path = os.path.join(subject_path, f"{subject_id}_Transcript.csv")
        if os.path.exists(transcript_path):
            dest_text = os.path.join(subject_path, "text", f"{subject_id}_Transcript.csv")
            dest_clinical = os.path.join(subject_path, "clinical", f"{subject_id}_Transcript.csv")
            shutil.move(transcript_path, dest_text)
            shutil.copy(dest_text, dest_clinical)
            print(f"📄 Moved and copied transcript to text/ and clinical/")
        else:
            print(f"⚠️ Transcript not found: {transcript_path}")

        # --- Process feature files from 'features' subfolder ---
        features_path = os.path.join(subject_path, "features")  # Correct folder path for features
        if os.path.exists(features_path):
            for file in os.listdir(features_path):
                src_file = os.path.join(features_path, file)
                moved = False
                # Process features based on category keywords
                for cat, keywords in category_keywords.items():
                    # Skip text and clinical as they are already processed
                    if cat in ["text", "clinical"]:
                        continue

                    if any(k.lower() in file.lower() for k in keywords):
                        dst = os.path.join(subject_path, cat, file)
                        shutil.move(src_file, dst)
                        print(f"📦 Moved {file} → {cat}/")
                        moved = True
                        break  # Once moved, exit loop and move on to the next file
                
                if not moved:
                    print(f"❓ Unclassified feature file: {file}")
        else:
            print(f"⚠️ Features folder not found: {features_path}")

        # Clean up empty 'features' folder
        if os.path.exists(features_path) and not os.listdir(features_path):
            os.rmdir(features_path)
            print(f"🗑️ Removed empty features/ folder")

        # Clean up empty directories
        for cat in category_keywords:
            cat_path = os.path.join(subject_path, cat)
            if os.path.exists(cat_path) and not os.listdir(cat_path):
                os.rmdir(cat_path)
                print(f"🗑️ Removed empty {cat}/ folder")

    print("\n✅ Done restructuring each *_P folder.")
else:
    print("❌ Operation cancelled. Folder restructuring was not performed.")


Do you want to restructure the subject folders?  yes


./data/300_P

🔄 Processing: 300_P
⚠️ Transcript not found: ./data/300_P/300_Transcript.csv
⚠️ Features folder not found: ./data/300_P/features
./data/.DS_Store
❌ Skipping non-folder: ./data/.DS_Store
./data/lables
❌ Skipping non-subject folder: ./data/lables
./data/302_P

🔄 Processing: 302_P
⚠️ Transcript not found: ./data/302_P/302_Transcript.csv
⚠️ Features folder not found: ./data/302_P/features
./data/308_P

🔄 Processing: 308_P
⚠️ Transcript not found: ./data/308_P/308_Transcript.csv
⚠️ Features folder not found: ./data/308_P/features
./data/aligned_multimodal_dataset.csv
❌ Skipping non-folder: ./data/aligned_multimodal_dataset.csv
./data/301_P

🔄 Processing: 301_P
⚠️ Transcript not found: ./data/301_P/301_Transcript.csv
⚠️ Features folder not found: ./data/301_P/features
./data/.ipynb_checkpoints
❌ Skipping non-subject folder: ./data/.ipynb_checkpoints

✅ Done restructuring each *_P folder.


In [10]:
# This script deletes all .wav files from the './data' directory after user confirmation.

import os

# Ask user for confirmation
user_input = input("Do you want to delete all .wav files in the /data folder? ").strip().lower()

if user_input == 'yes':
    # Path to the /data directory
    data_dir = "../data"

    # Walk through the /data directory and delete .wav files
    deleted_files = []

    for dirpath, dirnames, filenames in os.walk(data_dir):
        for file in filenames:
            if file.endswith(".wav"):
                file_path = os.path.join(dirpath, file)
                try:
                    os.remove(file_path)
                    deleted_files.append(file_path)
                except Exception as e:
                    print(f"Failed to delete {file_path}: {e}")

    print(f"✅ Deleted {len(deleted_files)} .wav files from /data folder.")
else:
    print("❌ Operation cancelled. No .wav files were deleted.")


Do you want to delete all .wav files in the /data folder?  yes


✅ Deleted 0 .wav files from /data folder.


In [23]:
#Use this to check the folder structure
import os

def print_tree(directory, level=0):
    # List all files and directories in the current directory
    with os.scandir(directory) as entries:
        for entry in entries:
            # Indentation for tree structure
            print(' ' * (level * 4) + '|--', entry.name)
            # Recursively call the function for directories
            if entry.is_dir():
                print_tree(entry.path, level + 1)

# Specify the root directory of your project
root_directory = '.'  # Change this path to your project directory

print(f"Directory structure of {root_directory}:")
print_tree(root_directory)


Directory structure of .:
|-- main.ipynb
|-- .DS_Store
|-- requirements.txt
|-- training
    |-- train_text.ipynb
    |-- train_multimodal.ipynb
    |-- train_video.ipynb
    |-- train_audio.ipynb
|-- utils
    |-- metrics.ipynb
    |-- __pycache__
        |-- feature_validator.cpython-310.pyc
    |-- logger.ipynb
    |-- helpers.ipynb
    |-- config.ipynb
    |-- feature_analycis.ipynb
    |-- .ipynb_checkpoints
        |-- feature_analycis-checkpoint.ipynb
        |-- config-checkpoint.ipynb
|-- models
    |-- audio_model.ipynb
    |-- video_model.ipynb
    |-- multimodal_fusion.ipynb
    |-- text_model.ipynb
|-- README.md
|-- saved_models
    |-- video
    |-- .DS_Store
    |-- multimodal
    |-- audio
    |-- text
|-- evaluation
    |-- evaluate_multimodal.ipynb
    |-- evaluate_video.ipynb
    |-- evaluate_text.ipynb
    |-- evaluate_audio.ipynb
|-- .ipynb_checkpoints
    |-- requirements-checkpoint.txt
    |-- main-checkpoint.ipynb
    |-- README-checkpoint.md
|-- preprocessing
 

In [16]:
# This script converts all Jupyter notebooks (except itself) to .py scripts after user confirmation.

import os
from pathlib import Path
import nbformat
from nbconvert import PythonExporter

# Ask user for confirmation
user_input = input("Do you want to convert all .ipynb files (excluding this script) to .py? ").strip().lower()

if user_input == 'yes':
    # Set root directory to start from the location of the current notebook
    root_dir = Path('.').resolve()

    # Folders to exclude
    exclude_folders = {'data','utils'}

    # Function to convert notebook to .py
    def convert_ipynb_to_py(ipynb_path):
        with open(ipynb_path, 'r', encoding='utf-8') as f:
            notebook = nbformat.read(f, as_version=4)

        exporter = PythonExporter()
        source_code, _ = exporter.from_notebook_node(notebook)

        py_path = ipynb_path.with_suffix('.py')
        with open(py_path, 'w', encoding='utf-8') as f:
            f.write(source_code)
        print(f"✅ Converted: {ipynb_path.relative_to(root_dir)} → {py_path.relative_to(root_dir)}")

    # Recursively walk through directories
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Skip excluded folders
        if any(excluded in Path(dirpath).parts for excluded in exclude_folders):
            continue
        
        for filename in filenames:
            if filename.endswith('.ipynb') and filename != 'main.ipynb':
                file_path = Path(dirpath) / filename
                convert_ipynb_to_py(file_path)
else:
    print("❌ Operation cancelled. No notebooks were converted.")


Do you want to convert all .ipynb files (excluding this script) to .py?  yes


✅ Converted: training/train_text.ipynb → training/train_text.py
✅ Converted: training/train_multimodal.ipynb → training/train_multimodal.py
✅ Converted: training/train_video.ipynb → training/train_video.py
✅ Converted: training/train_audio.ipynb → training/train_audio.py
✅ Converted: models/audio_model.ipynb → models/audio_model.py
✅ Converted: models/video_model.ipynb → models/video_model.py
✅ Converted: models/multimodal_fusion.ipynb → models/multimodal_fusion.py
✅ Converted: models/text_model.ipynb → models/text_model.py
✅ Converted: evaluation/evaluate_multimodal.ipynb → evaluation/evaluate_multimodal.py
✅ Converted: evaluation/evaluate_video.ipynb → evaluation/evaluate_video.py
✅ Converted: evaluation/evaluate_text.ipynb → evaluation/evaluate_text.py
✅ Converted: evaluation/evaluate_audio.ipynb → evaluation/evaluate_audio.py
✅ Converted: .ipynb_checkpoints/main-checkpoint.ipynb → .ipynb_checkpoints/main-checkpoint.py
✅ Converted: preprocessing/align_modalities.ipynb → preprocessin

In [17]:
# This script deletes .py files that were generated from .ipynb files after user confirmation.

import os
from pathlib import Path

# Ask user for confirmation
user_input = input("Do you want to delete all .py files corresponding to .ipynb notebooks? ").strip().lower()

if user_input == 'yes':
    # Set root directory to current notebook's location
    root_dir = Path('.').resolve()
    exclude_folders = {'data','utils'}

    # Function to delete .py file corresponding to .ipynb file
    def delete_py_file(ipynb_path):
        py_path = ipynb_path.with_suffix('.py')
        if py_path.exists():
            py_path.unlink()
            print(f"🗑️ Deleted: {py_path.relative_to(root_dir)}")

    # Recursively walk through directories
    for dirpath, dirnames, filenames in os.walk(root_dir):
        # Skip excluded folders
        if any(excluded in Path(dirpath).parts for excluded in exclude_folders):
            continue

        for filename in filenames:
            if filename.endswith('.ipynb') and filename != 'main.ipynb':
                file_path = Path(dirpath) / filename
                delete_py_file(file_path)
else:
    print("❌ Operation cancelled. No .py files were deleted.")


Do you want to delete all .py files corresponding to .ipynb notebooks?  yes


🗑️ Deleted: training/train_text.py
🗑️ Deleted: training/train_multimodal.py
🗑️ Deleted: training/train_video.py
🗑️ Deleted: training/train_audio.py
🗑️ Deleted: models/audio_model.py
🗑️ Deleted: models/video_model.py
🗑️ Deleted: models/multimodal_fusion.py
🗑️ Deleted: models/text_model.py
🗑️ Deleted: evaluation/evaluate_multimodal.py
🗑️ Deleted: evaluation/evaluate_video.py
🗑️ Deleted: evaluation/evaluate_text.py
🗑️ Deleted: evaluation/evaluate_audio.py
🗑️ Deleted: .ipynb_checkpoints/main-checkpoint.py
🗑️ Deleted: preprocessing/align_modalities.py
🗑️ Deleted: preprocessing/clinical_preprocess.py
🗑️ Deleted: preprocessing/text_preprocess.py
🗑️ Deleted: preprocessing/video_preprocess.py
🗑️ Deleted: preprocessing/audio_preprocess.py
🗑️ Deleted: preprocessing/.ipynb_checkpoints/clinical_preprocess-checkpoint.py
🗑️ Deleted: preprocessing/.ipynb_checkpoints/audio_preprocess-checkpoint.py
🗑️ Deleted: preprocessing/.ipynb_checkpoints/video_preprocess-checkpoint.py
🗑️ Deleted: preprocessing/.ipy

In [19]:
import os
import pandas as pd

# Base data directory
base_dir = "data"

# List to collect file shapes
csv_shapes = []

# Walk through the directory structure
for root, dirs, files in os.walk(base_dir):
    if os.path.basename(root) == "processed":
        for file in files:
            if file.endswith(".csv"):
                file_path = os.path.join(root, file)
                try:
                    df = pd.read_csv(file_path)
                    shape = df.shape
                    csv_shapes.append((file_path, shape))
                except Exception as e:
                    csv_shapes.append((file_path, f"Error reading file: {e}"))

# Display all file paths with their shapes
for file_path, shape in csv_shapes:
    print(f"{file_path}: {shape}")


data/300_P/video/processed/processed_300_CNN_VGG_feature.csv: (19458, 4096)
data/300_P/video/processed/processed_300_BoVW_openFace_2.1.0_Pose_Gaze_AUs.csv: (6485, 102)
data/300_P/video/processed/processed_300_CNN_ResNet_feature.csv: (19458, 2048)
data/300_P/video/processed/processed_300_OpenFace2.1.0_Pose_gaze_AUs.csv: (19458, 53)
data/300_P/audio/processed/processed_300_vgg16.csv: (648, 1)
data/300_P/audio/processed/processed_300_OpenSMILE2.3.0_egemaps.csv: (64846, 25)
data/300_P/audio/processed/processed_300_BoAW_openSMILE_2.3.0_MFCC.csv: (6484, 1)
data/300_P/audio/processed/processed_300_OpenSMILE2.3.0_mfcc.csv: (64848, 41)
data/300_P/audio/processed/processed_300_densenet201.csv: (648, 1)
data/300_P/audio/processed/processed_300_BoAW_openSMILE_2.3.0_eGeMAPS.csv: (6484, 1)
data/300_P/clinical/processed/processed_300_Transcript_biobert_features.csv: (70, 768)
data/300_P/text/processed/processed_300_Transcript_processed_scaled.csv: (68, 50)
data/lables/processed/depression_labels.csv:

In [20]:
19458 - 278
6485  -93
648   - 9
64846 - 926
70

19458