In [1]:
# @title 1) Setup paths & mount Drive
from google.colab import drive
import os, re, shutil, pandas as pd
from collections import Counter

drive.mount('/content/drive')

BASE_DIR = '/content/drive/MyDrive/Fall25/EE502/FinalProjects/Ch17'
DATA_DIR = f'{BASE_DIR}/data'
TMP_DIR  = '/content/ravdess_tmp'
ZIP_PATH = '/content/ravdess_speech.zip'

import os, shutil, pathlib
for p in [BASE_DIR, DATA_DIR, TMP_DIR]:
    os.makedirs(p, exist_ok=True)

print('BASE_DIR:', BASE_DIR)
print('DATA_DIR:', DATA_DIR)

Mounted at /content/drive
BASE_DIR: /content/drive/MyDrive/Fall25/EE502/FinalProjects/Ch17
DATA_DIR: /content/drive/MyDrive/Fall25/EE502/FinalProjects/Ch17/data


In [None]:
# @title 2) Download RAVDESS Speech (phrases) from Zenodo
if not (os.path.exists(ZIP_PATH) and os.path.getsize(ZIP_PATH) > 100_000_000):
    !wget -O /content/ravdess_speech.zip "https://zenodo.org/record/1188976/files/Audio_Speech_Actors_01-24.zip?download=1"
else:
    print("Zip already present, skipping download.")

--2025-11-01 21:52:30--  https://zenodo.org/record/1188976/files/Audio_Speech_Actors_01-24.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.43.153, 188.185.48.75, 137.138.52.235, ...
Connecting to zenodo.org (zenodo.org)|188.185.43.153|:443... connected.
HTTP request sent, awaiting response... 301 MOVED PERMANENTLY
Location: /records/1188976/files/Audio_Speech_Actors_01-24.zip [following]
--2025-11-01 21:52:31--  https://zenodo.org/records/1188976/files/Audio_Speech_Actors_01-24.zip
Reusing existing connection to zenodo.org:443.
HTTP request sent, awaiting response... 200 OK
Length: 208468073 (199M) [application/octet-stream]
Saving to: ‘/content/ravdess_speech.zip’


2025-11-01 21:52:43 (17.8 MB/s) - ‘/content/ravdess_speech.zip’ saved [208468073/208468073]



In [3]:
# @title 3) Extract the archive
!unzip -q -o /content/ravdess_speech.zip -d /content/ravdess_tmp
print("Extracted to:", TMP_DIR)

Extracted to: /content/ravdess_tmp


In [None]:
# @title 4) Filter to the 5 emotions and copy into data/ 
# RAVDESS filename parts: MM-VC-EM-IT-ST-RP-AC.wav
# MM=03 (Audio-only), VC=01 (Speech), EM=01..08, IT=intensity(01 normal, 02 strong), ST=statement(01/02), RP=repeat(01/02), AC=actor(01..24)

emotion_codes_keep = {'01': 'neutral', '03': 'happy', '04': 'sad', '05': 'angry', '06': 'fearful'}
emotion_map        = {'01':'neutral','02':'calm','03':'happy','04':'sad','05':'angry','06':'fearful','07':'disgust','08':'surprised'}

# Clean per-emotion target dirs
for label in emotion_codes_keep.values():
    os.makedirs(os.path.join(DATA_DIR, label), exist_ok=True)

rows = []

for root, _, files in os.walk(TMP_DIR):
    for f in files:
        if not f.lower().endswith('.wav'):
            continue
        parts = f.split('-')
        if len(parts) != 7:
            continue  # skip unexpected names
        modality   = parts[0]          # expect '03'
        channel    = parts[1]          # expect '01' (speech)
        em_code    = parts[2]          # '01'..'08'
        intensity  = parts[3]          # '01' or '02'
        statement  = parts[4]          # '01' or '02'
        repeat     = parts[5]          # '01' or '02'
        actor_str  = parts[6].split('.')[0]  # '01'..'24'

        # Keep only Audio-only Speech
        if modality != '03' or channel != '01':
            continue

        # Keep only selected emotions
        if em_code not in emotion_codes_keep:
            continue

        src_path = os.path.join(root, f)
        em_name  = emotion_codes_keep[em_code]
        dst_path = os.path.join(DATA_DIR, em_name, f)

        # Avoid duplicate copies on re-run
        if not os.path.exists(dst_path):
            shutil.copy2(src_path, dst_path)

        actor = int(actor_str)
        gender = 'male' if actor % 2 != 0 else 'female'

        rows.append({
            'filename': f,
            'filepath': dst_path,
            'emotion_code': em_code,
            'emotion': emotion_map[em_code],
            'intensity': 'normal' if intensity=='01' else 'strong',
            'statement': statement,
            'repeat': repeat,
            'actor': actor,
            'gender': gender
        })

df = pd.DataFrame(rows).sort_values(['emotion','actor','filename']).reset_index(drop=True)
manifest_path = os.path.join(DATA_DIR, 'manifest_ser_5emotions.csv')
df.to_csv(manifest_path, index=False)
print("Saved manifest:", manifest_path)

# Quick per-emotion counts
print("\nCounts by emotion:")
print(df['emotion'].value_counts().sort_index())

df.head()

Saved manifest: /content/drive/MyDrive/Fall25/EE502/FinalProjects/Ch17/data/manifest_ser_5emotions.csv

Counts by emotion:
emotion
angry      192
fearful    192
happy      192
neutral     96
sad        192
Name: count, dtype: int64


Unnamed: 0,filename,filepath,emotion_code,emotion,intensity,statement,repeat,actor,gender
0,03-01-05-01-01-01-01.wav,/content/drive/MyDrive/Fall25/EE502/FinalProje...,5,angry,normal,1,1,1,male
1,03-01-05-01-01-02-01.wav,/content/drive/MyDrive/Fall25/EE502/FinalProje...,5,angry,normal,1,2,1,male
2,03-01-05-01-02-01-01.wav,/content/drive/MyDrive/Fall25/EE502/FinalProje...,5,angry,normal,2,1,1,male
3,03-01-05-01-02-02-01.wav,/content/drive/MyDrive/Fall25/EE502/FinalProje...,5,angry,normal,2,2,1,male
4,03-01-05-02-01-01-01.wav,/content/drive/MyDrive/Fall25/EE502/FinalProje...,5,angry,strong,1,1,1,male


In [5]:
# @title 5) Quick sanity checks
print("Statements present:", sorted(df['statement'].unique()))
print("Intensity present:", sorted(df['intensity'].unique()))
print("Actors (min..max):", df['actor'].min(), "to", df['actor'].max())
print("\nGender counts:", Counter(df['gender']))

Statements present: ['01', '02']
Intensity present: ['normal', 'strong']
Actors (min..max): 1 to 24

Gender counts: Counter({'male': 432, 'female': 432})


In [6]:
#@title 6) Speaker-independent 70/15/15 split
# Grouped by actor, stratified by emotion, gender balanced as a secondary check
import os, pandas as pd, numpy as np

MANIFEST = '/content/drive/MyDrive/Fall25/EE502/FinalProjects/Ch17/data/manifest_ser_5emotions.csv'
df = pd.read_csv(MANIFEST)

# Sanity checks
assert {'filepath','emotion','actor','gender'}.issubset(df.columns), "Manifest missing required columns."

# Encode target for stratification
y = df['emotion'].values
groups = df['actor'].values

# --- Try StratifiedGroupKFold (best) ---
USE_SGKF = False
try:
    from sklearn.model_selection import StratifiedGroupKFold
    USE_SGKF = True
except Exception:
    from sklearn.model_selection import GroupShuffleSplit

def summarize(tag, d):
    print(f"\n=== {tag} summary ===")
    print("By emotion:")
    print(d['emotion'].value_counts().sort_index())
    print("\nBy gender:")
    print(d['gender'].value_counts().sort_index())
    print("\nActors count:", d['actor'].nunique())

def no_overlap(a, b):
    return set(a['actor'].unique()).isdisjoint(set(b['actor'].unique()))

if USE_SGKF:
    # We’ll do a 70/30 split, then split the 30 into 15/15
    sgkf = StratifiedGroupKFold(n_splits=10, shuffle=True, random_state=42)

    # Choose the split closest to 70/30 by sample count
    best = None
    n = len(df)
    target_train = 0.70 * n
    for tr_idx, te_idx in sgkf.split(np.zeros(len(y)), y, groups):
        diff = abs(len(tr_idx) - target_train)
        if (best is None) or (diff < best[0]):
            best = (diff, tr_idx, te_idx)

    tr_idx, hold_idx = best[1], best[2]
    train = df.iloc[tr_idx].copy()
    hold = df.iloc[hold_idx].copy()

    # Now split hold (≈30%) into val/test ≈ 15/15 with another SGKF on the hold set
    y_hold = hold['emotion'].values
    g_hold = hold['actor'].values

    sgkf2 = StratifiedGroupKFold(n_splits=5, shuffle=True, random_state=123)
    best2 = None
    target_val = 0.50 * len(hold)  # half of hold → ~15% overall
    for v_idx, t_idx in sgkf2.split(np.zeros(len(y_hold)), y_hold, g_hold):
        diff = abs(len(v_idx) - target_val)
        if (best2 is None) or (diff < best2[0]):
            best2 = (diff, v_idx, t_idx)

    val = hold.iloc[best2[1]].copy()
    test = hold.iloc[best2[2]].copy()

else:
    # Fallback: GroupShuffleSplit (not stratified). We add light post-checks.
    from sklearn.model_selection import GroupShuffleSplit
    gss1 = GroupShuffleSplit(n_splits=1, test_size=0.30, random_state=42)
    tr_idx, hold_idx = next(gss1.split(df, groups=groups))
    train = df.iloc[tr_idx].copy()
    hold = df.iloc[hold_idx].copy()

    gss2 = GroupShuffleSplit(n_splits=1, test_size=0.50, random_state=123)
    v_idx, t_idx = next(gss2.split(hold, groups=hold['actor'].values))
    val = hold.iloc[v_idx].copy()
    test = hold.iloc[t_idx].copy()

# Verify no actor leakage
assert no_overlap(train, val) and no_overlap(train, test) and no_overlap(val, test), "Actor leakage across splits!"

# Tag splits and save
train['split'] = 'train'
val['split']   = 'val'
test['split']  = 'test'
df_split = pd.concat([train, val, test], ignore_index=True)

OUT_PATH = '/content/drive/MyDrive/Fall25/EE502/FinalProjects/Ch17/data/manifest_ser_5emotions_split.csv'
df_split.to_csv(OUT_PATH, index=False)
print("Saved split manifest to:", OUT_PATH)

# Show summaries
summarize('Train', train)
summarize('Val',   val)
summarize('Test',  test)

# Quick percentages by emotion per split
print("\nEmotion distribution (%) by split:")
print(
    df_split.pivot_table(index='emotion', columns='split', values='filename', aggfunc='count')
            .apply(lambda c: 100*c/c.sum(), axis=0)
            .round(1)
)

Saved split manifest to: /content/drive/MyDrive/Fall25/EE502/FinalProjects/Ch17/data/manifest_ser_5emotions_split.csv

=== Train summary ===
By emotion:
emotion
angry      168
fearful    168
happy      168
neutral     84
sad        168
Name: count, dtype: int64

By gender:
gender
female    396
male      360
Name: count, dtype: int64

Actors count: 21

=== Val summary ===
By emotion:
emotion
angry      16
fearful    16
happy      16
neutral     8
sad        16
Name: count, dtype: int64

By gender:
gender
female    36
male      36
Name: count, dtype: int64

Actors count: 2

=== Test summary ===
By emotion:
emotion
angry      8
fearful    8
happy      8
neutral    4
sad        8
Name: count, dtype: int64

By gender:
gender
male    36
Name: count, dtype: int64

Actors count: 1

Emotion distribution (%) by split:
split    test  train   val
emotion                   
angry    22.2   22.2  22.2
fearful  22.2   22.2  22.2
happy    22.2   22.2  22.2
neutral  11.1   11.1  11.1
sad      22.2   22