# TESS Dataset - Toronto Emotional Speech Set

This notebook loads and explores the TESS (Toronto Emotional Speech Set) dataset.

The dataset contains audio files organized by emotion categories:
- Angry
- Disgust
- Fear
- Happy
- Neutral
- Pleasant Surprise
- Sad

The recordings come from two speakers: OAF (Older Adult Female) and YAF (Younger Adult Female).

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import os
import librosa
import librosa.display
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from IPython.display import Audio

# Global plotting defaults: seaborn grid theme and a wide default figure
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

In [2]:
# Location of the extracted TESS archive, relative to this notebook
DATASET_PATH = Path('../datasets/kaggle_speech_emotion/TESS Toronto emotional speech set data')

# Report whether the dataset is present and, if so, how many .wav files
# sit directly inside each of its subdirectories
if DATASET_PATH.exists():
    print(f"Dataset found at: {DATASET_PATH}")
    print("\nSubdirectories:")
    subdirs = [entry for entry in sorted(DATASET_PATH.iterdir()) if entry.is_dir()]
    for subdir in subdirs:
        wav_count = sum(1 for _wav in subdir.glob('*.wav'))
        print(f"  {subdir.name}: {wav_count} files")
else:
    print(f"ERROR: Dataset not found at {DATASET_PATH}")
    print("Please run the download script first.")

ERROR: Dataset not found at ../datasets/kaggle_speech_emotion/TESS Toronto emotional speech set data
Please run the download script first.


In [None]:
# Load all audio file paths and create a dataframe
def load_dataset_info(dataset_path):
    """
    Load information about all audio files in the dataset.

    Scans every immediate subdirectory of ``dataset_path`` whose name has the
    form ``<speaker>_<emotion>`` and records one row per ``.wav`` file found
    directly inside it.

    Parameters
    ----------
    dataset_path : str or pathlib.Path
        Root folder holding the per-speaker / per-emotion subdirectories.

    Returns
    -------
    pandas.DataFrame
        One row per audio file with the columns ``file_path``, ``file_name``,
        ``speaker``, ``emotion`` (lower-cased) and ``emotion_dir``. The
        columns are present even when no audio file is found, so column
        lookups on the result do not fail for an empty dataset. Rows are
        ordered by directory name, then by file path.

    Raises
    ------
    FileNotFoundError
        If ``dataset_path`` does not exist (raised while listing it).
    """
    columns = ['file_path', 'file_name', 'speaker', 'emotion', 'emotion_dir']
    dataset_path = Path(dataset_path)  # accept plain strings as well as Path objects
    data = []

    for emotion_dir in sorted(dataset_path.iterdir()):
        if not emotion_dir.is_dir():
            continue

        dir_name = emotion_dir.name

        # Skip nested TESS directory if it exists
        if 'TESS Toronto' in dir_name:
            continue

        # Everything before the first underscore is the speaker (OAF or YAF);
        # the remainder, which may itself contain underscores, is the emotion.
        speaker, separator, emotion = dir_name.partition('_')
        if not separator:
            continue

        # glob() yields files in arbitrary, filesystem-dependent order; sort so
        # that "first sample of an emotion" is the same file on every machine.
        for audio_file in sorted(emotion_dir.glob('*.wav')):
            data.append({
                'file_path': str(audio_file),
                'file_name': audio_file.name,
                'speaker': speaker,
                'emotion': emotion.lower(),
                'emotion_dir': dir_name
            })

    # Passing the column list keeps the schema stable when `data` is empty.
    return pd.DataFrame(data, columns=columns)

# Index every audio file under DATASET_PATH, then show the size and a preview
df = load_dataset_info(DATASET_PATH)
preview = df.head(10)
print(f"Total audio files: {len(df)}")
print("\nDataset info:")
print(preview)

In [None]:
# Overall size followed by per-emotion, per-speaker and combined breakdowns
print("=== Dataset Statistics ===")
print(f"\nTotal samples: {len(df)}")

for heading, table in (
    ("\nEmotions distribution:", df['emotion'].value_counts().sort_index()),
    ("\nSpeakers distribution:", df['speaker'].value_counts()),
    ("\nSamples per speaker and emotion:", pd.crosstab(df['speaker'], df['emotion'])),
):
    print(heading)
    print(table)

In [None]:
# Two side-by-side bar charts: samples per emotion, and the same split by speaker
fig, axes = plt.subplots(1, 2, figsize=(16, 5))
left_ax, right_ax = axes

# Left panel: total number of samples for each emotion (alphabetical order)
emotion_counts = df['emotion'].value_counts().sort_index()
tick_positions = range(len(emotion_counts))
left_ax.bar(tick_positions, emotion_counts.values, color='steelblue')
left_ax.set_xticks(tick_positions)
left_ax.set_xticklabels(emotion_counts.index, rotation=45, ha='right')
left_ax.set(
    xlabel='Emotion',
    ylabel='Number of Samples',
    title='Emotion Distribution in TESS Dataset',
)
left_ax.grid(axis='y', alpha=0.3)

# Right panel: grouped bars, one bar per speaker within each emotion
speaker_emotion = pd.crosstab(df['emotion'], df['speaker'])
speaker_emotion.plot(kind='bar', ax=right_ax, color=['coral', 'skyblue'])
right_ax.set(
    xlabel='Emotion',
    ylabel='Number of Samples',
    title='Samples per Speaker and Emotion',
)
right_ax.legend(title='Speaker')
right_ax.set_xticklabels(right_ax.get_xticklabels(), rotation=45, ha='right')
right_ax.grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

In [None]:
# Load and analyze a sample audio file
def analyze_audio(file_path, sr=22050):
    """
    Read one audio file with librosa and report its length.

    Parameters
    ----------
    file_path : path of the audio file, passed straight to ``librosa.load``.
    sr : sample rate requested from ``librosa.load`` (default 22050).

    Returns
    -------
    tuple
        ``(samples, sample_rate, duration)`` where ``samples`` and
        ``sample_rate`` are what ``librosa.load`` returned and ``duration``
        is the value of ``librosa.get_duration`` for them.
    """
    samples, sample_rate = librosa.load(file_path, sr=sr)
    return samples, sample_rate, librosa.get_duration(y=samples, sr=sample_rate)

# Take the first file listed for every emotion and report how long it lasts
print("Sample audio files from each emotion:\n")
sample_files = df.groupby('emotion').first()

durations = []
for entry in sample_files.itertuples():
    # entry.Index is the group key, i.e. the emotion label
    emotion, file_name = entry.Index, entry.file_name
    y, sr, duration = analyze_audio(entry.file_path)
    durations.append(duration)
    print(f"{emotion:20s}: {file_name:40s} - {duration:.2f}s")

print(f"\nAverage duration: {np.mean(durations):.2f}s")

In [None]:
# One figure row per emotion: waveform on the left, spectrogram on the right
emotions_to_plot = ['angry', 'happy', 'sad', 'neutral']

n_rows = len(emotions_to_plot)
fig, axes = plt.subplots(n_rows, 2, figsize=(16, 12))

for (wave_ax, spec_ax), emotion in zip(axes, emotions_to_plot):
    # The first file listed for this emotion serves as the example
    sample = df.loc[df['emotion'] == emotion].iloc[0]
    y, sr = librosa.load(sample['file_path'], sr=22050)

    # Left column: time-domain waveform
    librosa.display.waveshow(y, sr=sr, ax=wave_ax)
    wave_ax.set(
        title=f'{emotion.capitalize()} - Waveform',
        xlabel='Time (s)',
        ylabel='Amplitude',
    )

    # Right column: STFT magnitude in dB, referenced to the clip's own maximum
    magnitude = np.abs(librosa.stft(y))
    D = librosa.amplitude_to_db(magnitude, ref=np.max)
    img = librosa.display.specshow(D, sr=sr, x_axis='time', y_axis='hz', ax=spec_ax)
    spec_ax.set_title(f'{emotion.capitalize()} - Spectrogram')
    fig.colorbar(img, ax=spec_ax, format='%+2.0f dB')

plt.tight_layout()
plt.show()

print("\nVisualized waveforms and spectrograms for sample emotions.")

In [None]:
# Embed an audio player for the first file of each of the first four emotions
print("Play sample audio files:\n")

emotions_to_play = df['emotion'].unique()[:4]  # only the first 4 emotions encountered
for emotion in emotions_to_play:
    matching_rows = df.loc[df['emotion'] == emotion]
    sample = matching_rows.iloc[0]
    print(f"\n{emotion.capitalize()}: {sample['file_name']}")
    player = Audio(sample['file_path'])
    display(player)

In [None]:
# Extract audio features for analysis
def extract_features(file_path, sr=22050):
    """
    Compute a small set of summary audio descriptors for one file with librosa.

    The file is loaded via ``librosa.load(file_path, sr=sr)`` and each
    librosa feature is reduced with NumPy.

    Returns
    -------
    dict
        Keys, in insertion order:

        - ``mfcc_mean`` / ``mfcc_std``: mean and standard deviation along
          axis 1 of a 13-coefficient MFCC matrix (one value per coefficient)
        - ``spectral_centroid``, ``spectral_rolloff``, ``zero_crossing_rate``:
          mean of the corresponding librosa feature
        - ``chroma_mean``: mean over the whole chroma STFT matrix
        - ``rms``: mean RMS energy
    """
    signal, sample_rate = librosa.load(file_path, sr=sr)

    # Matrix-valued descriptors are computed first; reductions happen below
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    centroid = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
    rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sample_rate)
    zero_crossings = librosa.feature.zero_crossing_rate(signal)
    chroma_matrix = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    rms_energy = librosa.feature.rms(y=signal)

    return {
        # MFCCs (Mel-frequency cepstral coefficients): keep one statistic per coefficient
        'mfcc_mean': np.mean(mfcc_matrix, axis=1),
        'mfcc_std': np.std(mfcc_matrix, axis=1),
        # Spectral descriptors collapsed to a single scalar each
        'spectral_centroid': np.mean(centroid),
        'spectral_rolloff': np.mean(rolloff),
        'zero_crossing_rate': np.mean(zero_crossings),
        # Chroma and energy, likewise averaged to scalars
        'chroma_mean': np.mean(chroma_matrix),
        'rms': np.mean(rms_energy),
    }

# Run the feature extractor on the first file of each of the first three emotions
print("Extracting features from sample files...")
sample_features = []

for emotion in df['emotion'].unique()[:3]:
    first_path = df.loc[df['emotion'] == emotion, 'file_path'].iloc[0]
    # Tag the feature dict with its label; 'emotion' stays the last key
    sample_features.append({**extract_features(first_path), 'emotion': emotion})

# Tabulate the results and show only the scalar descriptors
features_df = pd.DataFrame(sample_features)
scalar_columns = ['emotion', 'spectral_centroid', 'spectral_rolloff', 'zero_crossing_rate', 'rms']
print("\nSample features:")
print(features_df[scalar_columns])

## Summary

This notebook demonstrated:
1. ✅ Loading the TESS dataset structure
2. ✅ Analyzing dataset distribution by emotion and speaker
3. ✅ Visualizing waveforms and spectrograms
4. ✅ Playing sample audio files
5. ✅ Extracting audio features (MFCCs, spectral features, etc.)

**Next steps:**
- Extract features from all audio files
- Build a machine learning model for emotion classification
- Train and evaluate the model
- Perform cross-validation and hyperparameter tuning