## RAVDESS Feature Extraction

In [12]:
import os
import numpy as np
import pandas as pd
import librosa
from scipy.stats import skew, kurtosis

# Define target duration (in seconds) and sampling rate
target_duration = 3.0  # in seconds
target_sampling_rate = 22050  # in Hz

# Unified emotion mapping
emotion_mapping = {
    "neutral": "neutral",
    "calm": "neutral",
    "happy": "happy",
    "sad": "sad",
    "angry": "angry",
    "fearful": "fearful",
    "disgust": "disgust",
    "surprised": "surprised"
}

# Function to preprocess audio file to ensure consistent duration and sampling rate
def preprocess_audio(audio_path):
    audio, sr = librosa.load(audio_path, sr=target_sampling_rate)
    target_length = int(target_duration * target_sampling_rate)

    if len(audio) < target_length:
        # Pad with zeros to reach the target length
        audio = np.pad(audio, (0, target_length - len(audio)))
    else:
        # Truncate to the target length
        audio = audio[:target_length]

    return audio, target_sampling_rate

# Function to extract features from a single audio file
def extract_features(audio_path):
    features = {}

    # Preprocess the audio file
    audio, sr = preprocess_audio(audio_path)

    # --- Features from your notebook ---
    FRAME_LENGTH = 1024
    HOP_LENGTH = 512

    # 1. RMSE
    rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['RMSE'] = rms

    # 2. Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['Zero_Crossing_Rate'] = zcr

    # 3. Mel Spectrogram
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH, n_mels=90)
    mel_spec_db = librosa.power_to_db(mel_spec).mean()
    features['Mel_Spectrogram_Mean'] = mel_spec_db

    # 4. MFCCs and derivatives
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, n_fft=1024, hop_length=HOP_LENGTH)
    mfccs_mean = mfccs.mean(axis=1)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    features.update({f'MFCC_{i+1}': val for i, val in enumerate(mfccs_mean)})
    features.update({f'Delta_MFCC_{i+1}': val for i, val in enumerate(delta_mfccs.mean(axis=1))})
    features.update({f'Delta2_MFCC_{i+1}': val for i, val in enumerate(delta2_mfccs.mean(axis=1))})

    # 5. Spectral Centroid
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Centroid'] = spectral_centroid

    # 6. Spectral Bandwidth
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Bandwidth'] = spectral_bandwidth

    # --- Additional Features ---
    # 7. Pitch (Fundamental Frequency)
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH)
    pitch_mean = np.mean(pitches[pitches > 0]) if np.any(pitches > 0) else 0
    features['Pitch_Mean'] = pitch_mean

    # 8. Harmonic-to-Noise Ratio (HNR)
    harmonic, percussive = librosa.effects.hpss(audio)
    hnr = 10 * np.log10(np.mean(harmonic**2) / np.mean(percussive**2)) if np.mean(percussive**2) > 0 else 0
    features['HNR'] = hnr

    # 9. Spectral Contrast
    spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Contrast'] = spectral_contrast

    # 10. Chroma Features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean(axis=1)
    features.update({f'Chroma_{i+1}': val for i, val in enumerate(chroma)})

    # 11. Tempo (Rhythm)
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=HOP_LENGTH)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()
    features['Tempo'] = tempo

    # 12. Jitter and Shimmer (Approximation)
    jitter = np.std(np.diff(pitches[pitches > 0])) / pitch_mean if pitch_mean > 0 else 0
    shimmer = np.std(np.abs(np.diff(audio))) / np.mean(np.abs(audio)) if np.mean(np.abs(audio)) > 0 else 0
    features['Jitter'] = jitter
    features['Shimmer'] = shimmer

    # 13. Tonality and Timbre
    spectral_flatness = librosa.feature.spectral_flatness(y=audio).mean()
    features['Spectral_Flatness'] = spectral_flatness

    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Rolloff'] = spectral_rolloff

    # 14. Duration and Silence
    duration = librosa.get_duration(y=audio, sr=sr)
    silence = np.mean(audio == 0)
    features['Duration'] = duration
    features['Silence'] = silence

    return features

# Function to process all audio files in the RAVDESS dataset
def process_ravdess(dataset_path):
    data = []

    raw_emotion_mapping = {
        "01": "neutral",
        "02": "calm",
        "03": "happy",
        "04": "sad",
        "05": "angry",
        "06": "fearful",
        "07": "disgust",
        "08": "surprised"
    }

    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)

                # Extract emotion from filename
                file_parts = file.split('-')
                emotion_code = file_parts[2]
                raw_emotion = raw_emotion_mapping.get(emotion_code, "unknown")
                emotion = emotion_mapping.get(raw_emotion, "unknown")

                features = extract_features(file_path)
                features['Emotion'] = emotion
                data.append(features)

    return pd.DataFrame(data)

# Path to RAVDESS dataset
dataset_path = "Dataset/RAVDESS"
ravdess_dataset = process_ravdess(dataset_path)

# Save the dataset to a CSV file
ravdess_dataset.to_csv("Dataset/RAVDESS_features_dataset.csv", index=False)

print("RAVDESS feature extraction completed and dataset saved.")



	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()


RAVDESS feature extraction completed and dataset saved.


In [9]:
import pandas as pd

# Load the dataset
ravdess_dataset_path = 'Dataset/RAVDESS_features_dataset.csv'  # Replace with your file path
ravdess_dataset = pd.read_csv(ravdess_dataset_path)

# Display the first few rows of the dataset
print(ravdess_dataset.head())

# Check the dataset's structure
print(ravdess_dataset.info())


       RMSE  Zero_Crossing_Rate  Mel_Spectrogram_Mean     MFCC_1     MFCC_2  \
0  0.014747            0.181806            -49.928920 -565.68760  43.782420   
1  0.010142            0.164250            -53.197456 -605.44180  45.174736   
2  0.047152            0.237027            -38.741500 -443.21610  24.900938   
3  0.030824            0.148445            -41.572445 -473.73538  44.729717   
4  0.005151            0.137492            -58.592377 -663.29486  60.307990   

      MFCC_3    MFCC_4     MFCC_5     MFCC_6     MFCC_7  ...  Chroma_11  \
0  -3.419823  4.879561 -11.828972  -7.854157 -10.851251  ...   0.356888   
1  -5.537606  6.142505  -2.014863 -10.605732 -15.667695  ...   0.345317   
2 -30.117960  0.428244 -11.970781 -20.010164 -16.112022  ...   0.492545   
3 -10.718471  6.005490 -12.931274 -14.678432  -5.036605  ...   0.286623   
4   1.032603  9.540112  -6.581423  -2.948161 -10.793342  ...   0.264617   

   Chroma_12       Tempo    Jitter   Shimmer  Spectral_Flatness  \
0   0.3

In [10]:
# Check for missing values
print(ravdess_dataset.isnull().sum())
ravdess_dataset.shape

RMSE                    0
Zero_Crossing_Rate      0
Mel_Spectrogram_Mean    0
MFCC_1                  0
MFCC_2                  0
                       ..
Spectral_Flatness       0
Spectral_Rolloff        0
Duration                0
Silence                 0
Emotion                 0
Length: 67, dtype: int64


(2880, 67)

## TESS Feature Extraction

In [16]:
import os
import numpy as np
import pandas as pd
import librosa
from scipy.stats import skew, kurtosis

# Define target duration (in seconds) and sampling rate
target_duration = 3.0  # in seconds
target_sampling_rate = 22050  # in Hz

# Unified emotion mapping
emotion_mapping = {
    "neutral": "neutral",
    "calm": "neutral",
    "happy": "happy",
    "sad": "sad",
    "angry": "angry",
    "fearful": "fearful",
    "disgust": "disgust",
    "surprised": "surprised"
}

# Function to preprocess audio file to ensure consistent duration and sampling rate
def preprocess_audio(audio_path):
    audio, sr = librosa.load(audio_path, sr=target_sampling_rate)
    target_length = int(target_duration * target_sampling_rate)

    if len(audio) < target_length:
        # Pad with zeros to reach the target length
        audio = np.pad(audio, (0, target_length - len(audio)))
    else:
        # Truncate to the target length
        audio = audio[:target_length]

    return audio, target_sampling_rate

# Function to extract features from a single audio file
def extract_features(audio_path):
    features = {}

    # Preprocess the audio file
    audio, sr = preprocess_audio(audio_path)

    # --- Features from your notebook ---
    FRAME_LENGTH = 1024
    HOP_LENGTH = 512

    # 1. RMSE
    rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['RMSE'] = rms

    # 2. Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['Zero_Crossing_Rate'] = zcr

    # 3. Mel Spectrogram
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH, n_mels=90)
    mel_spec_db = librosa.power_to_db(mel_spec).mean()
    features['Mel_Spectrogram_Mean'] = mel_spec_db

    # 4. MFCCs and derivatives
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, n_fft=1024, hop_length=HOP_LENGTH)
    mfccs_mean = mfccs.mean(axis=1)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    features.update({f'MFCC_{i+1}': val for i, val in enumerate(mfccs_mean)})
    features.update({f'Delta_MFCC_{i+1}': val for i, val in enumerate(delta_mfccs.mean(axis=1))})
    features.update({f'Delta2_MFCC_{i+1}': val for i, val in enumerate(delta2_mfccs.mean(axis=1))})

    # 5. Spectral Centroid
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Centroid'] = spectral_centroid

    # 6. Spectral Bandwidth
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Bandwidth'] = spectral_bandwidth

    # --- Additional Features ---
    # 7. Pitch (Fundamental Frequency)
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH)
    pitch_mean = np.mean(pitches[pitches > 0]) if np.any(pitches > 0) else 0
    features['Pitch_Mean'] = pitch_mean

    # 8. Harmonic-to-Noise Ratio (HNR)
    harmonic, percussive = librosa.effects.hpss(audio)
    hnr = 10 * np.log10(np.mean(harmonic**2) / np.mean(percussive**2)) if np.mean(percussive**2) > 0 else 0
    features['HNR'] = hnr

    # 9. Spectral Contrast
    spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Contrast'] = spectral_contrast

    # 10. Chroma Features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean(axis=1)
    features.update({f'Chroma_{i+1}': val for i, val in enumerate(chroma)})

    # 11. Tempo (Rhythm)
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=HOP_LENGTH)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()
    features['Tempo'] = tempo

    # 12. Jitter and Shimmer (Approximation)
    jitter = np.std(np.diff(pitches[pitches > 0])) / pitch_mean if pitch_mean > 0 else 0
    shimmer = np.std(np.abs(np.diff(audio))) / np.mean(np.abs(audio)) if np.mean(np.abs(audio)) > 0 else 0
    features['Jitter'] = jitter
    features['Shimmer'] = shimmer

    # 13. Tonality and Timbre
    spectral_flatness = librosa.feature.spectral_flatness(y=audio).mean()
    features['Spectral_Flatness'] = spectral_flatness

    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Rolloff'] = spectral_rolloff

    # 14. Duration and Silence
    duration = librosa.get_duration(y=audio, sr=sr)
    silence = np.mean(audio == 0)
    features['Duration'] = duration
    features['Silence'] = silence

    return features

# Function to process all audio files in the TESS dataset
def process_tess(dataset_path):
    data = []

    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)

                # Extract emotion from filename (e.g., "OAF_back_angry.wav")
                emotion_raw = file.split('_')[-1].split('.')[0]  # Get the last part before .wav
                emotion = emotion_mapping.get(emotion_raw.lower(), "unknown")

                features = extract_features(file_path)
                features['Emotion'] = emotion
                data.append(features)

    return pd.DataFrame(data)

# Path to TESS dataset
dataset_path = "Dataset/TESS"
tess_dataset = process_tess(dataset_path)

# Save the dataset to a CSV file
tess_dataset.to_csv("Dataset/TESS_features_dataset.csv", index=False)

print("TESS feature extraction completed and dataset saved.")

	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()


TESS feature extraction completed and dataset saved.


In [7]:
import pandas as pd

# Load the dataset
tess_dataset_path = 'Dataset/TESS_features_dataset.csv'  # Replace with your file path
tess_dataset = pd.read_csv(tess_dataset_path)

# Display the first few rows of the dataset
print(tess_dataset.head())

# Check the dataset's structure
print(tess_dataset.info())

       RMSE  Zero_Crossing_Rate  Mel_Spectrogram_Mean     MFCC_1     MFCC_2  \
0  0.012297            0.156393            -50.361750 -573.77203  43.899857   
1  0.017376            0.129087            -47.293182 -538.34050  60.010815   
2  0.018511            0.197994            -45.697884 -523.14594  55.392982   
3  0.012757            0.167969            -49.176210 -560.29030  41.832100   
4  0.021586            0.128403            -46.906307 -530.83180  66.517960   

      MFCC_3     MFCC_4    MFCC_5    MFCC_6    MFCC_7  ...  Chroma_11  \
0   4.228216  29.829117 -3.161358 -4.192998 -5.612931  ...   0.308867   
1  -3.632375  13.105334 -3.761131 -7.761571 -9.315113  ...   0.315968   
2  11.233569  16.927362 -2.280778 -7.342680 -8.059875  ...   0.299951   
3   1.466254  27.888405 -6.716061  0.907265 -9.043744  ...   0.340164   
4  13.794174   3.213084 -3.236690 -0.956316 -6.556373  ...   0.278320   

   Chroma_12       Tempo    Jitter   Shimmer  Spectral_Flatness  \
0   0.327924   95.7

In [8]:
# Check for missing values
print(tess_dataset.isnull().sum())
tess_dataset.shape

RMSE                    0
Zero_Crossing_Rate      0
Mel_Spectrogram_Mean    0
MFCC_1                  0
MFCC_2                  0
                       ..
Spectral_Flatness       0
Spectral_Rolloff        0
Duration                0
Silence                 0
Emotion                 0
Length: 67, dtype: int64


(5600, 67)

## CREMA-D Dataset

In [19]:
import os
import numpy as np
import pandas as pd
import librosa
from scipy.stats import skew, kurtosis

# Define target duration (in seconds) and sampling rate
target_duration = 3.0  # in seconds
target_sampling_rate = 22050  # in Hz

# Unified emotion mapping
emotion_mapping = {
    "neutral": "neutral",
    "calm": "neutral",
    "happy": "happy",
    "sad": "sad",
    "angry": "angry",
    "fearful": "fearful",
    "disgust": "disgust",
    "surprised": "surprised"
}

# Function to preprocess audio file to ensure consistent duration and sampling rate
def preprocess_audio(audio_path):
    audio, sr = librosa.load(audio_path, sr=target_sampling_rate)
    target_length = int(target_duration * target_sampling_rate)

    if len(audio) < target_length:
        # Pad with zeros to reach the target length
        audio = np.pad(audio, (0, target_length - len(audio)))
    else:
        # Truncate to the target length
        audio = audio[:target_length]

    return audio, target_sampling_rate

# Function to extract features from a single audio file
def extract_features(audio_path):
    features = {}

    # Preprocess the audio file
    audio, sr = preprocess_audio(audio_path)

    # --- Features from your notebook ---
    FRAME_LENGTH = 1024
    HOP_LENGTH = 512

    # 1. RMSE
    rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['RMSE'] = rms

    # 2. Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['Zero_Crossing_Rate'] = zcr

    # 3. Mel Spectrogram
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH, n_mels=90)
    mel_spec_db = librosa.power_to_db(mel_spec).mean()
    features['Mel_Spectrogram_Mean'] = mel_spec_db

    # 4. MFCCs and derivatives
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, n_fft=1024, hop_length=HOP_LENGTH)
    mfccs_mean = mfccs.mean(axis=1)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    features.update({f'MFCC_{i+1}': val for i, val in enumerate(mfccs_mean)})
    features.update({f'Delta_MFCC_{i+1}': val for i, val in enumerate(delta_mfccs.mean(axis=1))})
    features.update({f'Delta2_MFCC_{i+1}': val for i, val in enumerate(delta2_mfccs.mean(axis=1))})

    # 5. Spectral Centroid
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Centroid'] = spectral_centroid

    # 6. Spectral Bandwidth
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Bandwidth'] = spectral_bandwidth

    # --- Additional Features ---
    # 7. Pitch (Fundamental Frequency)
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH)
    pitch_mean = np.mean(pitches[pitches > 0]) if np.any(pitches > 0) else 0
    features['Pitch_Mean'] = pitch_mean

    # 8. Harmonic-to-Noise Ratio (HNR)
    harmonic, percussive = librosa.effects.hpss(audio)
    hnr = 10 * np.log10(np.mean(harmonic**2) / np.mean(percussive**2)) if np.mean(percussive**2) > 0 else 0
    features['HNR'] = hnr

    # 9. Spectral Contrast
    spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Contrast'] = spectral_contrast

    # 10. Chroma Features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean(axis=1)
    features.update({f'Chroma_{i+1}': val for i, val in enumerate(chroma)})

    # 11. Tempo (Rhythm)
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=HOP_LENGTH)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()
    features['Tempo'] = tempo

    # 12. Jitter and Shimmer (Approximation)
    jitter = np.std(np.diff(pitches[pitches > 0])) / pitch_mean if pitch_mean > 0 else 0
    shimmer = np.std(np.abs(np.diff(audio))) / np.mean(np.abs(audio)) if np.mean(np.abs(audio)) > 0 else 0
    features['Jitter'] = jitter
    features['Shimmer'] = shimmer

    # 13. Tonality and Timbre
    spectral_flatness = librosa.feature.spectral_flatness(y=audio).mean()
    features['Spectral_Flatness'] = spectral_flatness

    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Rolloff'] = spectral_rolloff

    # 14. Duration and Silence
    duration = librosa.get_duration(y=audio, sr=sr)
    silence = np.mean(audio == 0)
    features['Duration'] = duration
    features['Silence'] = silence

    return features

# Function to process all audio files in the CREMA-D dataset
def process_crema_d(dataset_path):
    data = []

    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)

                # Extract emotion from filename (e.g., "1001_DFA_ANG_XX.wav")
                file_parts = file.split('_')
                raw_emotion_code = file_parts[2]
                raw_emotion = {
                    "ANG": "angry",
                    "DIS": "disgust",
                    "FEA": "fearful",
                    "HAP": "happy",
                    "NEU": "neutral",
                    "SAD": "sad"
                }.get(raw_emotion_code, "unknown")

                emotion = emotion_mapping.get(raw_emotion, "unknown")

                features = extract_features(file_path)
                features['Emotion'] = emotion
                data.append(features)

    return pd.DataFrame(data)

# Path to CREMA-D dataset
dataset_path = "Dataset/CREMA-D"
crema_d_dataset = process_crema_d(dataset_path)

# Save the dataset to a CSV file
crema_d_dataset.to_csv("Dataset/CREMA-D_features_dataset.csv", index=False)

print("CREMA-D feature extraction completed and dataset saved.")



	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()
  return pitch_tuning(


CREMA-D feature extraction completed and dataset saved.


In [5]:
import pandas as pd

# Load the dataset
crema_d_dataset_path = 'Dataset/CREMA-D_features_dataset.csv'  # Replace with your file path
crema_d_dataset = pd.read_csv(crema_d_dataset_path)

# Display the first few rows of the dataset
print(crema_d_dataset.head())

# Check the dataset's structure
print(crema_d_dataset.info())

       RMSE  Zero_Crossing_Rate  Mel_Spectrogram_Mean     MFCC_1      MFCC_2  \
0  0.073580            0.078305            -33.864310 -383.62805   85.888760   
1  0.037229            0.091309            -37.033104 -423.04224  125.774460   
2  0.007056            0.048941            -50.110780 -565.71550  111.819885   
3  0.009229            0.051352            -48.222622 -545.74884  111.447610   
4  0.014069            0.039776            -44.119440 -501.25912  130.523680   

      MFCC_3     MFCC_4     MFCC_5     MFCC_6     MFCC_7  ...  Chroma_11  \
0   1.510415  38.184490  -2.107945  -2.918928 -12.555401  ...   0.373348   
1  12.974215  51.769170 -17.094017  10.942081 -18.850365  ...   0.475053   
2   2.412334  45.476185 -12.045537  21.651108 -13.544354  ...   0.488740   
3  14.475146  46.292442 -11.371361  13.052764 -13.202646  ...   0.424008   
4  15.468277  57.672870  -2.552489  15.085404  -6.516215  ...   0.547534   

   Chroma_12       Tempo    Jitter   Shimmer  Spectral_Flatnes

In [6]:
crema_d_dataset.shape

(7442, 67)

## SAVEE Dataset

In [1]:
import os
import numpy as np
import pandas as pd
import librosa
from scipy.stats import skew, kurtosis

# Define target duration (in seconds) and sampling rate
target_duration = 3.0  # in seconds
target_sampling_rate = 22050  # in Hz

# Unified emotion mapping
emotion_mapping = {
    "n": "neutral",
    "h": "happy",
    "sa": "sad",
    "a": "angry",
    "f": "fearful",
    "d": "disgust",
    "su": "surprised"
}

# Function to preprocess audio file to ensure consistent duration and sampling rate
def preprocess_audio(audio_path):
    audio, sr = librosa.load(audio_path, sr=target_sampling_rate)
    target_length = int(target_duration * target_sampling_rate)

    if len(audio) < target_length:
        # Pad with zeros to reach the target length
        audio = np.pad(audio, (0, target_length - len(audio)))
    else:
        # Truncate to the target length
        audio = audio[:target_length]

    return audio, target_sampling_rate

# Function to extract features from a single audio file
def extract_features(audio_path):
    features = {}

    # Preprocess the audio file
    audio, sr = preprocess_audio(audio_path)

    # --- Features from your notebook ---
    FRAME_LENGTH = 1024
    HOP_LENGTH = 512

    # 1. RMSE
    rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['RMSE'] = rms

    # 2. Zero Crossing Rate
    zcr = librosa.feature.zero_crossing_rate(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH).mean()
    features['Zero_Crossing_Rate'] = zcr

    # 3. Mel Spectrogram
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH, n_mels=90)
    mel_spec_db = librosa.power_to_db(mel_spec).mean()
    features['Mel_Spectrogram_Mean'] = mel_spec_db

    # 4. MFCCs and derivatives
    mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13, n_fft=1024, hop_length=HOP_LENGTH)
    mfccs_mean = mfccs.mean(axis=1)
    delta_mfccs = librosa.feature.delta(mfccs)
    delta2_mfccs = librosa.feature.delta(mfccs, order=2)
    features.update({f'MFCC_{i+1}': val for i, val in enumerate(mfccs_mean)})
    features.update({f'Delta_MFCC_{i+1}': val for i, val in enumerate(delta_mfccs.mean(axis=1))})
    features.update({f'Delta2_MFCC_{i+1}': val for i, val in enumerate(delta2_mfccs.mean(axis=1))})

    # 5. Spectral Centroid
    spectral_centroid = librosa.feature.spectral_centroid(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Centroid'] = spectral_centroid

    # 6. Spectral Bandwidth
    spectral_bandwidth = librosa.feature.spectral_bandwidth(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Bandwidth'] = spectral_bandwidth

    # --- Additional Features ---
    # 7. Pitch (Fundamental Frequency)
    pitches, magnitudes = librosa.piptrack(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH)
    pitch_mean = np.mean(pitches[pitches > 0]) if np.any(pitches > 0) else 0
    features['Pitch_Mean'] = pitch_mean

    # 8. Harmonic-to-Noise Ratio (HNR)
    harmonic, percussive = librosa.effects.hpss(audio)
    hnr = 10 * np.log10(np.mean(harmonic**2) / np.mean(percussive**2)) if np.mean(percussive**2) > 0 else 0
    features['HNR'] = hnr

    # 9. Spectral Contrast
    spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Contrast'] = spectral_contrast

    # 10. Chroma Features
    chroma = librosa.feature.chroma_stft(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean(axis=1)
    features.update({f'Chroma_{i+1}': val for i, val in enumerate(chroma)})

    # 11. Tempo (Rhythm)
    onset_env = librosa.onset.onset_strength(y=audio, sr=sr, hop_length=HOP_LENGTH)
    tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()
    features['Tempo'] = tempo

    # 12. Jitter and Shimmer (Approximation)
    jitter = np.std(np.diff(pitches[pitches > 0])) / pitch_mean if pitch_mean > 0 else 0
    shimmer = np.std(np.abs(np.diff(audio))) / np.mean(np.abs(audio)) if np.mean(np.abs(audio)) > 0 else 0
    features['Jitter'] = jitter
    features['Shimmer'] = shimmer

    # 13. Tonality and Timbre
    spectral_flatness = librosa.feature.spectral_flatness(y=audio).mean()
    features['Spectral_Flatness'] = spectral_flatness

    spectral_rolloff = librosa.feature.spectral_rolloff(y=audio, sr=sr, n_fft=1024, hop_length=HOP_LENGTH).mean()
    features['Spectral_Rolloff'] = spectral_rolloff

    # 14. Duration and Silence
    duration = librosa.get_duration(y=audio, sr=sr)
    silence = np.mean(audio == 0)
    features['Duration'] = duration
    features['Silence'] = silence

    return features

# Function to process all audio files in the SAVEE dataset
def process_savee(dataset_path):
    data = []

    for root, _, files in os.walk(dataset_path):
        for file in files:
            if file.endswith('.wav'):
                file_path = os.path.join(root, file)

                # Extract emotion from filename (e.g., "JK_su01.wav")
                file_parts = file.split('_')
                emotion_code = file_parts[1][:2]  # Get the emotion code (e.g., 'su', 'sa')
                emotion = emotion_mapping.get(emotion_code, "unknown")

                features = extract_features(file_path)
                features['Emotion'] = emotion
                data.append(features)

    return pd.DataFrame(data)

# Path to SAVEE dataset
dataset_path = "Dataset/SAVEE"
savee_dataset = process_savee(dataset_path)

# Save the dataset to a CSV file
savee_dataset.to_csv("Dataset/SAVEE_features_dataset.csv", index=False)

print("SAVEE feature extraction completed and dataset saved.")


	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  tempo = librosa.beat.tempo(onset_envelope=onset_env, sr=sr).item()


SAVEE feature extraction completed and dataset saved.


In [3]:
import pandas as pd

# Load the dataset
savee_dataset_path = 'Dataset/SAVEE_features_dataset.csv'  # Replace with your file path
saveee_dataset = pd.read_csv(savee_dataset_path)

# Display the first few rows of the dataset
print(savee_dataset.head())

# Check the dataset's structure
print(savee_dataset.info())

saveee_dataset.shape

       RMSE  Zero_Crossing_Rate  Mel_Spectrogram_Mean      MFCC_1      MFCC_2  \
0  0.096243            0.019524            -35.962654 -408.271881  101.782509   
1  0.073336            0.014979            -37.673794 -432.946167  121.255760   
2  0.061558            0.011208            -44.537773 -506.484436  120.200455   
3  0.134778            0.019028            -35.241142 -403.779358   98.904404   
4  0.072211            0.016677            -41.526272 -471.727753  145.821045   

      MFCC_3     MFCC_4     MFCC_5     MFCC_6     MFCC_7  ...  Chroma_11  \
0  14.735724  57.775494  -3.375690 -10.940758   5.251171  ...   0.478388   
1   9.786642  53.720806  16.386436 -15.011709   0.361910  ...   0.629491   
2  28.003235  27.287685  18.309204   5.175481 -11.274021  ...   0.558801   
3  18.473347  35.634796  -1.421025   7.098048 -27.093967  ...   0.529298   
4  22.681051  37.001007  30.451714  -8.311298 -17.866390  ...   0.553176   

   Chroma_12       Tempo    Jitter   Shimmer  Spectral_F

(480, 67)