# Required Installation of Packages

In [1]:
# Install the audio dependencies into the kernel's own environment (%pip, not !pip).
# noisereduce is pinned to the version recorded in this notebook's install output.
%pip install noisereduce==3.0.3 librosa soundfile

Collecting noisereduce
  Downloading noisereduce-3.0.3-py3-none-any.whl.metadata (14 kB)
Downloading noisereduce-3.0.3-py3-none-any.whl (22 kB)
Installing collected packages: noisereduce
Successfully installed noisereduce-3.0.3


---

# Functions to Save and Read Data in JSON Format Using Python

In [2]:
import json

In [3]:
def save_to_json(data, file_path):
    """
    Serialize a Python object to a JSON file (4-space indented).

    This is a best-effort helper: any failure is reported with a printed
    message instead of being raised, and the function always returns None.

    Args:
    - data: JSON-serializable Python object (e.g., dict or list).
    - file_path: Destination path of the JSON file (overwritten if present).
    """
    try:
        with open(file_path, "w") as handle:
            # indent=4 keeps the file human-readable
            json.dump(data, handle, indent=4)
    except Exception as err:
        print(f"Error saving data to JSON: {err}")
    else:
        print(f"Data successfully saved to {file_path}")

In [4]:
def read_from_json(file_path):
    """
    Read data from a JSON file.

    The file is always decoded as UTF-8 rather than with the platform's
    default encoding, so non-ASCII content loads the same on every system.

    Args:
    - file_path: Path to the JSON file.

    Returns:
    - The Python object (e.g., dict or list) loaded from the JSON file,
      or None if the file could not be opened or parsed (an error message
      is printed in that case). Note that a file containing the JSON value
      `null` also yields None.
    """
    try:
        # Explicit encoding: open() would otherwise use a locale-dependent default.
        with open(file_path, "r", encoding="utf-8") as json_file:
            data = json.load(json_file)
        print(f"Data successfully loaded from {file_path}")
        return data
    except Exception as e:
        # Best-effort helper: report the problem and let the caller check for None.
        print(f"Error reading data from JSON: {e}")
        return None

---

# Necessary Installations

In [5]:
# Install the opensmile Python package used for feature extraction.
# %pip targets the kernel's environment; the version is pinned to the one
# recorded in this notebook's install output.
%pip install opensmile==2.5.1

Collecting opensmile
  Downloading opensmile-2.5.1-py3-none-manylinux_2_17_x86_64.whl.metadata (15 kB)
Collecting audobject>=0.6.1 (from opensmile)
  Downloading audobject-0.7.12-py3-none-any.whl.metadata (2.7 kB)
Collecting audinterface>=0.7.0 (from opensmile)
  Downloading audinterface-1.3.1-py3-none-any.whl.metadata (4.3 kB)
Collecting audeer>=2.1.1 (from audinterface>=0.7.0->opensmile)
  Downloading audeer-2.2.2-py3-none-any.whl.metadata (4.1 kB)
Collecting audformat<2.0.0,>=1.0.1 (from audinterface>=0.7.0->opensmile)
  Downloading audformat-1.3.2-py3-none-any.whl.metadata (4.7 kB)
Collecting audiofile>=1.3.0 (from audinterface>=0.7.0->opensmile)
  Downloading audiofile-1.5.1-py3-none-any.whl.metadata (4.9 kB)
Collecting audmath>=1.4.1 (from audinterface>=0.7.0->opensmile)
  Downloading audmath-1.4.2-py3-none-any.whl.metadata (3.7 kB)
Collecting audresample<2.0.0,>=1.1.0 (from audinterface>=0.7.0->opensmile)
  Downloading audresample-1.3.4-py3-none-manylinux_2_17_x86

---

# Extracting Audio Features

In [6]:
import opensmile
import os
import torch
import librosa
import noisereduce as nr
import numpy as np
import soundfile as sf
import tempfile
import json

In [7]:
def preprocess_audio(audio_path):
    """
    Apply noise reduction and peak normalization to an audio file.

    The audio is loaded as mono at a fixed 44.1 kHz (`sr=44100`); per the
    librosa documentation, input recorded at a different rate is resampled
    to that rate, so the native rate is only preserved for 44.1 kHz sources.
    The processed signal is written as 16-bit PCM to a new temporary .wav
    file that the CALLER is responsible for deleting.

    Args:
    - audio_path: Path to the input audio file.

    Returns:
    - Path to the temporary processed .wav file, or None if any step failed
      (an error message is printed and no temporary file is left behind).
    """
    # Tracked outside the try so a half-created temp file can be removed on failure.
    temp_path = None
    try:
        # Load audio as mono at 44.1 kHz
        y, sr = librosa.load(audio_path, sr=44100, mono=True)

        # 1. Noise reduction (stationary noise model, 75% attenuation)
        reduced_noise = nr.reduce_noise(
            y=y,
            sr=sr,
            stationary=True,
            prop_decrease=0.75
        )

        # 2. Amplitude normalization (peak to -3 dBFS)
        max_amplitude = np.max(np.abs(reduced_noise))
        if max_amplitude > 0:
            scaling_factor = 10 ** (-3 / 20) / max_amplitude
            normalized_audio = reduced_noise * scaling_factor
        else:
            normalized_audio = reduced_noise  # All-zero signal: avoid division by zero

        # Reserve a temp file name; delete=False so it survives the close below.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_path = temp_file.name

        # Save processed audio at the sample rate it was loaded with
        sf.write(temp_path, normalized_audio, sr, subtype='PCM_16')

        return temp_path

    except Exception as e:
        # Do not leak the temp file when writing (or anything after creation) fails.
        if temp_path is not None and os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass  # Best-effort cleanup; the original error is reported below.
        print(f"Error preprocessing audio {audio_path}: {e}")
        return None

In [8]:
def extract_audio_features(audio_path, device='cpu'):
    """
    Extract OpenSMILE ComParE 2016 functionals from an audio file.

    The file is first run through `preprocess_audio`, which writes a
    temporary .wav; features are extracted from that file and the temporary
    file is always removed afterwards, whether extraction succeeded or not.

    Args:
    - audio_path: Path to the input audio file.
    - device: Placeholder for future GPU support. Passing 'cuda' only prints
      a note; extraction itself is unaffected by this argument.

    Returns:
    - features_list: Flat list with the extracted feature values, or None
      if preprocessing or extraction failed (an error message is printed).
    """
    # Bound before the try so the cleanup in `finally` never sees an unbound name,
    # even if preprocessing itself raises.
    processed_path = None
    try:
        # Preprocess audio (returns path to temp file, or None on failure)
        processed_path = preprocess_audio(audio_path)

        if not processed_path:
            return None

        # Initialize OpenSMILE with the ComParE 2016 configuration
        smile = opensmile.Smile(
            feature_set=opensmile.FeatureSet.ComParE_2016,  # ComParE_2016 feature set
            feature_level=opensmile.FeatureLevel.Functionals  # Functionals for aggregated features
        )

        # Extract features from processed audio
        features_df = smile.process_file(processed_path)

        # Convert DataFrame to a flat list
        features_list = features_df.values.flatten().tolist()

        # Placeholder for GPU conversion if using other methods
        if device == 'cuda':
            print("Note: OpenSMILE runs on CPU. GPU not utilized for feature extraction.")

        return features_list

    except Exception as e:
        print(f"Error extracting features from {audio_path}: {e}")
        return None

    finally:
        # Single cleanup point for the temporary file on both success and failure.
        if processed_path and os.path.exists(processed_path):
            try:
                os.remove(processed_path)
            except OSError as cleanup_error:
                print(f"Warning: could not remove temporary file {processed_path}: {cleanup_error}")

In [9]:
def process_audio_data(audio_data, device='cpu'):
    """
    Run OpenSMILE feature extraction over every utterance of every split.

    Each item of a split is a dict holding one or more
    `<identifier>: <audio path>` pairs next to the reserved keys "y" and
    "label". For every identifier whose extraction succeeds, one output
    record is appended to that split; failed extractions are skipped.

    Args:
    - audio_data: Dictionary mapping split names to lists of item dicts.
    - device: 'cpu' or 'cuda'; forwarded to `extract_audio_features` and
      shown (upper-cased) in the progress message.

    Returns:
    - Dictionary with the same split names, each mapping to a list of dicts
      with the keys `<identifier>`, `<identifier>_opensmile_features`, "y"
      and "label".
    """
    reserved_keys = ("y", "label")

    # One (initially empty) result list per split present in the input
    results = {}
    for split_name in audio_data.keys():
        results[split_name] = []

    for split_name, records in audio_data.items():
        bucket = results[split_name]
        for record in records:
            for utterance_id, wav_path in record.items():
                if utterance_id in reserved_keys:
                    continue

                print(f"Processing {wav_path} (using {device.upper()})...")
                extracted = extract_audio_features(wav_path, device=device)
                if extracted is None:
                    continue  # extraction failed; the error was already reported

                bucket.append({
                    utterance_id: wav_path,
                    f"{utterance_id}_opensmile_features": extracted,
                    "y": record.get("y", None),
                    "label": record.get("label", None),
                })

    return results

---

# Test code

In [10]:
if __name__ == "__main__":
    # Hand-written sample: three utterances per split, same layout as the full MELD JSON
    MELD_audio_data_updated = {
        "train": [
            {"0_0": "/kaggle/input/meld-audio/audio_train/dia0_utt0.wav", "y": 0, "label": "neutral"},
            {"0_1": "/kaggle/input/meld-audio/audio_train/dia0_utt1.wav", "y": 0, "label": "neutral"},
            {"0_2": "/kaggle/input/meld-audio/audio_train/dia0_utt2.wav", "y": 0, "label": "neutral"},
        ],
        "dev": [
            {"0_0": "/kaggle/input/meld-audio/audio_dev/dia0_utt0.wav", "y": 3, "label": "sadness"},
            {"0_1": "/kaggle/input/meld-audio/audio_dev/dia0_utt1.wav", "y": 1, "label": "surprise"},
            {"1_0": "/kaggle/input/meld-audio/audio_dev/dia1_utt0.wav", "y": 0, "label": "neutral"},
        ],
        "test": [
            {"0_0": "/kaggle/input/meld-audio/audio_test/dia0_utt0.wav", "y": 1, "label": "surprise"},
            {"0_1": "/kaggle/input/meld-audio/audio_test/dia0_utt1.wav", "y": 6, "label": "anger"},
            {"0_2": "/kaggle/input/meld-audio/audio_test/dia0_utt2.wav", "y": 0, "label": "neutral"},
        ],
    }

    # Extract features for the sample
    Audio_Features_OpenSMILE = process_audio_data(MELD_audio_data_updated)

    # Report, for every split, the length of each extracted feature vector
    for split, features in Audio_Features_OpenSMILE.items():
        print(f"\nSplit: {split}")

        for entry in features:
            # The utterance identifier is the first key of each result record
            identifier = next(iter(entry))
            feature_key = f"{identifier}_opensmile_features"
            opensmile_features_length = len(entry[feature_key])
            print(f"Opensmile Features (length): {opensmile_features_length}")
            print("\n")

Processing /kaggle/input/meld-audio/audio_train/dia0_utt0.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt1.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt2.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_dev/dia0_utt0.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_dev/dia0_utt1.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_dev/dia1_utt0.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_test/dia0_utt0.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_test/dia0_utt1.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_test/dia0_utt2.wav (using CPU)...

Split: train
Opensmile Features (length): 6373


Opensmile Features (length): 6373


Opensmile Features (length): 6373



Split: dev
Opensmile Features (length): 6373


Opensmile Features (length): 6373


Opensmile Features (length): 6373



Split: test
Opensmile Features (length): 6373


Opensmile Features (lengt

---

# Feature Extraction for MELD Audio Data with pyOpenSMILE

## Read Updated MELD Audio Data in JSON Format

In [11]:
# Load the full MELD audio index (split -> list of utterance records) from JSON.
if __name__ == "__main__":
    # Path of the cleaned MELD audio JSON inside the Kaggle input dataset
    MELD_audio_data_updated_file_path = "/kaggle/input/meld-emotion-recognition/JSON files/JSON files/Final Format/MELD_Audio_Data_Updated_Cleaned.json"

    # Rebinds MELD_audio_data_updated, replacing the three-utterance sample dict
    # from the test cell. read_from_json returns None (after printing an error)
    # when the file cannot be read or parsed, so check for None before using it.
    MELD_audio_data_updated = read_from_json(MELD_audio_data_updated_file_path)
    # Debug aid (left disabled; it would dump the whole dataset):
    #print("Loaded Data:", MELD_audio_data_updated)

Data successfully loaded from /kaggle/input/meld-emotion-recognition/JSON files/JSON files/Final Format/MELD_Audio_Data_Updated_Cleaned.json


---

In [12]:
if __name__ == "__main__":
    # read_from_json returns None on failure; stop here with a clear message
    # instead of failing inside process_audio_data with an AttributeError.
    if MELD_audio_data_updated is None:
        raise RuntimeError(
            "MELD audio data was not loaded; re-run the JSON loading cell and check the file path."
        )

    # Extract OpenSMILE features for every split of the loaded MELD data
    Audio_Features_OpenSMILE = process_audio_data(MELD_audio_data_updated)

Processing /kaggle/input/meld-audio/audio_train/dia0_utt0.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt1.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt2.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt3.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt4.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt5.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt6.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt7.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt8.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt9.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt10.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt11.wav (using CPU)...
Processing /kaggle/input/meld-audio/audio_train/dia0_utt12.wav (using CPU)

---

# Structuring Extracted Audio Features

In [13]:
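# Extracted Features from Audio Using pyOpenSMILE (Audio_Features_OpenSMILE.json)
# NOTE: the string below is a schematic only, with the feature values elided.
# process_audio_data() stores each feature list under the key
# "<id>_opensmile_features" (shown below as "<id>_OpenSMILE"), and the paths
# are illustrative rather than the exact paths of every split.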

"""

Audio_Features_OpenSMILE = {
    "train": [
        {
            "0_0": "/kaggle/input/meld-audio/audio_train/dia0_utt0.wav",
            "0_0_OpenSMILE": ,
            "y": 0,
            "label": "neutral"
        },
        {
            "0_1": "/kaggle/input/meld-audio/audio_train/dia0_utt1.wav",
            "0_1_OpenSMILE": ,
            "y": 0,
            "label": "neutral"
        },
        {
            "0_2": "/kaggle/input/meld-audio/audio_train/dia0_utt2.wav",
            "0_2_OpenSMILE": ,
            "y": 0,
            "label": "neutral"
        },

        .................................................. (so on)

        ],

    "dev": [
        {
            "0_0": "/kaggle/input/meld-audio/audio_train/dia0_utt0.wav",
            "0_0_OpenSMILE": ,
            "y": 3,
            "label": "sadness"
        },
        {
            "0_1": "/kaggle/input/meld-audio/audio_train/dia0_utt1.wav",
            "0_1_OpenSMILE": ,
            "y": 1,
            "label": "surprise"
        },
        {
            "1_0": "/kaggle/input/meld-audio/audio_train/dia1_utt0.wav",
            "1_0_OpenSMILE": ,
            "y": 0,
            "label": "neutral"
        },
        
        .................................................. (so on)
        
        ],

    "test": [
        {
            "0_0": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/test/output_repeated_splits_test/dia0_utt0.wav",
            "0_0_OpenSMILE": ,
            "y": 1,
            "label": "surprise"
        },
        {
            "0_1": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/test/output_repeated_splits_test/dia0_utt1.wav",
            "0_1_OpenSMILE": ,
            "y": 6,
            "label": "anger"
        },
        {
            "0_2": "/kaggle/input/meld-emotion-recognition/MELD.Raw/MELD.Raw/test/output_repeated_splits_test/dia0_utt2.wav",
            "0_2_OpenSMILE": ,
            "y": 0,
            "label": "neutral"
        },

        .................................................. (so on)

        ]
    }

"""


'\n\nAudio_Features_OpenSMILE = {\n    "train": [\n        {\n            "0_0": "/kaggle/input/meld-audio/audio_train/dia0_utt0.wav",\n            "0_0_OpenSMILE": ,\n            "y": 0,\n            "label": "neutral"\n        },\n        {\n            "0_1": "/kaggle/input/meld-audio/audio_train/dia0_utt1.wav",\n            "0_1_OpenSMILE": ,\n            "y": 0,\n            "label": "neutral"\n        },\n        {\n            "0_2": "/kaggle/input/meld-audio/audio_train/dia0_utt2.wav",\n            "0_2_OpenSMILE": ,\n            "y": 0,\n            "label": "neutral"\n        },\n\n        .................................................. (so on)\n\n        ],\n\n    "dev": [\n        {\n            "0_0": "/kaggle/input/meld-audio/audio_train/dia0_utt0.wav",\n            "0_0_OpenSMILE": ,\n            "y": 3,\n            "label": "sadness"\n        },\n        {\n            "0_1": "/kaggle/input/meld-audio/audio_train/dia0_utt1.wav",\n            "0_1_OpenSMILE": ,\n    

---

# Save Extracted Audio Features in JSON Format

In [14]:
# Persist the extracted OpenSMILE features (all splits) as a single JSON file.
if __name__ == "__main__":
    # Output path of the features JSON
    Audio_Features_OpenSMILE_file_path = "/kaggle/working/Audio_Features_OpenSMILE.json"

    # save_to_json prints a success or error message and does not raise on
    # failure, so check the printed output to confirm the file was written.
    save_to_json(Audio_Features_OpenSMILE, Audio_Features_OpenSMILE_file_path)

Data successfully saved to /kaggle/working/Audio_Features_OpenSMILE.json
