# MFCC Feature Extraction with Extra Envelope Coefficients

Extract MFCC coefficients (with their deltas) and extra envelope coefficients from both the normal and the augmented segments.

In [1]:
import os
import numpy as np
import librosa
import soundfile as sf
import pandas as pd
from pathlib import Path
from tqdm import tqdm

In [2]:
def _envelope_descriptors(signal):
    """
    Compute the time-domain envelope descriptors of a non-empty 1-D signal.

    The envelope is the absolute value of the signal.

    Returns:
        tuple: (pre_max_deriv, post_max_deriv, temporal_centroid_ratio)
            pre_max_deriv: largest sample-to-sample increase of the envelope
                among the samples located before the peak (0 if fewer than
                two samples precede the peak).
            post_max_deriv: most negative sample-to-sample change of the
                envelope from the peak onwards (0 if the peak is the last sample).
            temporal_centroid_ratio: envelope-weighted mean sample position
                divided by the signal length (0.5 if the envelope is all zeros).
    """
    envelope = np.abs(signal)
    peak_pos = np.argmax(envelope)

    # The slice stops right before the peak sample, so the final step *into*
    # the peak is not part of this derivative. Kept as-is so that feature
    # values stay identical to previously extracted feature files.
    pre_max_deriv = 0
    if peak_pos > 1:  # need at least 2 samples before the peak for a diff
        pre_max_deriv = np.max(np.diff(envelope[:peak_pos]))

    # The slice starts at the peak sample itself, so it always holds at least
    # two samples when the peak is not the last sample.
    post_max_deriv = 0
    if peak_pos < len(envelope) - 1:
        post_max_deriv = np.min(np.diff(envelope[peak_pos:]))

    env_sum = np.sum(envelope)
    if env_sum > 0:
        sample_positions = np.arange(len(envelope))
        temporal_centroid = np.sum(sample_positions * envelope) / env_sum
        temporal_centroid_ratio = temporal_centroid / len(envelope)
    else:
        # Silent segment: avoid dividing by zero and default to the middle.
        temporal_centroid_ratio = 0.5

    return pre_max_deriv, post_max_deriv, temporal_centroid_ratio


def _segment_feature_vector(signal, sr, n_mfcc):
    """
    Build the feature vector of one non-empty audio segment.

    Layout (length 2 * n_mfcc + 4):
        [mean MFCCs (n_mfcc), mean MFCC deltas (n_mfcc),
         pre_max_deriv, post_max_deriv, spectral flatness, temporal_centroid_ratio]
    """
    # Time-averaged MFCCs and their first-order deltas.
    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_delta_mean = np.mean(librosa.feature.delta(mfcc), axis=1)

    pre_max_deriv, post_max_deriv, temporal_centroid_ratio = _envelope_descriptors(signal)

    # Mean spectral flatness, used as a proxy for a flatness coefficient.
    # Numerical warnings are silenced and a NaN result is mapped to 0.0.
    with np.errstate(divide='ignore', invalid='ignore'):
        flatness = librosa.feature.spectral_flatness(y=signal)[0].mean()
    if np.isnan(flatness):
        flatness = 0.0

    return np.concatenate([
        mfcc_mean,
        mfcc_delta_mean,
        [pre_max_deriv,
         post_max_deriv,
         flatness,
         temporal_centroid_ratio]
    ])


def extract_mfcc_features_expanded(segment_info_path, segments_dir='segments', n_mfcc=14, features_output_dir='../../extracted_features/features/mfcc_features.npy', labels_output_dir='../../extracted_features/labels/mfcc_labels.npy'):
    """
    Extract an expanded feature set (MFCC means, MFCC delta means, envelope
    descriptors and spectral flatness) for every segment listed in a CSV file,
    and save the resulting feature and label arrays as .npy files.

    Segments that are empty or that raise any exception while being loaded or
    processed are reported, skipped, and removed from the returned metadata.

    Args:
        segment_info_path: Path of the CSV file describing the segments. It must
            provide the columns 'segment_path' and 'instrument_label'.
        segments_dir: Directory that actually holds the audio files. Only the
            file name of each 'segment_path' entry is kept and re-rooted here.
        n_mfcc: Number of MFCC coefficients per segment.
        features_output_dir: Despite its name, the *file path* the feature
            array is saved to. Missing parent directories are created.
        labels_output_dir: Despite its name, the *file path* the label array
            is saved to. Missing parent directories are created.

    Returns:
        tuple: (X, y, metadata)
            X: array with one row of 2 * n_mfcc + 4 features per processed segment.
            y: array of the matching 'instrument_label' values.
            metadata: the segment table restricted to the processed segments,
                with 'segment_path' rewritten to point into `segments_dir`.
                The original index labels are kept (the index is not reset).
    """
    # Load segment info
    metadata = pd.read_csv(segment_info_path)
    # Remember the original size: rows are removed from `metadata` on failure,
    # so the failure count cannot be derived from its final length.
    total_segments = len(metadata)

    # Update paths to use the specified segments directory
    segments_path = Path(segments_dir)
    metadata['segment_path'] = metadata['segment_path'].apply(
        lambda x: str(segments_path / Path(x).name))

    features = []
    labels = []
    failed_indices = []  # index labels of skipped/failed rows, dropped once at the end

    print(f"Extracting expanded feature set from {segments_dir}...")
    for idx, row in tqdm(metadata.iterrows(), total=total_segments):
        try:
            signal, sr = librosa.load(row['segment_path'])

            if len(signal) == 0:
                print(f"Skipping empty audio file: {row['segment_path']}")
                failed_indices.append(idx)
                continue

            feature_vector = _segment_feature_vector(signal, sr, n_mfcc)
            label = row['instrument_label']
        except Exception as e:
            # Deliberate best effort: one bad segment must not abort the run.
            print(f"Error processing {row['segment_path']}: {str(e)}")
            failed_indices.append(idx)
            continue

        # Append both only after everything succeeded so that features and
        # labels can never get out of step with each other.
        features.append(feature_vector)
        labels.append(label)

    metadata = metadata.drop(index=failed_indices)

    # Convert to numpy arrays
    X = np.array(features)
    y = np.array(labels)

    # Print summary of processing
    print("\nProcessing complete:")
    print(f"Successfully processed: {len(features)} segments")
    print(f"Failed/Skipped: {total_segments - len(features)} segments")

    # Make sure the destination folders exist before saving.
    for output_path in (features_output_dir, labels_output_dir):
        Path(output_path).parent.mkdir(parents=True, exist_ok=True)

    np.save(features_output_dir, X)
    np.save(labels_output_dir, y)

    return X, y, metadata

In [3]:
# Make sure the folders that receive the extracted .npy files exist.
os.makedirs('../../extracted_features/labels', exist_ok=True)
os.makedirs('../../extracted_features/features', exist_ok=True)

# CSV files describing the original and the augmented segments.
augmented_segment_info_path = '../../segment_info/augmented_segment_info.csv'
segment_info_path = '../../segment_info/segment_info.csv'

In [4]:
# Features for the original segments, saved to the mfcc_env_* .npy files.
X_mfcc_env, y_mfcc_env, metadata_mfcc_env = extract_mfcc_features_expanded(
    segment_info_path=segment_info_path,
    segments_dir='../../segments',
    labels_output_dir='../../extracted_features/labels/mfcc_env_labels.npy',
    features_output_dir='../../extracted_features/features/mfcc_env_features.npy',
)

Extracting expanded feature set from ../../segments...


100%|██████████| 5714/5714 [00:11<00:00, 517.41it/s]


Processing complete:
Successfully processed: 5714 segments
Failed/Skipped: 0 segments





In [5]:
# Features for the augmented segments, saved to the mfcc_env_aug_* .npy files.
X_mfcc_env_aug, y_mfcc_env_aug, metadata_mfcc_env_aug = extract_mfcc_features_expanded(
    segment_info_path=augmented_segment_info_path,
    segments_dir='../../augmentedSegments',
    labels_output_dir='../../extracted_features/labels/mfcc_env_aug_labels.npy',
    features_output_dir='../../extracted_features/features/mfcc_env_aug_features.npy',
)

Extracting expanded feature set from ../../augmentedSegments...


100%|██████████| 34284/34284 [01:04<00:00, 533.11it/s]


Processing complete:
Successfully processed: 34284 segments
Failed/Skipped: 0 segments



