In [3]:
import os
import librosa

def read_audio_files(directory, extensions=('.mp3',)):
    """Recursively load every matching audio file found under ``directory``.

    Parameters
    ----------
    directory : str or path-like
        Root folder to search; sub-folders are walked recursively.
    extensions : str or iterable of str, optional
        File-name suffix(es) to accept. Matching is case-insensitive, so the
        default ``('.mp3',)`` also picks up files named ``*.MP3``.

    Returns
    -------
    list of tuple
        One ``(audio_path, y, sr)`` tuple per matching file, where ``y`` and
        ``sr`` are the two values returned by
        ``librosa.load(audio_path, sr=None)``. Folders are visited, and files
        within each folder are loaded, in sorted name order so repeated runs
        produce the same sequence.

    Notes
    -----
    Any exception raised by ``librosa.load`` propagates to the caller; a
    file that cannot be decoded therefore aborts the whole scan.
    """
    if isinstance(extensions, str):
        extensions = (extensions,)
    suffixes = tuple(ext.lower() for ext in extensions)

    audio_data = []
    for root, dirs, files in os.walk(directory):
        # os.walk reports entries in arbitrary filesystem order. Sorting
        # ``dirs`` in place fixes the order in which sub-folders are
        # descended into; sorting ``files`` fixes the order inside a folder.
        dirs.sort()
        for file in sorted(files):
            if not file.lower().endswith(suffixes):
                continue
            audio_path = os.path.join(root, file)
            # sr=None asks librosa to keep the file's native sampling rate
            # instead of resampling (see the librosa.load documentation).
            y, sr = librosa.load(audio_path, sr=None)
            audio_data.append((audio_path, y, sr))
    return audio_data

# Example usage
# The data folder can be overridden with the AUDIO_DATA_DIR environment
# variable so the notebook is not tied to a single machine; the original
# location stays as the fallback.
audio_directory = os.environ.get('AUDIO_DATA_DIR', '/home/cake/Documents/sample_data')
# os.walk silently yields nothing for a missing folder, which would leave
# audio_data empty and only fail later on; stop here with a clear message.
if not os.path.isdir(audio_directory):
    raise FileNotFoundError(f"Audio directory not found: {audio_directory}")
audio_data = read_audio_files(audio_directory)


In [4]:
import numpy as np
import librosa

def _mean_or_zero(values):
    """Return the mean of ``values``, or 0 when it holds no elements."""
    values = np.asarray(values)
    # ``.size`` counts elements. ``len()`` would only count rows, so a
    # (1, 0) matrix (one row, zero frames) would look non-empty and its
    # mean would come out as NaN.
    if values.size == 0:
        return 0
    return np.mean(values)


def extract_features(audio_data, n_mfcc=13):
    """Summarise every clip as one fixed-length feature vector.

    Parameters
    ----------
    audio_data : iterable of tuple
        ``(audio_path, y, sr)`` triples; ``y`` and ``sr`` are passed straight
        to the librosa feature extractors, ``audio_path`` is not used.
    n_mfcc : int, optional
        Number of MFCC coefficients requested from ``librosa.feature.mfcc``.
        Defaults to 13.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per clip: the per-coefficient MFCC means (averaged
        over axis 1), followed by the mean spectral centroid and the mean
        zero-crossing rate. With the default ``n_mfcc`` each vector has 15
        entries.

    Notes
    -----
    The spectral-centroid and zero-crossing means fall back to 0 when the
    corresponding feature matrix has no elements. The MFCC means are not
    guarded in the same way.
    """
    features = []
    for _audio_path, y, sr in audio_data:
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        spectral_centroid = librosa.feature.spectral_centroid(y=y, sr=sr)
        zero_crossing_rate = librosa.feature.zero_crossing_rate(y)

        # Collapse the time axis so clips of different length yield vectors
        # of identical size.
        combined_features = np.concatenate([
            np.mean(mfcc, axis=1),
            [_mean_or_zero(spectral_centroid)],
            [_mean_or_zero(zero_crossing_rate)],
        ])
        features.append(combined_features)
    return features

# Example usage
audio_features = extract_features(audio_data)
print("Number of audio files processed:", len(audio_features))
if audio_features:
    print("Shape of extracted features for each audio file:", audio_features[0].shape)
else:
    # Indexing [0] on an empty result would raise IndexError, so report the
    # situation instead of crashing the cell.
    print("No audio files were processed, so there is no feature shape to report.")


Number of audio files processed: 145
Shape of extracted features for each audio file: (15,)


In [5]:
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
def normalize_features(features, n_components=None):
    """Standardise a feature matrix, then project it with PCA.

    Parameters
    ----------
    features : list, tuple or array-like
        2-D data with one row per sample and one column per feature. Lists
        and tuples of per-sample vectors are converted to a numpy array.
    n_components : int, float, str or None, optional
        Forwarded to ``sklearn.decomposition.PCA``. Numeric values are capped
        at ``min(n_samples, n_features)``, the largest value PCA accepts, so
        an over-large request degrades gracefully instead of raising.
        ``None`` and string specifications are passed through unchanged.

    Returns
    -------
    numpy.ndarray
        The result of ``PCA.fit_transform`` applied to the standardised data.
    """
    # Convert sequences of per-sample vectors into a single 2-D array
    if isinstance(features, (list, tuple)):
        features = np.array(features)

    # Standardize features
    scaler = StandardScaler()
    standardized_features = scaler.fit_transform(features)

    if n_components is None or isinstance(n_components, str):
        effective_components = n_components
    else:
        # Capping by the feature count alone is not enough: with fewer
        # samples than features PCA would still reject the request, so both
        # dimensions take part in the limit.
        effective_components = min(n_components, *standardized_features.shape)

    # Perform PCA for dimensionality reduction
    pca = PCA(n_components=effective_components)
    reduced_features = pca.fit_transform(standardized_features)

    return reduced_features


In [6]:
# Example usage with PCA for dimensionality reduction.
# More components are requested than the 15 features available per clip, so
# normalize_features caps the request and the output keeps all 15 columns.
requested_components = 50
reduced_features = normalize_features(
    audio_features,
    n_components=requested_components,
)
print("Shape of reduced features:", reduced_features.shape)


Shape of reduced features: (145, 15)
