In [1]:
import librosa
import os
import pickle
import numpy as np
from scipy.stats import kurtosis,skew
from pydub import AudioSegment
import random

In [2]:
def extract_features(audio_segments, sample_rate=16000, num_samples=1000, seed=42):
    """Compute MFCC summary statistics for a reproducible random subset of segments.

    ``num_samples`` distinct segment indices are drawn without replacement
    using a generator seeded with ``seed``. For every selected segment, 13
    MFCC coefficients are computed and each coefficient is summarised across
    frames by its mean, median, standard deviation, skewness, kurtosis,
    maximum and minimum (concatenated in that order).

    Parameters
    ----------
    audio_segments : sequence
        Indexable collection of audio segments; each item is passed to
        ``librosa.feature.mfcc`` as ``y``.
        (assumes each segment is a 1-D float array - TODO confirm with caller)
    sample_rate : int, optional
        Sampling rate forwarded to ``librosa.feature.mfcc``.
    num_samples : int, optional
        How many distinct segments to draw. Defaults to 1000.
    seed : int, optional
        Seed for the index selection. Defaults to 42.

    Returns
    -------
    list of numpy.ndarray
        One concatenated feature vector per retained segment. Vectors that
        contain NaN are dropped, so the list may hold fewer than
        ``num_samples`` entries.

    Raises
    ------
    ValueError
        If ``num_samples`` exceeds ``len(audio_segments)``.
    """
    num_available = len(audio_segments)
    if num_samples > num_available:
        raise ValueError("Cannot generate more unique numbers than the specified range.")

    # A private generator produces the same indices as seeding the module-level
    # one, but does not reset the global `random` state for the rest of the
    # notebook.
    indices_to_search = random.Random(seed).sample(range(num_available), num_samples)

    # Order matters: it defines the layout of every feature vector.
    statistics = (np.mean, np.median, np.std, skew, kurtosis, np.amax, np.amin)

    feature_list = []
    for audio_index in indices_to_search:
        segment = audio_segments[audio_index]
        mfcc_data = librosa.feature.mfcc(y=segment, sr=sample_rate, n_mfcc=13)

        # axis=1 reduces over frames, leaving one value per MFCC coefficient.
        segment_features = np.concatenate(
            [statistic(mfcc_data, axis=1) for statistic in statistics]
        )

        # Skip segments whose statistics are undefined (NaN) rather than
        # storing unusable vectors.
        if not np.any(np.isnan(segment_features)):
            feature_list.append(segment_features)

    return feature_list

In [3]:
def get_audio_paths(root_path):
    """Index the regular files that sit directly inside ``root_path``.

    Parameters
    ----------
    root_path : str
        Directory to list. Sub-directories are ignored and not descended into.

    Returns
    -------
    dict
        Maps each file name with its final extension removed to the joined
        ``root_path``/file-name path. If two files share the same stem, the
        one listed later by ``os.listdir`` replaces the earlier entry.
    """
    # Pair every directory entry with its joined path once, so the path is
    # built a single time per entry.
    candidates = (
        (entry_name, os.path.join(root_path, entry_name))
        for entry_name in os.listdir(root_path)
    )

    return {
        os.path.splitext(entry_name)[0]: location
        for entry_name, location in candidates
        if os.path.isfile(location)  # keep files only, never directories
    }

def segment_audio(audio_file_path, segment_duration_ms=1000):
    """Load an audio file and cut it into equal-length, peak-normalised segments.

    Parameters
    ----------
    audio_file_path : str
        Path handed to ``AudioSegment.from_file``.
    segment_duration_ms : int, optional
        Length of each segment in milliseconds. Defaults to 1000 (1 second).

    Returns
    -------
    segments : numpy.ndarray
        float32 array of shape ``(num_segments, samples_per_segment)``.
        Samples left over after the last full segment are discarded.
    sample_rate : int
        Frame rate reported by the loaded audio.

    Raises
    ------
    ValueError
        If ``segment_duration_ms`` is too short to hold a single sample.
    """
    audio = AudioSegment.from_file(audio_file_path)

    # Multi-channel audio is returned with its channels interleaved; down-mix
    # first so that every row below holds consecutive samples of one signal
    # and really spans `segment_duration_ms`.
    if audio.channels > 1:
        audio = audio.set_channels(1)

    sample_rate = audio.frame_rate
    samples_per_segment = int(segment_duration_ms * sample_rate // 1000)
    if samples_per_segment <= 0:
        raise ValueError("segment_duration_ms is too short to contain a single sample.")

    audio_array = np.array(audio.get_array_of_samples()).astype(np.float32)

    # Scale to [-1.0, 1.0] by the peak of the whole file. Empty or completely
    # silent audio has no usable peak, so it is left as-is instead of being
    # divided by zero (which would fill the array with NaN).
    peak = np.max(np.abs(audio_array)) if audio_array.size else 0.0
    if peak > 0:
        audio_array /= peak

    # Keep only whole segments, then view the samples as one row per segment.
    num_segments = len(audio_array) // samples_per_segment
    trimmed_audio_array = audio_array[:num_segments * samples_per_segment]
    segments = trimmed_audio_array.reshape(num_segments, samples_per_segment)

    return segments, sample_rate

In [4]:
def segment_and_extract(audio_paths, save_to, output_dir="../../../data/extracted_features_v2"):
    """Segment every audio file, extract its features and pickle the results.

    Parameters
    ----------
    audio_paths : dict
        Maps an identifier to the path of an audio file.
    save_to : str
        Base name (without extension) of the pickle file to write.
    output_dir : str, optional
        Directory that receives ``<save_to>.pickle``. It is created when
        missing. Defaults to the notebook's original relative location.

    Notes
    -----
    Files that yield fewer than 1000 segments are reported on stdout and left
    out of the saved dictionary. The pickle holds a dict mapping each retained
    identifier to whatever ``extract_features`` returned for it.
    """
    # Resolve and create the destination first: a missing folder should
    # surface before the long extraction loop, not after it.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"{save_to}.pickle")

    feature_dict = {}
    for audio_path_key, audio_file_path in audio_paths.items():
        segments, sr = segment_audio(audio_file_path)

        if len(segments) >= 1000:
            feature_dict[audio_path_key] = extract_features(segments, sample_rate=sr)
        else:
            print(f"{audio_path_key} has less than 1000 segments")

    with open(output_path, "wb") as file:
        pickle.dump(feature_dict, file)
    # Announce success only once the file has actually been written.
    print(f"saved to {save_to}")

In [5]:
# Discover the input recordings: one dictionary entry per file found directly
# inside the dataset folder.
# NOTE(review): this is an absolute, machine-specific Windows path; the cell
# only works where that folder exists.
audio_paths = get_audio_paths(
    r"C:\Computer Science Programs\Fall_2024\EE502_BioMed\project\data\cleaned_combined_v2"
)

# Sanity check: how many audio files were found.
print(len(audio_paths))

251


In [None]:
# Base name of the pickle that will hold the extracted features.
# NOTE(review): the output recorded under this cell reports
# "mfcc_13_no_pitch_1000_rand_all_speakers", which does not match the name
# set here - the output looks stale; re-run the cell to refresh it.
save_to = "mfcc_13_no_pitch_1000_all_speakers"

# Long-running step: segments every file, extracts its features and writes the
# pickle.
segment_and_extract(audio_paths, save_to=save_to)

1040 has less than 1000 segments
1183 has less than 1000 segments
1992 has less than 1000 segments


  skew_mfcc = skew(mfcc_data, axis=1)
  kurt_mfcc = kurtosis(mfcc_data, axis=1)


445 has less than 1000 segments
7312 has less than 1000 segments
8014 has less than 1000 segments
saved to mfcc_13_no_pitch_1000_rand_all_speakers
