In [5]:
import librosa
import os
import pickle
import numpy as np
from scipy.stats import kurtosis,skew,mode

In [None]:
def get_file_paths_by_subfolder(root):
    """Group every file under ``root`` by the name of the folder that holds it.

    Parameters
    ----------
    root : str
        Directory to walk recursively.

    Returns
    -------
    dict[str, list[tuple[str, str]]]
        Maps a folder's base name to a list of
        ``(full_path, filename_without_extension)`` pairs, one per file found
        directly inside a folder of that name. Folders without files are
        omitted.

    Notes
    -----
    * Folders and files are visited in sorted order so the result does not
      depend on the order in which the filesystem lists directory entries.
    * If two folders at different depths share a base name, their files are
      merged under that one key instead of the later folder silently
      replacing the earlier one.
    """
    file_dict = {}

    for subdir, dirnames, files in os.walk(root):
        # Sorting in place makes os.walk descend into subfolders in a stable order.
        dirnames.sort()

        if not files:
            continue

        # normpath strips a trailing separator, which would otherwise make
        # basename() return "" for files that sit directly in `root`.
        subfolder_name = os.path.basename(os.path.normpath(subdir))
        file_pairs = [
            (os.path.join(subdir, file), os.path.splitext(file)[0])
            for file in sorted(files)
        ]

        # Extend rather than assign so same-named folders do not overwrite each other.
        file_dict.setdefault(subfolder_name, []).extend(file_pairs)

    return file_dict

In [None]:
def _fbank_stats(signal, sample_rate):
    """Return the per-band summary statistics of the clip's log-mel filterbank.

    The result is a list of seven arrays (mean, median, std, skew, kurtosis,
    max, min), each computed along axis 1 of the dB-scaled mel spectrogram.
    """
    fbank_data = librosa.feature.melspectrogram(y=signal, sr=sample_rate, n_fft=1024, n_mels=20)
    fbank_data = librosa.power_to_db(fbank_data)  # Convert power to decibel scale

    return [
        np.mean(fbank_data, axis=1),
        np.median(fbank_data, axis=1),
        np.std(fbank_data, axis=1),
        skew(fbank_data, axis=1),
        kurtosis(fbank_data, axis=1),
        np.amax(fbank_data, axis=1),
        np.amin(fbank_data, axis=1),
    ]


def _pitch_stats(signal, sample_rate):
    """Return ``(mean_pitch, median_pitch)`` of the clip, or ``(0, 0)`` if pyin finds no pitch."""
    # pyin assumes 22050 Hz unless told otherwise; the clip was loaded at its
    # native rate, so that rate must be passed for the f0 estimates to be
    # on the correct frequency scale.
    f0, _voiced_flag, _voiced_probs = librosa.pyin(
        signal,
        fmin=librosa.note_to_hz('C2'),
        fmax=librosa.note_to_hz('C7'),
        sr=sample_rate,
    )

    # pyin marks unvoiced frames with NaN; fall back to 0 when every frame is unvoiced.
    if np.all(np.isnan(f0)):
        return 0, 0
    return np.nanmean(f0), np.nanmedian(f0)


def extract_mfcc(
    file_dictionary,
    output_path="../../../data/extracted_features/mfcc_stats_that_v2/fbank_20_features.pickle",
):
    """Extract filterbank + pitch statistics for every clip and pickle the result.

    Despite its name, this function currently builds features from the
    log-mel filterbank (20 mel bands, ``n_fft=1024``), not from MFCCs.

    Parameters
    ----------
    file_dictionary : dict
        Maps a reader/folder name to an iterable of
        ``(audio_path, filename_no_extension)`` pairs, as produced by
        ``get_file_paths_by_subfolder``.
    output_path : str, optional
        Where the pickle is written. Defaults to the location this notebook
        has always used. Missing parent directories are created.

    Returns
    -------
    None
        The result is only written to ``output_path``. The pickled object is
        a dict mapping each reader to a list of
        ``(feature_vector, filename_no_extension)`` tuples, where
        ``feature_vector`` is the concatenation of the seven filterbank
        statistics (mean, median, std, skew, kurtosis, max, min) followed by
        the mean and median pitch.
    """
    mfcc_dict = {}

    for reader, audio_files in file_dictionary.items():
        reader_features = []

        for audio_path, filename_no_extension in audio_files:
            # sr=None keeps the file's native sample rate instead of resampling.
            signal, sample_rate = librosa.load(audio_path, sr=None)

            mean_pitch, median_pitch = _pitch_stats(signal, sample_rate)
            feature_list = np.concatenate(
                _fbank_stats(signal, sample_rate) + [[mean_pitch, median_pitch]]
            )

            reader_features.append((feature_list, filename_no_extension))

        mfcc_dict[reader] = reader_features

    # Create the target folder up front so a long extraction run cannot be
    # lost to a FileNotFoundError at the final write.
    output_dir = os.path.dirname(output_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_path, "wb") as file:
        pickle.dump(mfcc_dict, file)

In [11]:
# Folder holding one subfolder of extracted word clips per reader.
words_root = r"C:\Computer Science Programs\Fall_2024\EE502_BioMed\project\data\extracted_words\that_before_after"

# Index every clip by the reader folder it lives in, then list the readers found.
file_dictionary = get_file_paths_by_subfolder(words_root)
print(file_dictionary.keys())

dict_keys(['201', '311', '3240', '4297', '7800', '87'])


In [12]:
# Run feature extraction over every reader folder indexed in file_dictionary.
# The function returns nothing; its result is written to a pickle file on disk.
extract_mfcc(file_dictionary)

In [None]:
# Sanity check on a single clip: compute its MFCC matrix and look at the
# summary statistics taken along axis 1.
audio_path = r"C:\Computer Science Programs\Fall_2024\EE502_BioMed\project\data\extracted_words\that_before_after\87\87-121553-0001_1.wav"
signal, sample_rate = librosa.load(audio_path, sr=None)  # sr=None: keep the native sample rate
mfcc_data = librosa.feature.mfcc(y=signal, sr=sample_rate, n_fft=1024)

print(mfcc_data.shape)

# One value per row of mfcc_data for each statistic.
mean_mfcc = mfcc_data.mean(axis=1)
median_mfcc = np.median(mfcc_data, axis=1)
std_mfcc = mfcc_data.std(axis=1)
skew_mfcc = skew(mfcc_data, axis=1)
kurt_mfcc = kurtosis(mfcc_data, axis=1)
maximum_mfcc = mfcc_data.max(axis=1)
minimum_mfcc = mfcc_data.min(axis=1)

# Show the first row next to the per-row extremes so the max/min of row 0
# can be checked by eye against the raw values.
for values in (mfcc_data[0], maximum_mfcc, minimum_mfcc):
    print(values)


(20, 7)
[-463.157   -581.5757  -367.2316  -360.0424  -355.79175 -378.06033
 -535.1951 ]
[-355.79175    186.06808     17.36948     29.215172    20.400425
    3.6157994   -8.9810505    3.6135051   14.613032     7.9065084
    9.587907    -6.1566095    3.969215    14.03998     -1.6051408
   -6.2166615   13.189661    -7.3316336   -3.5331597    3.7617269]
[-581.5757      99.36046   -100.5748     -14.087265   -33.46981
  -25.815037   -44.420116   -25.16283    -21.47697    -14.929054
  -14.077993   -29.278122   -12.531057    -5.4511943  -32.61048
  -16.7882      -9.387087   -17.582047   -21.199858   -10.644733 ]
