In [21]:
import numpy as np
import librosa
import librosa.display
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

In [22]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [31]:
def load_audio(file_path):
    # Load the audio file
    y, sr = librosa.load(file_path, sr=None)

    # Compute log-mel filterbank energies
    mel_spectrogram = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mel = librosa.power_to_db(mel_spectrogram, ref=np.max)

    # Compute MFCCs
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)

    # Resize MFCCs to match the shape of log-mel features
    mfccs_resized = np.resize(mfccs, log_mel.shape)

    # Stack log-mel and resized MFCC features together
    features = {'log_mel': log_mel, 'mfccs': mfccs_resized}

    return features


In [28]:

#  Create the CNN model
def create_cnn_model(input_shape):
    model = Sequential()
    model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=input_shape))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())

    return model


In [32]:
# Extract the features using the trained CNN model
def get_cnn_features(model, input_data):
    return model.predict(input_data)

In [35]:
# audio_file_path = "/content/drive/MyDrive/FYP_dataset/CNN Features/abc.wav"
# audio_features = load_audio(audio_file_path)
# input_shape = audio_features.shape[1:]  # Get the shape of the features (excluding the batch dimension)

In [36]:
audio_file_path = "/content/drive/MyDrive/FYP_dataset/CNN Features/abc.wav"
audio_features = load_audio(audio_file_path)

# The 'audio_features' dictionary contains two keys: 'log_mel' and 'mfccs'
log_mel_feature = audio_features['log_mel']
mfccs_feature = audio_features['mfccs']

print("Shape of log-mel feature:", log_mel_feature.shape)
print("Shape of MFCCs feature:", mfccs_feature.shape)

Shape of log-mel feature: (128, 21220)
Shape of MFCCs feature: (128, 21220)


In [37]:
print(audio_features)

{'log_mel': array([[-80.      , -80.      , -80.      , ..., -45.643215, -48.447235,
        -58.160805],
       [-80.      , -80.      , -80.      , ..., -41.876884, -45.942356,
        -60.26156 ],
       [-80.      , -80.      , -80.      , ..., -48.756817, -50.167976,
        -63.8591  ],
       ...,
       [-80.      , -80.      , -80.      , ..., -80.      , -80.      ,
        -80.      ],
       [-80.      , -80.      , -80.      , ..., -80.      , -80.      ,
        -80.      ],
       [-80.      , -80.      , -80.      , ..., -80.      , -80.      ,
        -80.      ]], dtype=float32), 'mfccs': array([[-4.9453665e+02, -4.9453665e+02, -4.9453665e+02, ...,
        -4.4136798e+02, -4.4587466e+02, -4.6699051e+02],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         6.0612144e+01,  5.8971897e+01,  3.6582897e+01],
       [ 0.0000000e+00,  0.0000000e+00,  0.0000000e+00, ...,
         4.1106766e+01,  4.3204132e+01,  3.0094166e+01],
       ...,
       [ 0.0000000e+

In [38]:
cnn_model_1= create_cnn_model( log_mel_feature.shape)
cnn_model_1.summary()

ValueError: ignored

In [None]:
cnn_features = get_cnn_features(cnn_model, np.expand_dims(audio_features, axis=0))  # Adding batch dimension

# The 'cnn_features' variable now contains the CNN-based features (embeddings) for the input audio
print(cnn_features)