In [1]:
import librosa
from IPython.display import Audio
import pandas as pd
import numpy as np
import tensorflow.keras as keras

In [2]:
def load_audio(file_path, sr=44100, top_db=30):
    """Load an audio file and remove its silent passages.

    The file is decoded with ``librosa.load`` at sample rate ``sr``, leading
    and trailing silence is trimmed (librosa's default threshold), and the
    remaining signal is split into non-silent intervals which are joined
    back to back.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to read.
    sr : int, optional
        Sample rate requested from ``librosa.load``. Defaults to 44100, the
        same default ``extract_features`` uses.
    top_db : float, optional
        Threshold passed to ``librosa.effects.split`` when locating the
        non-silent intervals. Defaults to 30.

    Returns
    -------
    numpy.ndarray
        The samples of all non-silent intervals, concatenated in order.

    Raises
    ------
    ValueError
        If no non-silent interval is found, so there is nothing to return.
    """
    y, _ = librosa.load(file_path, sr=sr)

    # Audio preprocessing: drop leading/trailing silence first, then cut out
    # the quiet gaps inside the recording.
    y, _ = librosa.effects.trim(y=y)
    non_silent_intervals = librosa.effects.split(y, top_db=top_db)

    # np.concatenate([]) would raise an opaque "need at least one array"
    # ValueError; raise the same exception type with an actionable message.
    if len(non_silent_intervals) == 0:
        raise ValueError(f"No non-silent audio found in {file_path!r}")

    return np.concatenate([y[start:end] for start, end in non_silent_intervals])

def normalize(data, params_file):
    """Standardise ``data`` using statistics stored in a NumPy ``.npz`` archive.

    Parameters
    ----------
    data : array-like
        Values to normalise; must broadcast against the stored arrays.
    params_file : str or path-like
        ``.npz`` archive containing the arrays ``'mean'`` and ``'std'``.

    Returns
    -------
    numpy.ndarray
        ``(data - mean) / std``, computed element-wise.
    """
    # np.load on an .npz returns a lazily-read NpzFile that holds the archive
    # open; the context manager releases the file handle as soon as both
    # arrays have been read into memory.
    with np.load(params_file) as normalization_params:
        mean = normalization_params['mean']
        std = normalization_params['std']

    return (data - mean) / std

def extract_features(y, sr=44100):
    """Build a per-frame feature matrix from an audio signal.

    Seven librosa descriptors are computed and laid side by side, in this
    column order: MFCCs (``n_mfcc=20``), zero-crossing rate, CQT chroma
    (``bins_per_octave=24``), RMS energy, spectral centroid, spectral
    bandwidth and spectral roll-off (``roll_percent=0.95``).

    Parameters
    ----------
    y : numpy.ndarray
        Audio samples.
    sr : int, optional
        Sample rate of ``y``; forwarded to every descriptor that accepts it.
        Defaults to 44100.

    Returns
    -------
    numpy.ndarray
        Each descriptor's output transposed and concatenated along axis 1,
        so rows follow the descriptors' last axis (presumably analysis
        frames - confirm against the librosa docs) and columns are features.
    """
    feat = librosa.feature

    # Order matters: it fixes the column layout of the returned matrix.
    descriptors = (
        feat.mfcc(y=y, sr=sr, n_mfcc=20),
        feat.zero_crossing_rate(y),
        feat.chroma_cqt(y=y, sr=sr, bins_per_octave=24),
        feat.rms(y=y),
        feat.spectral_centroid(y=y, sr=sr),
        feat.spectral_bandwidth(y=y, sr=sr),
        feat.spectral_rolloff(y=y, sr=sr, roll_percent=0.95),
    )

    # Transpose each block before joining them column-wise.
    return np.concatenate([block.T for block in descriptors], axis=1)

In [32]:
# Input file and the limits applied before feature extraction
# Example from the training set: "Dataset/Mujawwad1/Rast/Rast_other_HP_2.wav"
file_path = "E:\\MyITS\\Final Project (Thesis)\\Temp\\Test Audio\\Tets.wav"
SAMPLE_RATE = 44100   # Hz; matches the rate load_audio decodes at
MAX_DURATION_S = 30   # keep at most this many seconds of processed audio

# Load audio file (load_audio also trims and removes silent passages)
y = load_audio(file_path)

# Cap the processed signal at MAX_DURATION_S. Slicing is a no-op when the
# signal is already shorter, so no separate duration check is needed. The
# earlier check measured the raw file on disk, not the silence-stripped
# signal being cut, and cost an extra file read.
y = y[:MAX_DURATION_S * SAMPLE_RATE]

Audio(y, rate=SAMPLE_RATE)

In [33]:
normalization_parameter = "Saved/Training/Deep ANN (Mean) Cobs_Norm.npz"
model_path = "Saved/Training/Deep ANN (Mean) Cobs_Model.h5"
# Class labels; presumably listed in the order of the model's output units.
# TODO confirm against the label encoding used at training time.
maqams = ["Bayati", "Hijaz", "Jiharkah", "Nahawand", "Rast", "Saba", "Sikah"]

# Summarise the clip as one vector: the mean of every feature over all rows
# returned by extract_features.
frame_features = extract_features(y)
feature = np.mean(frame_features, axis=0)

feature_normalized = normalize(feature, normalization_parameter)

# Perform prediction on a single-sample batch
model = keras.models.load_model(model_path)
predictions = model.predict(feature_normalized.reshape(1, -1))

# zip() silently drops unmatched items, so a model whose output width differs
# from the label list would yield plausible-looking but wrong results.
probabilities = predictions.flatten()
if probabilities.size != len(maqams):
    raise ValueError(
        f"Model produced {probabilities.size} outputs but "
        f"{len(maqams)} maqam labels are defined"
    )

maqam_probabilities = dict(zip(maqams, probabilities))
sorted_maqam_probabilities = sorted(maqam_probabilities.items(), key=lambda x: x[1], reverse=True)
for maqam, probability in sorted_maqam_probabilities:
    print(f"{maqam}: {probability*100:.2f}%")



[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step
Saba: 89.66%
Bayati: 6.88%
Sikah: 1.81%
Hijaz: 1.49%
Jiharkah: 0.11%
Nahawand: 0.03%
Rast: 0.01%
