Import required libraries

In [1]:
import os
from pydub import AudioSegment
import librosa 
import numpy as np
import pandas as pd
import tensorflow as tf

Function to split each reciter's audio into multiple 10-second chunks

In [3]:
def split_audio(input_folder, output_folder, chunk_length_ms=10000):
    """Split every reciter's recordings into fixed-length WAV chunks.

    ``input_folder`` is expected to hold one sub-folder per reciter. Every
    ``.mp3`` / ``.wav`` file (extension matched case-insensitively) found
    directly inside a reciter folder is sliced into consecutive pieces of
    ``chunk_length_ms`` milliseconds, and each piece is exported to
    ``<output_folder>/<reciter>/<stem>_chunk<i>.wav``, where ``<stem>`` is the
    source file name without its final extension.

    Args:
        input_folder: Directory containing one sub-directory per reciter.
        output_folder: Destination root directory; created if it is missing.
        chunk_length_ms: Length of each chunk in milliseconds (default 10000).

    Returns:
        None. The chunks are written to disk as a side effect.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so the processing order does not depend on the filesystem.
    for reciter in sorted(os.listdir(input_folder)):
        reciter_folder = os.path.join(input_folder, reciter)
        # Stray files next to the reciter folders (e.g. .DS_Store, a README)
        # would make os.listdir() below raise NotADirectoryError.
        if not os.path.isdir(reciter_folder):
            continue

        output_reciter_folder = os.path.join(output_folder, reciter)
        os.makedirs(output_reciter_folder, exist_ok=True)

        for file in sorted(os.listdir(reciter_folder)):
            # splitext() strips only the final extension, so names such as
            # "surah.001.mp3" and "surah.002.mp3" keep distinct stems instead
            # of both collapsing to "surah" and overwriting each other.
            stem, ext = os.path.splitext(file)
            if ext.lower() not in (".mp3", ".wav"):
                continue

            audio_path = os.path.join(reciter_folder, file)
            audio = AudioSegment.from_file(audio_path)

            # Step-slicing the segment yields consecutive chunk_length_ms pieces.
            for i, chunk in enumerate(audio[::chunk_length_ms]):
                chunk_path = os.path.join(output_reciter_folder, f"{stem}_chunk{i}.wav")
                chunk.export(chunk_path, format="wav")

# Run the splitter over the raw recordings, writing 10-second chunks per reciter.
split_audio(
    input_folder="reciter_Data/raw_audio",
    output_folder="data/split_audio",
    chunk_length_ms=10000,
)

Extract features using the Librosa library

In [4]:
def extract_features(audio_dir, duration=5.0, n_mfcc=13):
    """Build an MFCC feature matrix and label vector from chunked audio.

    ``audio_dir`` is expected to hold one sub-folder per reciter. For each
    ``.wav`` file directly inside a reciter folder, up to ``duration`` seconds
    are loaded with ``sr=None``, ``n_mfcc`` MFCCs are computed, and the
    coefficients are averaged over the MFCC matrix's second axis (the frame
    axis in librosa's layout) to give one fixed-length vector per file. The
    reciter folder name is used as the label.

    Args:
        audio_dir: Directory containing one sub-directory per reciter.
        duration: Maximum number of seconds to load from each file
            (default 5.0). Must match the value used at prediction time.
        n_mfcc: Number of MFCC coefficients per file (default 13). Must match
            the value used at prediction time.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays: one row of averaged
        MFCCs per processed file, and the matching reciter names. Both arrays
        are empty when no usable file is found.
    """
    import warnings

    features = []
    labels = []

    # Sorted traversal keeps row order stable across machines, so a seeded
    # train/test split downstream selects the same samples every run.
    for reciter in sorted(os.listdir(audio_dir)):
        reciter_folder = os.path.join(audio_dir, reciter)
        # Ignore stray files sitting next to the reciter folders.
        if not os.path.isdir(reciter_folder):
            continue

        for file in sorted(os.listdir(reciter_folder)):
            if not file.lower().endswith(".wav"):
                continue

            file_path = os.path.join(reciter_folder, file)
            y, sr = librosa.load(file_path, duration=duration, sr=None)
            if len(y) == 0:
                # One empty chunk should not abort the whole extraction pass.
                warnings.warn(f"Skipping empty audio file: {file_path}")
                continue

            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            mfcc_scaled = np.mean(mfcc.T, axis=0)
            features.append(mfcc_scaled)
            labels.append(reciter)

    return np.array(features), np.array(labels)

# Build the feature matrix X and the reciter-name labels y from the chunked audio.
X, y = extract_features(audio_dir="data/split_audio")

Split the data into training and test sets

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

# Map reciter names to the integer class ids that
# sparse_categorical_crossentropy expects.
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data. stratify keeps each reciter's share identical in the train and
# test sets, so a rare class cannot end up missing from one side.
# (Stratification needs at least two samples per class.)
# NOTE(review): chunks cut from the same recording can land on both sides of
# this split, which likely inflates test accuracy; a per-recording (grouped)
# split would be a stricter check — confirm.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y_encoded,
    test_size=0.2,
    random_state=42,
    stratify=y_encoded,
)

In [8]:

# Seed the Python, NumPy and TensorFlow RNGs so weight initialisation, dropout
# masks and batch shuffling repeat from run to run (same seed value as the
# random_state used for the train/test split).
tf.keras.utils.set_random_seed(42)

# Small fully connected classifier over the per-file averaged MFCC vector.
# The input shape is declared with an explicit Input layer; passing
# input_shape= to the first Dense layer is deprecated in Keras 3.
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(256, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    # One output unit per reciter class known to the label encoder.
    tf.keras.layers.Dense(len(label_encoder.classes_), activation='softmax')
])

# Labels are integer class ids, hence the sparse variant of the loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
# NOTE(review): the test set doubles as validation data here, so the later
# model.evaluate() score is not an independent estimate — consider carving a
# separate validation split out of the training data.
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=20, batch_size=32)

Epoch 1/20
163/163 ━━━━━━━━━━━━━━━━━━━━ 2s 5ms/step - accuracy: 0.4537 - loss: 10.0637 - val_accuracy: 0.8018 - val_loss: 0.6383
Epoch 2/20
163/163 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.6723 - loss: 0.8451 - val_accuracy: 0.9301 - val_loss: 0.3744
Epoch 3/20
163/163 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.7932 - loss: 0.6046 - val_accuracy: 0.9485 - val_loss: 0.2317
Epoch 4/20
163/163 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.8625 - loss: 0.3935 - val_accuracy: 0.9685 - val_loss: 0.1628
Epoch 5/20
163/163 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9075 - loss: 0.2811 - val_accuracy: 0.9624 - val_loss: 0.1249
Epoch 6/20
163/163 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - accuracy: 0.9253 - loss: 0.2294 - val_accuracy: 0.9754 - val_loss: 0.0832
Epoch 7/20
163/163

In [9]:
# Score the trained model on the held-out split and report accuracy as a percentage.
test_loss, test_acc = model.evaluate(X_test, y_test)
accuracy_pct = test_acc * 100
print(f"Test Accuracy: {accuracy_pct:.2f}%")

41/41 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - accuracy: 0.9877 - loss: 0.0301
Test Accuracy: 99.08%


In [None]:
def predict_reciter(audio_path, model, label_encoder, duration=5.0, n_mfcc=13):
    """Predict which reciter is heard in a single audio file.

    The file is turned into the same kind of feature vector the model was
    trained on: up to ``duration`` seconds are loaded with ``sr=None``,
    ``n_mfcc`` MFCCs are computed and averaged into one vector, and the
    result is reshaped to a single-row batch for ``model.predict``.

    Args:
        audio_path: Path of the audio file to classify.
        model: Trained model exposing ``predict``; it is called with an array
            of shape ``(1, n_mfcc)``.
        label_encoder: Fitted encoder exposing ``inverse_transform``, used to
            turn the winning class index back into a reciter name.
        duration: Maximum number of seconds to load (default 5.0). Must match
            the value used when the training features were extracted.
        n_mfcc: Number of MFCC coefficients (default 13). Must match the
            value used when the training features were extracted.

    Returns:
        The predicted reciter label (first element of the decoded array).

    Raises:
        ValueError: If no audio samples could be read from ``audio_path``.
    """
    y, sr = librosa.load(audio_path, duration=duration, sr=None)
    if len(y) == 0:
        # Fail early with a clear message rather than deep inside MFCC code.
        raise ValueError(f"No audio samples could be read from {audio_path!r}")

    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Average the coefficients, then add a leading batch axis of size 1.
    mfcc_scaled = np.mean(mfcc.T, axis=0).reshape(1, -1)

    prediction = model.predict(mfcc_scaled)
    # Only one row is predicted, so the flat argmax is the class index.
    predicted_label = label_encoder.inverse_transform([np.argmax(prediction)])
    return predicted_label[0]

# Example usage: classify a single chunk and report the predicted reciter.
sample_path = "test/yasser-al-dossari-091_chunk5.wav"
reciter = predict_reciter(sample_path, model, label_encoder)
print(f"Predicted Reciter: {reciter}")

1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 58ms/step
Predicted Reciter: other


In [14]:
import joblib

# Persist the fitted label encoder next to the model: without it a reloaded
# model can only produce class indices, not reciter names.
joblib.dump(label_encoder, 'reciter_label_encoder')

# Native Keras format: the supported way to store architecture, weights and
# optimizer state; reload with tf.keras.models.load_model().
model.save('reciter_model.keras')

# Original pickle-based artifact, kept so existing joblib.load('reciter_model')
# consumers keep working.
# NOTE(review): pickled models are tied to the library versions that wrote
# them and must never be loaded from untrusted sources; prefer the .keras file.
joblib.dump(model, 'reciter_model')

['reciter_model']