In [30]:
train_nonqueen_path = 'E:/Queenless/20k_audio_splitted_dataset/train/NonQueen'
train_queen_path = 'E:/Queenless/20k_audio_splitted_dataset/train/Queen'
test_nonqueen_path = 'E:/Queenless/20k_audio_splitted_dataset/test/NonQueen'
test_queen_path = 'E:/Queenless/20k_audio_splitted_dataset/test/Queen'
val_nonqueen_path = 'E:/Queenless/20k_audio_splitted_dataset/val/NonQueen'
val_queen_path = 'E:/Queenless/20k_audio_splitted_dataset/val/Queen'

MFCCs - RF

In [31]:
import os
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import librosa.display
import scipy.fftpack as fftpack

def pre_emphasis(signal_in, pre_emph=0.97):
    """
    Bước 1: Pre-emphasis - Lọc thông cao
    """
    emphasized_signal = np.append(signal_in[0], signal_in[1:] - pre_emph * signal_in[:-1]) # y(t) = x(t) - alpha*x(t-1)
    return emphasized_signal

def framing(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    """
    Bước 2: Chia khung (Framing)
    - frame_size: kích thước khung (số giây)
    - frame_stride: bước nhảy giữa các khung (số giây)
    """
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(signal_in)
    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step)) + 1

    pad_signal_length = num_frames * frame_step + frame_length
    # Zero-padding nếu cần
    z = np.zeros((pad_signal_length - signal_length))
    pad_signal = np.append(signal_in, z)

    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + \
              np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    return frames

def windowing(frames):
    """
    Bước 3: Áp dụng cửa sổ Hamming cho mỗi khung
    """
    frame_length = frames.shape[1]
    hamming = np.hamming(frame_length)
    windowed_frames = frames * hamming
    return windowed_frames

def fft_frames(frames, NFFT=512):
    """
    Bước 4: Tính FFT cho mỗi khung
    """
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    return mag_frames

def power_spectrum(mag_frames, NFFT=512):
    """
    Bước 4.1: Tính phổ công suất của mỗi khung
    """
    return (1.0 / NFFT) * (mag_frames ** 2)

def mel_filterbank(sample_rate, NFFT, nfilt=26, low_freq=0, high_freq=None):
    """
    Bước 5: Tạo Mel filterbank
    """
    if high_freq is None:
        high_freq = sample_rate / 2

    # Chuyển Hz sang Mel
    low_mel = 2595 * np.log10(1 + low_freq / 700.0)
    high_mel = 2595 * np.log10(1 + high_freq / 700.0)
    mel_points = np.linspace(low_mel, high_mel, nfilt + 2)
    # Chuyển lại từ Mel sang Hz
    hz_points = 700 * (10**(mel_points / 2595) - 1)
    bin = np.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        f_m_minus = int(bin[m - 1])   # giới hạn trái
        f_m = int(bin[m])             # trung tâm
        f_m_plus = int(bin[m + 1])    # giới hạn phải

        for k in range(f_m_minus, f_m):
            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
        for k in range(f_m, f_m_plus):
            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
    return fbank

# Hàm tính MFCC
def compute_mfcc(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01, 
                 pre_emph=0.97, NFFT=512, nfilt=26, num_ceps=13):
    # Tính MFCCs theo các bước trong code bạn đã cung cấp
    emphasized_signal = pre_emphasis(signal_in, pre_emph)
    frames = framing(emphasized_signal, sample_rate, frame_size, frame_stride)
    windowed_frames = windowing(frames)
    mag_frames = fft_frames(windowed_frames, NFFT)
    pow_frames = power_spectrum(mag_frames, NFFT)
    fbank = mel_filterbank(sample_rate, NFFT, nfilt)
    filter_banks = np.dot(pow_frames, fbank.T)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    log_fbank = np.log(filter_banks)
    mfccs = fftpack.dct(log_fbank, type=2, axis=1, norm='ortho')[:, :num_ceps]
    return mfccs

# Hàm load dữ liệu từ các thư mục
def load_data_from_directory(directory, sample_rate=22050):
    labels = []
    features = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            mfcc = compute_mfcc(signal, sr)
            mfcc_mean = np.mean(mfcc, axis=0)  # Tính trung bình của MFCCs để giảm chiều
            features.append(mfcc_mean)
            labels.append(label)
    return np.array(features), np.array(labels)

# Load training, validation và testing data
train_features, train_labels = load_data_from_directory('E:/Queenless/20k_audio_splitted_dataset/train', sample_rate=22050)
val_features, val_labels = load_data_from_directory('E:/Queenless/20k_audio_splitted_dataset/val', sample_rate=22050)
test_features, test_labels = load_data_from_directory('E:/Queenless/20k_audio_splitted_dataset/test', sample_rate=22050)

# Chuyển đổi labels thành số để train với RandomForest
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
val_labels = label_encoder.transform(val_labels)
test_labels = label_encoder.transform(test_labels)

# Huấn luyện mô hình Random Forest
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(train_features, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions = rf_classifier.predict(val_features)
val_accuracy = accuracy_score(val_labels, val_predictions)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions = rf_classifier.predict(test_features)
test_accuracy_mfcc_rf = accuracy_score(test_labels, test_predictions)
print(f"Test Accuracy: {test_accuracy_mfcc_rf * 100:.2f}%")



Validation Accuracy: 91.59%
Test Accuracy: 92.00%


MFFCS - SVM

In [32]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
val_features_scaled = scaler.transform(val_features)
test_features_scaled = scaler.transform(test_features)

# Thiết lập phạm vi tham số
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 1],
    'kernel': ['rbf']
}

# Khởi tạo GridSearchCV
grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)

# Huấn luyện GridSearchCV
grid_search.fit(train_features_scaled, train_labels)

# In ra tham số tốt nhất
print("Best parameters found: ", grid_search.best_params_)

# Đánh giá mô hình với các tham số tối ưu
best_svm_rbf_classifier = grid_search.best_estimator_

# Đánh giá mô hình trên bộ validation
val_predictions_best_svm = best_svm_rbf_classifier.predict(val_features_scaled)
val_accuracy_best_svm = accuracy_score(val_labels, val_predictions_best_svm)
print(f"Validation Accuracy (Best SVM with RBF Kernel): {val_accuracy_best_svm * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_best_svm = best_svm_rbf_classifier.predict(test_features_scaled)
test_accuracy_best_svm = accuracy_score(test_labels, test_predictions_best_svm)
print(f"Test Accuracy (Best SVM with RBF Kernel): {test_accuracy_best_svm * 100:.2f}%")


Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.561 total time=   4.8s
[CV 2/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.847 total time=   6.2s
[CV 3/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.717 total time=   5.7s
[CV 4/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.667 total time=   6.3s
[CV 5/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.716 total time=   5.7s
[CV 1/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.560 total time=   4.9s
[CV 2/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.849 total time=   6.3s
[CV 3/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.717 total time=   5.6s
[CV 4/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.667 total time=   6.4s
[CV 5/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.710 total time=   5.7s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.569 total time=   4.7s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;

In [33]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Khởi tạo mô hình SVM với kernel RBF
svm_rbf_classifier = SVC(C=10, kernel='rbf', gamma=1)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features)
val_accuracy_svm_rbf = accuracy_score(val_labels, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features)
test_accuracy_mfcc_svm = accuracy_score(test_labels, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {test_accuracy_mfcc_svm * 100:.2f}%")


Validation Accuracy (SVM with RBF Kernel): 93.63%
Test Accuracy (SVM with RBF Kernel): 94.00%


MFCC - LR

In [39]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Khởi tạo mô hình Logistic Regression
lr_classifier = LogisticRegression(random_state=42)

# Huấn luyện mô hình Logistic Regression với dữ liệu đã chuẩn hóa
lr_classifier.fit(train_features_scaled, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_lr = lr_classifier.predict(val_features_scaled)
val_accuracy_lr = accuracy_score(val_labels, val_predictions_lr)
print(f"Validation Accuracy (Logistic Regression): {val_accuracy_lr * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_lr = lr_classifier.predict(test_features_scaled)
test_accuracy_mfcc_lr = accuracy_score(test_labels, test_predictions_lr)
print(f"Test Accuracy (Logistic Regression): {test_accuracy_mfcc_lr * 100:.2f}%")

Validation Accuracy (Logistic Regression): 71.79%
Test Accuracy (Logistic Regression): 73.28%


MFCC - ET

In [48]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score

# Khởi tạo mô hình Extra Trees với các tham số mặc định
et_classifier = ExtraTreesClassifier(n_estimators=100, random_state=42)

# Huấn luyện mô hình Extra Trees với dữ liệu đã chuẩn hóa
et_classifier.fit(train_features_scaled, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features_scaled)
val_accuracy_et = accuracy_score(val_labels, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features_scaled)
test_accuracy_mfcc_et = accuracy_score(test_labels, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {test_accuracy_mfcc_et * 100:.2f}%")


Validation Accuracy (Extra Trees): 91.94%
Test Accuracy (Extra Trees): 92.53%


MFFC - KNN

In [58]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn_classifier = KNeighborsClassifier(n_neighbors=5)

# Huấn luyện mô hình KNN với dữ liệu đã chuẩn hóa
knn_classifier.fit(train_features_scaled, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_knn = knn_classifier.predict(val_features_scaled)
val_accuracy_knn = accuracy_score(val_labels, val_predictions_knn)
print(f"Validation Accuracy (KNN): {val_accuracy_knn * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_knn = knn_classifier.predict(test_features_scaled)
test_accuracy_mfcc_knn = accuracy_score(test_labels, test_predictions_knn)
print(f"Test Accuracy (KNN): {test_accuracy_mfcc_knn * 100:.2f}%")


Validation Accuracy (KNN): 91.44%
Test Accuracy (KNN): 92.75%


In [59]:
import pandas as pd

features = ["fft", "stft", "mfcc", "cqt", "chroma", "sc"]
models   = ["knn", "svm", "lr", "rf", "et"]

data = []

for feature in features:
    row = {}
    method_name = feature.upper()
    row["Method"] = method_name

    for model in models:
        var_name = f"test_accuracy_{feature}_{model}"
        value = globals().get(var_name, None)
        if value is not None:
            row[model.upper()] = f"{value * 100:.2f}%"
        else:
            row[model.upper()] = "null"
    
    data.append(row)

df = pd.DataFrame(data)

print(df)


   Method     KNN     SVM      LR      RF      ET
0     FFT    null    null    null    null    null
1    STFT    null    null    null    null    null
2    MFCC  92.75%  94.00%  73.28%  92.00%  92.53%
3     CQT    null    null    null    null    null
4  CHROMA    null    null    null    null    null
5      SC    null    null    null    null    null
