# MFCCs

In [30]:
train_nonqueen_path = 'E:/Queenless/20k_audio_splitted_dataset/train/NonQueen'
train_queen_path = 'E:/Queenless/20k_audio_splitted_dataset/train/Queen'
test_nonqueen_path = 'E:/Queenless/20k_audio_splitted_dataset/test/NonQueen'
test_queen_path = 'E:/Queenless/20k_audio_splitted_dataset/test/Queen'
val_nonqueen_path = 'E:/Queenless/20k_audio_splitted_dataset/val/NonQueen'
val_queen_path = 'E:/Queenless/20k_audio_splitted_dataset/val/Queen'

MFCCs - RF

In [31]:
import os
import librosa
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import librosa.display
import scipy.fftpack as fftpack

def pre_emphasis(signal_in, pre_emph=0.97):
    """
    Bước 1: Pre-emphasis - Lọc thông cao
    """
    emphasized_signal = np.append(signal_in[0], signal_in[1:] - pre_emph * signal_in[:-1]) # y(t) = x(t) - alpha*x(t-1)
    return emphasized_signal

def framing(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    """
    Bước 2: Chia khung (Framing)
    - frame_size: kích thước khung (số giây)
    - frame_stride: bước nhảy giữa các khung (số giây)
    """
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(signal_in)
    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step)) + 1

    pad_signal_length = num_frames * frame_step + frame_length
    # Zero-padding nếu cần
    z = np.zeros((pad_signal_length - signal_length))
    pad_signal = np.append(signal_in, z)

    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + \
              np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    return frames

def windowing(frames):
    """
    Bước 3: Áp dụng cửa sổ Hamming cho mỗi khung
    """
    frame_length = frames.shape[1]
    hamming = np.hamming(frame_length)
    windowed_frames = frames * hamming
    return windowed_frames

def fft_frames(frames, NFFT=512):
    """
    Bước 4: Tính FFT cho mỗi khung
    """
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    return mag_frames

def power_spectrum(mag_frames, NFFT=512):
    """
    Bước 4.1: Tính phổ công suất của mỗi khung
    """
    return (1.0 / NFFT) * (mag_frames ** 2)

def mel_filterbank(sample_rate, NFFT, nfilt=26, low_freq=0, high_freq=None):
    """
    Bước 5: Tạo Mel filterbank
    """
    if high_freq is None:
        high_freq = sample_rate / 2

    # Chuyển Hz sang Mel
    low_mel = 2595 * np.log10(1 + low_freq / 700.0)
    high_mel = 2595 * np.log10(1 + high_freq / 700.0)
    mel_points = np.linspace(low_mel, high_mel, nfilt + 2)
    # Chuyển lại từ Mel sang Hz
    hz_points = 700 * (10**(mel_points / 2595) - 1)
    bin = np.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        f_m_minus = int(bin[m - 1])   # giới hạn trái
        f_m = int(bin[m])             # trung tâm
        f_m_plus = int(bin[m + 1])    # giới hạn phải

        for k in range(f_m_minus, f_m):
            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
        for k in range(f_m, f_m_plus):
            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
    return fbank

# Hàm tính MFCC
def compute_mfcc(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01, 
                 pre_emph=0.97, NFFT=512, nfilt=26, num_ceps=13):
    # Tính MFCCs theo các bước trong code bạn đã cung cấp
    emphasized_signal = pre_emphasis(signal_in, pre_emph)
    frames = framing(emphasized_signal, sample_rate, frame_size, frame_stride)
    windowed_frames = windowing(frames)
    mag_frames = fft_frames(windowed_frames, NFFT)
    pow_frames = power_spectrum(mag_frames, NFFT)
    fbank = mel_filterbank(sample_rate, NFFT, nfilt)
    filter_banks = np.dot(pow_frames, fbank.T)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    log_fbank = np.log(filter_banks)
    mfccs = fftpack.dct(log_fbank, type=2, axis=1, norm='ortho')[:, :num_ceps]
    return mfccs

# Hàm load dữ liệu từ các thư mục
def load_data_from_directory(directory, sample_rate=22050):
    labels = []
    features = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            mfcc = compute_mfcc(signal, sr)
            mfcc_mean = np.mean(mfcc, axis=0)  # Tính trung bình của MFCCs để giảm chiều
            features.append(mfcc_mean)
            labels.append(label)
    return np.array(features), np.array(labels)

# Load training, validation và testing data
train_features, train_labels = load_data_from_directory('E:/Queenless/20k_audio_splitted_dataset/train', sample_rate=22050)
val_features, val_labels = load_data_from_directory('E:/Queenless/20k_audio_splitted_dataset/val', sample_rate=22050)
test_features, test_labels = load_data_from_directory('E:/Queenless/20k_audio_splitted_dataset/test', sample_rate=22050)

# Chuyển đổi labels thành số để train với RandomForest
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
val_labels = label_encoder.transform(val_labels)
test_labels = label_encoder.transform(test_labels)

# Huấn luyện mô hình Random Forest
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(train_features, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions = rf_classifier.predict(val_features)
val_accuracy = accuracy_score(val_labels, val_predictions)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions = rf_classifier.predict(test_features)
test_accuracy_mfcc_rf = accuracy_score(test_labels, test_predictions)
print(f"Test Accuracy: {test_accuracy_mfcc_rf * 100:.2f}%")



Validation Accuracy: 91.59%
Test Accuracy: 92.00%


MFFCS - SVM

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
val_features_scaled = scaler.transform(val_features)
test_features_scaled = scaler.transform(test_features)

# Thiết lập phạm vi tham số
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 1],
    'kernel': ['rbf']
}

# Khởi tạo GridSearchCV
grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)

# Huấn luyện GridSearchCV
grid_search.fit(train_features_scaled, train_labels)

# In ra tham số tốt nhất
print("Best parameters found: ", grid_search.best_params_)

# Đánh giá mô hình với các tham số tối ưu
best_svm_rbf_classifier = grid_search.best_estimator_

# Đánh giá mô hình trên bộ validation
val_predictions_best_svm = best_svm_rbf_classifier.predict(val_features_scaled)
val_accuracy_best_svm = accuracy_score(val_labels, val_predictions_best_svm)
print(f"Validation Accuracy (Best SVM with RBF Kernel): {val_accuracy_best_svm * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_best_svm = best_svm_rbf_classifier.predict(test_features_scaled)
test_accuracy_best_svm = accuracy_score(test_labels, test_predictions_best_svm)
print(f"Test Accuracy (Best SVM with RBF Kernel): {test_accuracy_best_svm * 100:.2f}%")


Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.561 total time=   4.8s
[CV 2/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.847 total time=   6.2s
[CV 3/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.717 total time=   5.7s
[CV 4/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.667 total time=   6.3s
[CV 5/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.716 total time=   5.7s
[CV 1/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.560 total time=   4.9s
[CV 2/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.849 total time=   6.3s
[CV 3/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.717 total time=   5.6s
[CV 4/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.667 total time=   6.4s
[CV 5/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.710 total time=   5.7s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.569 total time=   4.7s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;

In [33]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Khởi tạo mô hình SVM với kernel RBF
svm_rbf_classifier = SVC(C=10, kernel='rbf', gamma=1)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features)
val_accuracy_svm_rbf = accuracy_score(val_labels, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features)
test_accuracy_mfcc_svm = accuracy_score(test_labels, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {test_accuracy_mfcc_svm * 100:.2f}%")


Validation Accuracy (SVM with RBF Kernel): 93.63%
Test Accuracy (SVM with RBF Kernel): 94.00%


MFCC - LR

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Khởi tạo mô hình Logistic Regression
lr_classifier = LogisticRegression(random_state=42)

# Huấn luyện mô hình Logistic Regression với dữ liệu đã chuẩn hóa
lr_classifier.fit(train_features_scaled, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_lr = lr_classifier.predict(val_features_scaled)
val_accuracy_lr = accuracy_score(val_labels, val_predictions_lr)
print(f"Validation Accuracy (Logistic Regression): {val_accuracy_lr * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_lr = lr_classifier.predict(test_features_scaled)
test_accuracy_mfcc_lr = accuracy_score(test_labels, test_predictions_lr)
print(f"Test Accuracy (Logistic Regression): {test_accuracy_mfcc_lr * 100:.2f}%")

Validation Accuracy (Logistic Regression): 71.79%
Test Accuracy (Logistic Regression): 73.28%


MFCC - ET

In [48]:
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.metrics import accuracy_score

# Khởi tạo mô hình Extra Trees với các tham số mặc định
et_classifier = ExtraTreesClassifier(n_estimators=100, random_state=42)

# Huấn luyện mô hình Extra Trees với dữ liệu đã chuẩn hóa
et_classifier.fit(train_features_scaled, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features_scaled)
val_accuracy_et = accuracy_score(val_labels, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features_scaled)
test_accuracy_mfcc_et = accuracy_score(test_labels, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {test_accuracy_mfcc_et * 100:.2f}%")


Validation Accuracy (Extra Trees): 91.94%
Test Accuracy (Extra Trees): 92.53%


MFFC - KNN

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn_classifier = KNeighborsClassifier(n_neighbors=5)

# Huấn luyện mô hình KNN với dữ liệu đã chuẩn hóa
knn_classifier.fit(train_features_scaled, train_labels)

# Đánh giá mô hình trên bộ validation
val_predictions_knn = knn_classifier.predict(val_features_scaled)
val_accuracy_knn = accuracy_score(val_labels, val_predictions_knn)
print(f"Validation Accuracy (KNN): {val_accuracy_knn * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_knn = knn_classifier.predict(test_features_scaled)
test_accuracy_mfcc_knn = accuracy_score(test_labels, test_predictions_knn)
print(f"Test Accuracy (KNN): {test_accuracy_mfcc_knn * 100:.2f}%")


Validation Accuracy (KNN): 91.44%
Test Accuracy (KNN): 92.75%


# FFT with pre-emphasis, framing, windowing

In [1]:
import numpy as np
import librosa

def pre_emphasis(signal_in, pre_emph=0.97):
    """
    Áp dụng pre-emphasis để nhấn mạnh các tần số cao.
    """
    return np.append(signal_in[0], signal_in[1:] - pre_emph * signal_in[:-1])

def framing(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    """
    Chia tín hiệu thành các frame có kích thước và bước nhảy xác định.
    """
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(signal_in)
    num_frames = int(np.ceil(np.abs(signal_length - frame_length) / frame_step)) + 1

    pad_signal_length = num_frames * frame_step + frame_length
    z = np.zeros((pad_signal_length - signal_length))
    pad_signal = np.append(signal_in, z)

    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + \
              np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    return frames

def windowing(frames):
    """
    Áp dụng cửa sổ Hamming cho mỗi frame.
    """
    frame_length = frames.shape[1]
    hamming = np.hamming(frame_length)
    return frames * hamming

def fft_frames(frames, NFFT=512):
    """
    Tính FFT cho mỗi frame và lấy giá trị magnitude.
    """
    return np.absolute(np.fft.rfft(frames, NFFT))

def compute_fft_features(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01, NFFT=512, apply_log=True):
    """
    Tính toán đặc trưng FFT cho tín hiệu âm thanh:
      - Pre-emphasis, Framing, Windowing.
      - Tính FFT cho từng frame và lấy giá trị magnitude.
      - Trung bình các frame để có vector đặc trưng ổn định.
      - (Tùy chọn) Áp dụng log để giảm phạm vi giá trị.
      
    Trả về: vector đặc trưng có kích thước (NFFT/2+1,).
    """
    emphasized_signal = pre_emphasis(signal_in)
    frames = framing(emphasized_signal, sample_rate, frame_size, frame_stride)
    windowed_frames = windowing(frames)
    mag_frames = fft_frames(windowed_frames, NFFT)
    fft_feature = np.mean(mag_frames, axis=0)  # Trung bình theo các frame
    if apply_log:
        fft_feature = np.log(fft_feature + 1e-8)  # Thêm epsilon để tránh log(0)
    return fft_feature


In [2]:
import os

def load_fft_features_from_directory(directory, sample_rate=22050, NFFT=512):
    """
    Duyệt qua các file âm thanh trong thư mục và tính đặc trưng FFT cho mỗi file.
    Giả sử trong directory có hai thư mục con: 'Queen' và 'NonQueen'.
    
    Trả về:
      - features: mảng đặc trưng (mỗi đặc trưng có kích thước NFFT/2+1)
      - labels: nhãn tương ứng.
    """
    labels = []
    features = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            fft_feature = compute_fft_features(signal, sr, NFFT=NFFT)
            features.append(fft_feature)
            labels.append(label)
    return np.array(features), np.array(labels)


In [3]:
train_dir = r'E:\Queenless\20k_audio_splitted_dataset\train'
val_dir   = r'E:\Queenless\20k_audio_splitted_dataset\val'
test_dir  = r'E:\Queenless\20k_audio_splitted_dataset\test'

train_features, train_labels = load_fft_features_from_directory(train_dir, sample_rate=22050, NFFT=512)
val_features, val_labels = load_fft_features_from_directory(val_dir, sample_rate=22050, NFFT=512)
test_features, test_labels = load_fft_features_from_directory(test_dir, sample_rate=22050, NFFT=512)


In [4]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Chuyển đổi nhãn từ chuỗi sang số
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
val_labels = label_encoder.transform(val_labels)
test_labels = label_encoder.transform(test_labels)

# Chuẩn hóa đặc trưng (chỉ dùng cho các mô hình cần)
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
val_features_scaled = scaler.transform(val_features)
test_features_scaled = scaler.transform(test_features)


FFT - KNN

In [5]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(train_features_scaled, train_labels)

val_pred_knn = knn_classifier.predict(val_features_scaled)
test_accuracy_fft_knn = knn_classifier.predict(test_features_scaled)
print(f"KNN (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_knn)*100:.2f}%")
print(f"KNN (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft_knn)*100:.2f}%")

KNN (FFT features) - Validation Accuracy: 93.03%
KNN (FFT features) - Test Accuracy: 94.25%


FFT - SVM

In [11]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.1, 1],
    'kernel': ['rbf']
}

grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
grid_search.fit(train_features_scaled, train_labels)
print("Best parameters (SVM with GridSearchCV):", grid_search.best_params_)

best_svm = grid_search.best_estimator_
val_pred_svm = best_svm.predict(val_features_scaled)
test_accuracy_fft_svm = best_svm.predict(test_features_scaled)
print(f"SVM (GridSearchCV, FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_svm)*100:.2f}%")
print(f"SVM (GridSearchCV, FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft_svm)*100:.2f}%")


Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV 1/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.568 total time=  18.5s
[CV 2/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.819 total time=  20.8s
[CV 3/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.718 total time=  21.7s
[CV 4/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.595 total time=  20.3s
[CV 5/5] END ....C=0.1, gamma=scale, kernel=rbf;, score=0.767 total time=  19.7s
[CV 1/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.571 total time=  17.8s
[CV 2/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.822 total time=  20.6s
[CV 3/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.716 total time=  19.9s
[CV 4/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.594 total time=  19.8s
[CV 5/5] END .....C=0.1, gamma=auto, kernel=rbf;, score=0.766 total time=  19.9s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.601 total time=  14.6s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

param_grid = {
    'C': [100],
    'gamma': [0.1],
    'kernel': ['rbf']
}

grid_search = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
grid_search.fit(train_features_scaled, train_labels)

print("Best parameters (SVM with GridSearchCV):", grid_search.best_params_)

best_svm = grid_search.best_estimator_
val_pred_svm = best_svm.predict(val_features_scaled)
test_pred_svm = best_svm.predict(test_features_scaled)

print(f"SVM (GridSearchCV, FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_svm)*100:.2f}%")
print(f"SVM (GridSearchCV, FFT features) - Test Accuracy: {accuracy_score(test_labels, test_pred_svm)*100:.2f}%")


FFT - LR

In [12]:
from sklearn.linear_model import LogisticRegression

lr_classifier = LogisticRegression(random_state=42, max_iter=1000)
lr_classifier.fit(train_features_scaled, train_labels)

val_pred_lr = lr_classifier.predict(val_features_scaled)
test_accuracy_fft_lr = lr_classifier.predict(test_features_scaled)
print(f"Logistic Regression (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_lr)*100:.2f}%")
print(f"Logistic Regression (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft_lr)*100:.2f}%")


Logistic Regression (FFT features) - Validation Accuracy: 85.62%
Logistic Regression (FFT features) - Test Accuracy: 85.97%


FFT - RF

In [13]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(train_features, train_labels)

val_pred_rf = rf_classifier.predict(val_features)
test_accuracy_fft_rf = rf_classifier.predict(test_features)
print(f"Random Forest (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_rf)*100:.2f}%")
print(f"Random Forest (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft_rf)*100:.2f}%")


Random Forest (FFT features) - Validation Accuracy: 93.58%
Random Forest (FFT features) - Test Accuracy: 94.67%


FFT - ET

In [14]:
from sklearn.ensemble import ExtraTreesClassifier

et_classifier = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_classifier.fit(train_features_scaled, train_labels)

val_pred_et = et_classifier.predict(val_features_scaled)
test_accuracy_fft_et = et_classifier.predict(test_features_scaled)
print(f"Extra Trees (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_et)*100:.2f}%")
print(f"Extra Trees (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft_et)*100:.2f}%")


Extra Trees (FFT features) - Validation Accuracy: 92.74%
Extra Trees (FFT features) - Test Accuracy: 93.95%


# FFT 1D

In [14]:
import numpy as np
import librosa
import os
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Hàm FFT 1D
def fft_1d(x):
    N = len(x)
    if N <= 1:
        return x
    even = fft_1d(x[::2])  
    odd = fft_1d(x[1::2])  
    factor = np.exp(-2j * np.pi * np.arange(N) / N)
    X = np.zeros(N, dtype=complex)
    for k in range(N // 2):
        X[k] = even[k] + factor[k] * odd[k]
        X[k + N // 2] = even[k] - factor[k] * odd[k]
    return X

# Hàm tải và tính đặc trưng FFT cho dữ liệu trong thư mục
def load_fft_features_from_directory(directory, sample_rate=22050, NFFT=512):
    labels = []
    features = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            fft_feature = fft_1d(signal[:NFFT])
            features.append(fft_feature)
            labels.append(label)
    return np.array(features), np.array(labels)

In [15]:
def preprocess_features(features):
    real_part = features.real
    imag_part = features.imag
    scaler = StandardScaler()
    real_scaled = scaler.fit_transform(real_part)
    imag_scaled = scaler.fit_transform(imag_part)
    return np.stack((real_scaled, imag_scaled), axis=-1)

# Đọc dữ liệu và tính đặc trưng FFT
train_dir = r'E:\Queenless\20k_audio_splitted_dataset\train'
val_dir = r'E:\Queenless\20k_audio_splitted_dataset\val'
test_dir = r'E:\Queenless\20k_audio_splitted_dataset\test'

train_features, train_labels = load_fft_features_from_directory(train_dir)
val_features, val_labels = load_fft_features_from_directory(val_dir)
test_features, test_labels = load_fft_features_from_directory(test_dir)

In [16]:
# Chuyển đổi nhãn từ chuỗi sang số
label_encoder = LabelEncoder()
train_labels = label_encoder.fit_transform(train_labels)
val_labels = label_encoder.transform(val_labels)
test_labels = label_encoder.transform(test_labels)

# Chuẩn hóa đặc trưng
train_features_scaled = preprocess_features(train_features)
val_features_scaled = preprocess_features(val_features)
test_features_scaled = preprocess_features(test_features)

FFT 1D - KNN

In [21]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# Flatten features to make them 2D (samples, features)
train_features_flat = train_features_scaled.reshape(train_features_scaled.shape[0], -1)
val_features_flat = val_features_scaled.reshape(val_features_scaled.shape[0], -1)
test_features_flat = test_features_scaled.reshape(test_features_scaled.shape[0], -1)

knn_classifier = KNeighborsClassifier(n_neighbors=5)

knn_classifier.fit(train_features_flat, train_labels)

val_pred_knn = knn_classifier.predict(val_features_flat)

test_accuracy_fft1d_knn = knn_classifier.predict(test_features_flat)

print(f"KNN (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_knn)*100:.2f}%")
print(f"KNN (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft1d_knn)*100:.2f}%")


KNN (FFT features) - Validation Accuracy: 53.53%
KNN (FFT features) - Test Accuracy: 52.35%


FFT 1D - SVM

In [24]:
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

svm = SVC(C=100, gamma=0.1, kernel='rbf')
svm.fit(train_features_flat, train_labels)

val_pred_svm = svm.predict(val_features_flat)
test_accuracy_fft1d_svm = svm.predict(test_features_flat)

print(f"SVM (Manual Parameters) - Validation Accuracy: {accuracy_score(val_labels, val_pred_svm)*100:.2f}%")
print(f"SVM (Manual Parameters) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft1d_svm)*100:.2f}%")


SVM (Manual Parameters) - Validation Accuracy: 57.86%
SVM (Manual Parameters) - Test Accuracy: 56.23%


FFT 1D - RF

In [25]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(train_features_flat, train_labels)

val_pred_rf = rf_classifier.predict(val_features_flat)
test_accuracy_fft1d_rf = rf_classifier.predict(test_features_flat)
print(f"Random Forest (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_rf)*100:.2f}%")
print(f"Random Forest (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft1d_rf)*100:.2f}%")


Random Forest (FFT features) - Validation Accuracy: 64.28%
Random Forest (FFT features) - Test Accuracy: 67.70%


FFT 1D - ET

In [26]:
from sklearn.ensemble import ExtraTreesClassifier

et_classifier = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_classifier.fit(train_features_flat, train_labels)

val_pred_et = et_classifier.predict(val_features_flat)
test_accuracy_fft1d_et = et_classifier.predict(test_features_flat)
print(f"Extra Trees (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_et)*100:.2f}%")
print(f"Extra Trees (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_fft1d_et)*100:.2f}%")


Extra Trees (FFT features) - Validation Accuracy: 64.38%
Extra Trees (FFT features) - Test Accuracy: 63.45%


# STFT

In [15]:
import numpy as np
import librosa

def compute_stft_features(signal_in, sample_rate, n_fft=2048, hop_length=512, apply_log=True):
    """
    Tính đặc trưng STFT của tín hiệu:
      - Tính STFT bằng librosa.stft và lấy giá trị magnitude.
      - (Tùy chọn) Áp dụng log scaling.
      - Trung bình theo trục thời gian (axis=1) để thu được vector đặc trưng có kích thước (n_fft/2+1,).
    """
    stft_matrix = np.abs(librosa.stft(signal_in, n_fft=n_fft, hop_length=hop_length))
    if apply_log:
        stft_matrix = np.log(stft_matrix + 1e-8)  # Thêm epsilon để tránh log(0)
    # Trung bình theo trục thời gian (tính trung bình các cột)
    features = np.mean(stft_matrix, axis=1)
    return features

def load_stft_features_from_directory(directory, sample_rate=22050, n_fft=2048, hop_length=512):
    """
    Duyệt qua các file âm thanh trong thư mục và tính đặc trưng STFT cho mỗi file.
    Giả sử trong directory có 2 thư mục con: 'Queen' và 'NonQueen'.
    
    Trả về:
      - features: mảng các vector đặc trưng (mỗi vector có kích thước n_fft/2+1)
      - labels: nhãn tương ứng.
    """
    import os
    features = []
    labels = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            stft_feature = compute_stft_features(signal, sr, n_fft=n_fft, hop_length=hop_length, apply_log=True)
            features.append(stft_feature)
            labels.append(label)
    return np.array(features), np.array(labels)


In [16]:
train_features_stft, train_labels_stft = load_stft_features_from_directory(train_dir, sample_rate=22050, n_fft=2048, hop_length=512)
val_features_stft, val_labels_stft     = load_stft_features_from_directory(val_dir, sample_rate=22050, n_fft=2048, hop_length=512)
test_features_stft, test_labels_stft   = load_stft_features_from_directory(test_dir, sample_rate=22050, n_fft=2048, hop_length=512)


In [17]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Chuyển đổi nhãn từ chuỗi sang số
label_encoder_stft = LabelEncoder()
train_labels_stft = label_encoder_stft.fit_transform(train_labels_stft)
val_labels_stft   = label_encoder_stft.transform(val_labels_stft)
test_labels_stft  = label_encoder_stft.transform(test_labels_stft)

# Chuẩn hóa đặc trưng (STFT) cho các mô hình cần đặc trưng đã chuẩn hóa
scaler_stft = StandardScaler()
train_features_stft_scaled = scaler_stft.fit_transform(train_features_stft)
val_features_stft_scaled   = scaler_stft.transform(val_features_stft)
test_features_stft_scaled  = scaler_stft.transform(test_features_stft)


STFT - KNN

In [18]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn_classifier_stft = KNeighborsClassifier(n_neighbors=25)
knn_classifier_stft.fit(train_features_stft_scaled, train_labels_stft)
val_pred_stft_knn = knn_classifier_stft.predict(val_features_stft_scaled)
test_pred_stft_knn = knn_classifier_stft.predict(test_features_stft_scaled)

test_accuracy_stft_knn = accuracy_score(test_labels_stft, test_pred_stft_knn)
print(f"KNN (STFT) - Test Accuracy: {test_accuracy_stft_knn*100:.2f}%")


KNN (STFT) - Test Accuracy: 94.50%


STFT - SVM

In [67]:
from sklearn.svm import SVC

svm_classifier_stft = SVC(C=100, kernel='rbf', gamma=1, random_state=42)
svm_classifier_stft.fit(train_features_stft_scaled, train_labels_stft)
val_pred_stft_svm = svm_classifier_stft.predict(val_features_stft_scaled)
test_pred_stft_svm = svm_classifier_stft.predict(test_features_stft_scaled)

test_accuracy_stft_svm = accuracy_score(test_labels_stft, test_pred_stft_svm)
print(f"SVM (STFT) - Test Accuracy: {test_accuracy_stft_svm*100:.2f}%")

SVM (STFT) - Test Accuracy: 56.62%


STFT - LR

In [20]:
from sklearn.linear_model import LogisticRegression

lr_classifier_stft = LogisticRegression(random_state=42, max_iter=1000)
lr_classifier_stft.fit(train_features_stft_scaled, train_labels_stft)
val_pred_stft_lr = lr_classifier_stft.predict(val_features_stft_scaled)
test_pred_stft_lr = lr_classifier_stft.predict(test_features_stft_scaled)

test_accuracy_stft_lr = accuracy_score(test_labels_stft, test_pred_stft_lr)
print(f"Logistic Regression (STFT) - Test Accuracy: {test_accuracy_stft_lr*100:.2f}%")

Logistic Regression (STFT) - Test Accuracy: 89.50%


STFT - RF

In [21]:
from sklearn.ensemble import RandomForestClassifier

rf_classifier_stft = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier_stft.fit(train_features_stft, train_labels_stft)
val_pred_stft_rf = rf_classifier_stft.predict(val_features_stft)
test_pred_stft_rf = rf_classifier_stft.predict(test_features_stft)

test_accuracy_stft_rf = accuracy_score(test_labels_stft, test_pred_stft_rf)
print(f"Random Forest (STFT) - Test Accuracy: {test_accuracy_stft_rf*100:.2f}%")


Random Forest (STFT) - Test Accuracy: 94.65%


STFT - ET

In [22]:
from sklearn.ensemble import ExtraTreesClassifier

et_classifier_stft = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_classifier_stft.fit(train_features_stft_scaled, train_labels_stft)
val_pred_stft_et = et_classifier_stft.predict(val_features_stft_scaled)
test_pred_stft_et = et_classifier_stft.predict(test_features_stft_scaled)

test_accuracy_stft_et = accuracy_score(test_labels_stft, test_pred_stft_et)
print(f"Extra Trees (STFT) - Test Accuracy: {test_accuracy_stft_et*100:.2f}%")


Extra Trees (STFT) - Test Accuracy: 94.10%


# CQT

In [23]:
import numpy as np
import librosa

def compute_cqt_features(signal_in, sample_rate, hop_length=512, fmin=None, 
                         n_bins=84, bins_per_octave=12, apply_log=True):
    """
    Tính đặc trưng CQT của tín hiệu:
      - Sử dụng librosa.cqt để tính ma trận CQT, lấy giá trị magnitude.
      - (Tùy chọn) Áp dụng log scaling để giảm phạm vi giá trị.
      - Trung bình theo trục thời gian (axis=1) để thu được vector đặc trưng có kích thước (n_bins,).
      
    Nếu fmin không được chỉ định, librosa sẽ tự động sử dụng fmin mặc định.
    """
    cqt_matrix = np.abs(librosa.cqt(signal_in, sr=sample_rate, hop_length=hop_length,
                                    fmin=fmin, n_bins=n_bins, bins_per_octave=bins_per_octave))
    if apply_log:
        cqt_matrix = np.log(cqt_matrix + 1e-8)
    # Trung bình theo trục thời gian (tính trung bình các cột)
    features = np.mean(cqt_matrix, axis=1)
    return features

def load_cqt_features_from_directory(directory, sample_rate=22050, 
                                     hop_length=512, fmin=None, n_bins=84, bins_per_octave=12):
    """
    Duyệt qua các file âm thanh trong thư mục và tính đặc trưng CQT cho mỗi file.
    Giả sử trong directory có 2 thư mục con: 'Queen' và 'NonQueen'.
    
    Trả về:
      - features: mảng các vector đặc trưng (mỗi vector có kích thước n_bins)
      - labels: nhãn tương ứng.
    """
    import os
    features = []
    labels = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            cqt_feature = compute_cqt_features(signal, sr, hop_length=hop_length, 
                                               fmin=fmin, n_bins=n_bins, bins_per_octave=bins_per_octave, 
                                               apply_log=True)
            features.append(cqt_feature)
            labels.append(label)
    return np.array(features), np.array(labels)


In [24]:
train_features_cqt, train_labels_cqt = load_cqt_features_from_directory(train_dir, sample_rate=22050,
                                                                        hop_length=512, fmin=None, n_bins=84, bins_per_octave=12)
val_features_cqt, val_labels_cqt     = load_cqt_features_from_directory(val_dir, sample_rate=22050,
                                                                        hop_length=512, fmin=None, n_bins=84, bins_per_octave=12)
test_features_cqt, test_labels_cqt   = load_cqt_features_from_directory(test_dir, sample_rate=22050,
                                                                        hop_length=512, fmin=None, n_bins=84, bins_per_octave=12)


In [25]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Chuyển đổi nhãn từ chuỗi sang số
label_encoder_cqt = LabelEncoder()
train_labels_cqt = label_encoder_cqt.fit_transform(train_labels_cqt)
val_labels_cqt   = label_encoder_cqt.transform(val_labels_cqt)
test_labels_cqt  = label_encoder_cqt.transform(test_labels_cqt)

# Chuẩn hóa đặc trưng (CQT) cho các mô hình cần đặc trưng đã chuẩn hóa
scaler_cqt = StandardScaler()
train_features_cqt_scaled = scaler_cqt.fit_transform(train_features_cqt)
val_features_cqt_scaled   = scaler_cqt.transform(val_features_cqt)
test_features_cqt_scaled  = scaler_cqt.transform(test_features_cqt)


CQT - KNN

In [26]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn_classifier_cqt = KNeighborsClassifier(n_neighbors=5)
knn_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)
val_pred_cqt_knn = knn_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_knn = knn_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_knn = accuracy_score(test_labels_cqt, test_pred_cqt_knn)
print(f"KNN (CQT) - Test Accuracy: {test_accuracy_cqt_knn*100:.2f}%")


KNN (CQT) - Test Accuracy: 95.83%


CQT - SVM

In [27]:
from sklearn.svm import SVC

svm_classifier_cqt = SVC(C=10, kernel='rbf', gamma=1, random_state=42)
svm_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)
val_pred_cqt_svm = svm_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_svm = svm_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_svm = accuracy_score(test_labels_cqt, test_pred_cqt_svm)
print(f"SVM (CQT) - Test Accuracy: {test_accuracy_cqt_svm*100:.2f}%")


SVM (CQT) - Test Accuracy: 80.17%


CQT - LR

In [28]:
from sklearn.linear_model import LogisticRegression

lr_classifier_cqt = LogisticRegression(random_state=42, max_iter=1000)
lr_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)
val_pred_cqt_lr = lr_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_lr = lr_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_lr = accuracy_score(test_labels_cqt, test_pred_cqt_lr)
print(f"Logistic Regression (CQT) - Test Accuracy: {test_accuracy_cqt_lr*100:.2f}%")


Logistic Regression (CQT) - Test Accuracy: 83.60%


CQT - RF

In [29]:
from sklearn.ensemble import RandomForestClassifier

rf_classifier_cqt = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier_cqt.fit(train_features_cqt, train_labels_cqt)
val_pred_cqt_rf = rf_classifier_cqt.predict(val_features_cqt)
test_pred_cqt_rf = rf_classifier_cqt.predict(test_features_cqt)

test_accuracy_cqt_rf = accuracy_score(test_labels_cqt, test_pred_cqt_rf)
print(f"Random Forest (CQT) - Test Accuracy: {test_accuracy_cqt_rf*100:.2f}%")


Random Forest (CQT) - Test Accuracy: 94.55%


CQT - ET

In [30]:
from sklearn.ensemble import ExtraTreesClassifier

et_classifier_cqt = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)
val_pred_cqt_et = et_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_et = et_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_et = accuracy_score(test_labels_cqt, test_pred_cqt_et)
print(f"Extra Trees (CQT) - Test Accuracy: {test_accuracy_cqt_et*100:.2f}%")


Extra Trees (CQT) - Test Accuracy: 95.08%


# Chroma

In [31]:
import numpy as np
import librosa

def compute_chroma_features(signal_in, sample_rate, n_fft=2048, hop_length=512, n_chroma=12):
    """
    Tính đặc trưng Chroma của tín hiệu:
      - Sử dụng librosa.feature.chroma_stft để tính ma trận chroma.
      - Trung bình theo trục thời gian (axis=1) để thu được vector đặc trưng có kích thước (n_chroma,).
    """
    chroma = librosa.feature.chroma_stft(y=signal_in, sr=sample_rate, n_fft=n_fft, 
                                           hop_length=hop_length, n_chroma=n_chroma)
    # Tính trung bình theo thời gian (các frame)
    feature_vector = np.mean(chroma, axis=1)
    return feature_vector

def load_chroma_features_from_directory(directory, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12):
    """
    Duyệt qua các file âm thanh trong thư mục và tính đặc trưng Chroma cho mỗi file.
    Giả sử trong directory có 2 thư mục con: 'Queen' và 'NonQueen'.
    
    Trả về:
      - features: mảng các vector đặc trưng (mỗi vector có kích thước n_chroma)
      - labels: nhãn tương ứng.
    """
    import os
    features = []
    labels = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            chroma_feature = compute_chroma_features(signal, sr, n_fft=n_fft, 
                                                     hop_length=hop_length, n_chroma=n_chroma)
            features.append(chroma_feature)
            labels.append(label)
    return np.array(features), np.array(labels)


In [32]:
# Load dữ liệu từ các thư mục đã định nghĩa sử dụng hàm trích xuất Chroma
train_features_chroma, train_labels_chroma = load_chroma_features_from_directory(train_dir, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12)
val_features_chroma, val_labels_chroma     = load_chroma_features_from_directory(val_dir, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12)
test_features_chroma, test_labels_chroma   = load_chroma_features_from_directory(test_dir, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12)


  return pitch_tuning(


In [33]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Chuyển đổi nhãn từ chuỗi sang số
label_encoder_chroma = LabelEncoder()
train_labels_chroma = label_encoder_chroma.fit_transform(train_labels_chroma)
val_labels_chroma   = label_encoder_chroma.transform(val_labels_chroma)
test_labels_chroma  = label_encoder_chroma.transform(test_labels_chroma)

# Chuẩn hóa đặc trưng (Chroma)
scaler_chroma = StandardScaler()
train_features_chroma_scaled = scaler_chroma.fit_transform(train_features_chroma)
val_features_chroma_scaled   = scaler_chroma.transform(val_features_chroma)
test_features_chroma_scaled  = scaler_chroma.transform(test_features_chroma)


Chroma - KNN

In [34]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn_classifier_chroma = KNeighborsClassifier(n_neighbors=5)
knn_classifier_chroma.fit(train_features_chroma_scaled, train_labels_chroma)
val_pred_chroma_knn = knn_classifier_chroma.predict(val_features_chroma_scaled)
test_pred_chroma_knn = knn_classifier_chroma.predict(test_features_chroma_scaled)

test_accuracy_chroma_knn = accuracy_score(test_labels_chroma, test_pred_chroma_knn)
print(f"KNN (Chroma) - Test Accuracy: {test_accuracy_chroma_knn*100:.2f}%")


KNN (Chroma) - Test Accuracy: 73.22%


Chroma - SVM

In [35]:
from sklearn.svm import SVC

svm_classifier_chroma = SVC(C=10, kernel='rbf', gamma=1, random_state=42)
svm_classifier_chroma.fit(train_features_chroma_scaled, train_labels_chroma)
val_pred_chroma_svm = svm_classifier_chroma.predict(val_features_chroma_scaled)
test_pred_chroma_svm = svm_classifier_chroma.predict(test_features_chroma_scaled)

test_accuracy_chroma_svm = accuracy_score(test_labels_chroma, test_pred_chroma_svm)
print(f"SVM (Chroma) - Test Accuracy: {test_accuracy_chroma_svm*100:.2f}%")


SVM (Chroma) - Test Accuracy: 76.17%


Chroma - LR

In [36]:
from sklearn.linear_model import LogisticRegression

lr_classifier_chroma = LogisticRegression(random_state=42, max_iter=1000)
lr_classifier_chroma.fit(train_features_chroma_scaled, train_labels_chroma)
val_pred_chroma_lr = lr_classifier_chroma.predict(val_features_chroma_scaled)
test_pred_chroma_lr = lr_classifier_chroma.predict(test_features_chroma_scaled)

test_accuracy_chroma_lr = accuracy_score(test_labels_chroma, test_pred_chroma_lr)
print(f"Logistic Regression (Chroma) - Test Accuracy: {test_accuracy_chroma_lr*100:.2f}%")


Logistic Regression (Chroma) - Test Accuracy: 63.12%


Chroma - RF

In [37]:
from sklearn.ensemble import RandomForestClassifier

rf_classifier_chroma = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier_chroma.fit(train_features_chroma, train_labels_chroma)
val_pred_chroma_rf = rf_classifier_chroma.predict(val_features_chroma)
test_pred_chroma_rf = rf_classifier_chroma.predict(test_features_chroma)

test_accuracy_chroma_rf = accuracy_score(test_labels_chroma, test_pred_chroma_rf)
print(f"Random Forest (Chroma) - Test Accuracy: {test_accuracy_chroma_rf*100:.2f}%")


Random Forest (Chroma) - Test Accuracy: 76.42%


Chroma - ET

In [38]:
from sklearn.ensemble import ExtraTreesClassifier

et_classifier_chroma = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_classifier_chroma.fit(train_features_chroma_scaled, train_labels_chroma)
val_pred_chroma_et = et_classifier_chroma.predict(val_features_chroma_scaled)
test_pred_chroma_et = et_classifier_chroma.predict(test_features_chroma_scaled)

test_accuracy_chroma_et = accuracy_score(test_labels_chroma, test_pred_chroma_et)
print(f"Extra Trees (Chroma) - Test Accuracy: {test_accuracy_chroma_et*100:.2f}%")


Extra Trees (Chroma) - Test Accuracy: 75.42%


# Spectral Contrast

In [39]:
import numpy as np
import librosa

def compute_sc_features(signal_in, sr, n_bands=6, fmin=200, fmax=8000, n_fft=2048, hop_length=512):
    """
    Tính đặc trưng Spectral Contrast của tín hiệu:
      - Tính STFT của tín hiệu.
      - Tạo Mel filter bank với n_bands, fmin, fmax.
      - Áp dụng Mel filter bank lên phổ công suất để thu được mel-spectrogram.
      - Với mỗi dải, tính hiệu giữa giá trị cực đại và cực tiểu.
    
    Trả về: vector SC có kích thước (n_bands,).
    """
    # Tính STFT và lấy phổ công suất (mũi suất bình phương)
    D = np.abs(librosa.stft(signal_in, n_fft=n_fft, hop_length=hop_length))**2
    
    # Tạo Mel filter bank
    mel_filter = librosa.filters.mel(sr=sr, n_fft=n_fft, n_mels=n_bands, fmin=fmin, fmax=fmax)
    
    # Tính mel-spectrogram
    mel_spectrogram = np.dot(mel_filter, D)
    
    # Tính Spectral Contrast: hiệu giữa peak và valley cho mỗi dải
    sc = []
    for i in range(n_bands):
        peak = np.max(mel_spectrogram[i, :])
        valley = np.min(mel_spectrogram[i, :])
        sc.append(peak - valley)
    
    return np.array(sc)

def load_sc_features_from_directory(directory, sample_rate=22050, n_bands=6, fmin=200, fmax=8000, n_fft=2048, hop_length=512):
    """
    Duyệt qua các file âm thanh trong thư mục và tính đặc trưng Spectral Contrast cho mỗi file.
    Giả sử trong directory có 2 thư mục con: 'Queen' và 'NonQueen'.
    
    Trả về:
      - features: mảng các vector đặc trưng (mỗi vector có kích thước n_bands)
      - labels: nhãn tương ứng.
    """
    import os
    features = []
    labels = []
    for label in ['Queen', 'NonQueen']:
        path = os.path.join(directory, label)
        for file in os.listdir(path):
            file_path = os.path.join(path, file)
            signal, sr = librosa.load(file_path, sr=sample_rate)
            sc_feature = compute_sc_features(signal, sr, n_bands=n_bands, fmin=fmin, fmax=fmax, n_fft=n_fft, hop_length=hop_length)
            features.append(sc_feature)
            labels.append(label)
    return np.array(features), np.array(labels)


In [40]:
# Load dữ liệu sử dụng hàm trích xuất Spectral Contrast
train_features_sc, train_labels_sc = load_sc_features_from_directory(train_dir, sample_rate=22050, n_bands=6, fmin=200, fmax=8000, n_fft=2048, hop_length=512)
val_features_sc, val_labels_sc     = load_sc_features_from_directory(val_dir, sample_rate=22050, n_bands=6, fmin=200, fmax=8000, n_fft=2048, hop_length=512)
test_features_sc, test_labels_sc   = load_sc_features_from_directory(test_dir, sample_rate=22050, n_bands=6, fmin=200, fmax=8000, n_fft=2048, hop_length=512)


In [41]:
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Chuyển đổi nhãn từ chuỗi sang số
label_encoder_sc = LabelEncoder()
train_labels_sc = label_encoder_sc.fit_transform(train_labels_sc)
val_labels_sc   = label_encoder_sc.transform(val_labels_sc)
test_labels_sc  = label_encoder_sc.transform(test_labels_sc)

# Chuẩn hóa đặc trưng SC (dữ liệu đã có kích thước (n_bands,) với n_bands=6)
scaler_sc = StandardScaler()
train_features_sc_scaled = scaler_sc.fit_transform(train_features_sc)
val_features_sc_scaled   = scaler_sc.transform(val_features_sc)
test_features_sc_scaled  = scaler_sc.transform(test_features_sc)


SC - KNN

In [68]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

knn_classifier_sc = KNeighborsClassifier(n_neighbors=25)
knn_classifier_sc.fit(train_features_sc_scaled, train_labels_sc)
val_pred_sc_knn = knn_classifier_sc.predict(val_features_sc_scaled)
test_pred_sc_knn = knn_classifier_sc.predict(test_features_sc_scaled)

test_accuracy_sc_knn = accuracy_score(test_labels_sc, test_pred_sc_knn)
print(f"KNN (SC) - Test Accuracy: {test_accuracy_sc_knn*100:.2f}%")


KNN (SC) - Test Accuracy: 65.70%


SC - SVM

In [69]:
from sklearn.svm import SVC

svm_classifier_sc = SVC(C=100, kernel='rbf', gamma=1, random_state=42)
svm_classifier_sc.fit(train_features_sc_scaled, train_labels_sc)
val_pred_sc_svm = svm_classifier_sc.predict(val_features_sc_scaled)
test_pred_sc_svm = svm_classifier_sc.predict(test_features_sc_scaled)

test_accuracy_sc_svm = accuracy_score(test_labels_sc, test_pred_sc_svm)
print(f"SVM (SC) - Test Accuracy: {test_accuracy_sc_svm*100:.2f}%")


SVM (SC) - Test Accuracy: 57.00%


SC - LR

In [44]:
from sklearn.linear_model import LogisticRegression

lr_classifier_sc = LogisticRegression(random_state=42, max_iter=1000)
lr_classifier_sc.fit(train_features_sc_scaled, train_labels_sc)
val_pred_sc_lr = lr_classifier_sc.predict(val_features_sc_scaled)
test_pred_sc_lr = lr_classifier_sc.predict(test_features_sc_scaled)

test_accuracy_sc_lr = accuracy_score(test_labels_sc, test_pred_sc_lr)
print(f"Logistic Regression (SC) - Test Accuracy: {test_accuracy_sc_lr*100:.2f}%")


Logistic Regression (SC) - Test Accuracy: 53.47%


SC - RF

In [45]:
from sklearn.ensemble import RandomForestClassifier

rf_classifier_sc = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier_sc.fit(train_features_sc, train_labels_sc)
val_pred_sc_rf = rf_classifier_sc.predict(val_features_sc)
test_pred_sc_rf = rf_classifier_sc.predict(test_features_sc)

test_accuracy_sc_rf = accuracy_score(test_labels_sc, test_pred_sc_rf)
print(f"Random Forest (SC) - Test Accuracy: {test_accuracy_sc_rf*100:.2f}%")


Random Forest (SC) - Test Accuracy: 67.33%


SC - ET

In [62]:
# Cell 9
from sklearn.ensemble import ExtraTreesClassifier

et_classifier_sc = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_classifier_sc.fit(train_features_sc_scaled, train_labels_sc)
val_pred_sc_et = et_classifier_sc.predict(val_features_sc_scaled)
test_pred_sc_et = et_classifier_sc.predict(test_features_sc_scaled)

test_accuracy_sc_et = accuracy_score(test_labels_sc, test_pred_sc_et)
print(f"Extra Trees (SC) - Test Accuracy: {test_accuracy_sc_et*100:.2f}%")


Extra Trees (SC) - Test Accuracy: 67.27%


# Result

In [72]:
import pandas as pd

features = ["fft", "stft", "mfcc", "cqt", "chroma", "sc"]
models   = ["knn", "svm", "lr", "rf", "et"]

data = []

for feature in features:
    row = {}
    method_name = feature.upper()
    row["Method"] = method_name

    for model in models:
        var_name = f"test_accuracy_{feature}_{model}"
        value = globals().get(var_name, None)
        row[model.upper()] = f"{value * 100:.2f}%"
    data.append(row)

df = pd.DataFrame(data)

df.to_csv("results.csv", index=False)
print(df)


   Method     KNN     SVM      LR      RF      ET
0     FFT  94.25%  97.82%  85.97%  94.67%  93.95%
1    STFT  94.50%  56.62%  89.50%  94.65%  94.10%
2    MFCC  92.75%  94.00%  73.28%  92.00%  92.53%
3     CQT  95.83%  80.17%  83.60%  94.55%  95.08%
4  CHROMA  73.22%  76.17%  63.12%  76.42%  75.42%
5      SC  65.70%  57.00%  53.47%  67.33%  67.27%
