In [2]:
import os
import librosa
import numpy as np
import time
import joblib
from sklearn.metrics import accuracy_score
import librosa.display
import scipy.fftpack as fftpack
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler


In [3]:
train_path = 'E:/Queenless/20k_audio_splitted_dataset/train'
val_path = 'E:/Queenless/20k_audio_splitted_dataset/val'
test_path = 'E:/Queenless/20k_audio_splitted_dataset/test'

output_dir = 'E:/Queenless/features'
os.makedirs(output_dir, exist_ok=True)

# MFCCs

In [None]:
def pre_emphasis(signal_in, alpha=0.97):
    """
    Bước 1: Pre-emphasis - Lọc thông cao
    """
    emphasized_signal = np.append(signal_in[0], signal_in[1:] - alpha * signal_in[:-1]) # y(t) = x(t) - alpha*x(t-1)
    return emphasized_signal

def framing(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    """
    Bước 2: Chia khung (Framing)
    - frame_size: kích thước khung (số giây)
    - frame_stride: bước nhảy giữa các khung (số giây)
    """
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(signal_in)
    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step)) + 1

    pad_signal_length = num_frames * frame_step + frame_length
    # Zero-padding nếu cần
    z = np.zeros((pad_signal_length - signal_length))
    pad_signal = np.append(signal_in, z)

    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + \
              np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    return frames

def windowing(frames):
    """
    Bước 3: Áp dụng cửa sổ Hamming cho mỗi khung
    """
    frame_length = frames.shape[1]
    hamming = np.hamming(frame_length)
    windowed_frames = frames * hamming
    return windowed_frames

def fft_frames(frames, NFFT=512):
    """
    Bước 4: Tính FFT cho mỗi khung
    """
    mag_frames = np.absolute(np.fft.rfft(frames, NFFT))
    return mag_frames

def power_spectrum(mag_frames, NFFT=512):
    """
    Bước 4.1: Tính phổ công suất của mỗi khung
    """
    return (1.0 / NFFT) * (mag_frames ** 2)

def mel_filterbank(sample_rate, NFFT, nfilt=26, low_freq=0, high_freq=None):
    """
    Bước 5: Tạo Mel filterbank
    """
    if high_freq is None:
        high_freq = sample_rate / 2

    # Chuyển Hz sang Mel
    low_mel = 2595 * np.log10(1 + low_freq / 700.0)
    high_mel = 2595 * np.log10(1 + high_freq / 700.0)
    mel_points = np.linspace(low_mel, high_mel, nfilt + 2)
    # Chuyển lại từ Mel sang Hz
    hz_points = 700 * (10**(mel_points / 2595) - 1)
    bin = np.floor((NFFT + 1) * hz_points / sample_rate)

    fbank = np.zeros((nfilt, int(np.floor(NFFT / 2 + 1))))
    for m in range(1, nfilt + 1):
        f_m_minus = int(bin[m - 1])   # giới hạn trái
        f_m = int(bin[m])             # trung tâm
        f_m_plus = int(bin[m + 1])    # giới hạn phải

        for k in range(f_m_minus, f_m):
            fbank[m - 1, k] = (k - bin[m - 1]) / (bin[m] - bin[m - 1])
        for k in range(f_m, f_m_plus):
            fbank[m - 1, k] = (bin[m + 1] - k) / (bin[m + 1] - bin[m])
    return fbank

# Hàm tính mfccs
def compute_mfccs(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01, 
                 pre_emph=0.97, NFFT=512, nfilt=26, num_ceps=13):
    emphasized_signal = pre_emphasis(signal_in, pre_emph)
    frames = framing(emphasized_signal, sample_rate, frame_size, frame_stride)
    windowed_frames = windowing(frames)
    mag_frames = fft_frames(windowed_frames, NFFT)
    pow_frames = power_spectrum(mag_frames, NFFT)
    fbank = mel_filterbank(sample_rate, NFFT, nfilt)
    filter_banks = np.dot(pow_frames, fbank.T)
    filter_banks = np.where(filter_banks == 0, np.finfo(float).eps, filter_banks)
    log_fbank = np.log(filter_banks)
    mfccs = fftpack.dct(log_fbank, type=2, axis=1, norm='ortho')[:, :num_ceps]
    return mfccs

Loading train data from .pkl files...
Loading val data from .pkl files...
Loading test data from .pkl files...


In [None]:

def extract_mfccs_features(directory, sample_rate=22050, output_dir=None, dataset_type='train'):
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'mfccs_features_{dataset_type}.pkl')
    labels_file = os.path.join(output_dir, f'mfccs_labels_{dataset_type}.pkl')
    data_file = os.path.join(output_dir, f'mfccs_data_{dataset_type}.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file) and os.path.exists(data_file):
        print(f"Loading {dataset_type} data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
        data = joblib.load(data_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []
        data = []  # Thêm một biến để lưu dữ liệu đầu vào (có thể là tín hiệu, ví dụ như signal hoặc các thông tin khác)
        
        for label in ['Queen', 'NonQueen']:
            path = os.path.join(directory, label)
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                signal, sr = librosa.load(file_path, sr=sample_rate)
                mfccs = compute_mfccs(signal_in=signal, sample_rate=sr)
                mfccs_mean = np.mean(mfccs, axis=0)
                features.append(mfccs_mean)
                labels.append(label)
                data.append(signal)  # Lưu tín hiệu âm thanh gốc hoặc dữ liệu khác nếu cần
        
        features = np.array(features)
        labels = np.array(labels)
        data = np.array(data)  # Chuyển dữ liệu sang dạng numpy array nếu cần lưu
        
        if output_dir:
            # Lưu từng đối tượng riêng biệt cho train/val/test
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)
            joblib.dump(data, data_file)  # Lưu tín hiệu âm thanh hoặc dữ liệu
        
        end_time = time.time()
        print(f"MFCCs extraction time: {end_time - start_time:.2f} seconds")
    
    return features, labels, data

train_features_mfccs, train_labels_mfccs, train_data_mfccs = extract_mfccs_features(train_path, output_dir=output_dir, dataset_type='train')
val_features_mfccs, val_labels_mfccs, val_data_mfccs = extract_mfccs_features(val_path, output_dir=output_dir, dataset_type='val')
test_features_mfccs, test_labels_mfccs, test_data_mfccs = extract_mfccs_features(test_path, output_dir=output_dir, dataset_type='test')

In [48]:
print(train_features_mfccs.shape)
print(val_features_mfccs.shape)
print(test_features_mfccs.shape)

(14066, 13)
(2010, 13)
(4000, 13)


In [49]:
train_features_mfccs[0, :]

array([-84.59486884,   6.34044745,  -2.47652946,   5.6866161 ,
        -6.0883983 ,   3.79775816,  -4.57106924,   2.68850088,
        -2.55462216,   1.96805757,  -1.50288011,   0.65516511,
        -0.11471445])

In [72]:
scaler = StandardScaler()
train_features_mfccs_scaled = scaler.fit_transform(train_features_mfccs)
val_features_mfccs_scaled = scaler.transform(val_features_mfccs)
test_features_mfccs_scaled = scaler.transform(test_features_mfccs)

MFCCs - RF

In [73]:
start_time = time.time()

rf_classifier = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None, 
                                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
                                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
                                       bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, 
                                       warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)
rf_classifier.fit(train_features_mfccs_scaled, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_predictions = rf_classifier.predict(val_features_mfccs_scaled)
val_accuracy = accuracy_score(val_labels_mfccs, val_predictions)
print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

test_predictions = rf_classifier.predict(test_features_mfccs_scaled)
test_accuracy_mfccs_rf = accuracy_score(test_labels_mfccs, test_predictions)
print(f"Test Accuracy: {test_accuracy_mfccs_rf * 100:.2f}%")


Training time: 9.80 seconds
Validation Accuracy: 91.99%
Test Accuracy: 92.38%


Tuning bootstrap=False from 92% to 92.38%

MFCCs - SVM

In [60]:
start_time = time.time()

svm_rbf_classifier = SVC(C=10, kernel='rbf', degree=3, gamma=1, coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features_mfccs, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features_mfccs)
val_accuracy_svm_rbf = accuracy_score(val_labels_mfccs, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features_mfccs)
test_accuracy_mfccs_svm = accuracy_score(test_labels_mfccs, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {test_accuracy_mfccs_svm * 100:.2f}%")

Training time: 5.91 seconds
Validation Accuracy (SVM with RBF Kernel): 93.63%
Test Accuracy (SVM with RBF Kernel): 94.00%


In [61]:
start_time = time.time()

# Khởi tạo mô hình SVM với data scaling
svm_rbf_classifier = SVC(C=10, kernel='rbf', degree=3, gamma=1, coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features_mfccs_scaled, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features_mfccs_scaled)
val_accuracy_svm_rbf = accuracy_score(val_labels_mfccs, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features_mfccs_scaled)
scale_test_accuracy_mfccs_svm = accuracy_score(test_labels_mfccs, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {scale_test_accuracy_mfccs_svm * 100:.2f}%")

Training time: 16.12 seconds
Validation Accuracy (SVM with RBF Kernel): 93.03%
Test Accuracy (SVM with RBF Kernel): 94.05%


Scaling data improve from 94% to 94.05%

MFCCs - LR

In [62]:
start_time = time.time()

# Khởi tạo mô hình Logistic Regression
lr_classifier = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, 
                                   class_weight=None, random_state=42, solver='liblinear', max_iter=100, multi_class='deprecated', 
                                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

# Huấn luyện mô hình Logistic Regression với dữ liệu đã chuẩn hóa
lr_classifier.fit(train_features_mfccs, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_lr = lr_classifier.predict(val_features_mfccs)
val_accuracy_lr = accuracy_score(val_labels_mfccs, val_predictions_lr)
print(f"Validation Accuracy (Logistic Regression): {val_accuracy_lr * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_lr = lr_classifier.predict(test_features_mfccs)
test_accuracy_mfccs_lr = accuracy_score(test_labels_mfccs, test_predictions_lr)
print(f"Test Accuracy (Logistic Regression): {test_accuracy_mfccs_lr * 100:.2f}%")

Training time: 0.07 seconds
Validation Accuracy (Logistic Regression): 70.85%
Test Accuracy (Logistic Regression): 72.67%


In [63]:
start_time = time.time()

# Khởi tạo mô hình Logistic Regression với data scaling
lr_classifier = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, fit_intercept=True, intercept_scaling=1, 
                                   class_weight=None, random_state=42, solver='liblinear', max_iter=100, multi_class='deprecated', 
                                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

# Huấn luyện mô hình Logistic Regression với dữ liệu đã chuẩn hóa
lr_classifier.fit(train_features_mfccs_scaled, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_lr = lr_classifier.predict(val_features_mfccs_scaled)
val_accuracy_lr = accuracy_score(val_labels_mfccs, val_predictions_lr)
print(f"Validation Accuracy (Logistic Regression): {val_accuracy_lr * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_lr = lr_classifier.predict(test_features_mfccs_scaled)
scale_test_accuracy_mfccs_lr = accuracy_score(test_labels_mfccs, test_predictions_lr)
print(f"Test Accuracy (Logistic Regression): {scale_test_accuracy_mfccs_lr * 100:.2f}%")

Training time: 0.04 seconds
Validation Accuracy (Logistic Regression): 71.79%
Test Accuracy (Logistic Regression): 73.28%


Tuned the LR model but still only has 73.28% accuracy -> problem with feature extraction method?

MFCCs - ET

In [64]:
start_time = time.time()

# Khởi tạo mô hình Extra Trees 
et_classifier = ExtraTreesClassifier(n_estimators=200, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

et_classifier.fit(train_features_mfccs, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features_mfccs)
val_accuracy_et = accuracy_score(val_labels_mfccs, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features_mfccs)
test_accuracy_mfccs_et = accuracy_score(test_labels_mfccs, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {test_accuracy_mfccs_et * 100:.2f}%")


Training time: 5.96 seconds
Validation Accuracy (Extra Trees): 92.09%
Test Accuracy (Extra Trees): 92.97%


In [65]:
start_time = time.time()

# Khởi tạo mô hình Extra Trees với data scaling
et_classifier = ExtraTreesClassifier(n_estimators=200, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

# Huấn luyện mô hình Extra Trees với dữ liệu đã chuẩn hóa
et_classifier.fit(train_features_mfccs_scaled, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features_mfccs_scaled)
val_accuracy_et = accuracy_score(val_labels_mfccs, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features_mfccs_scaled)
scale_test_accuracy_mfccs_et = accuracy_score(test_labels_mfccs, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {scale_test_accuracy_mfccs_et * 100:.2f}%")


Training time: 6.93 seconds
Validation Accuracy (Extra Trees): 92.14%
Test Accuracy (Extra Trees): 92.90%


Model tuning improved from 92.53% to 92.97%

MFCCs - KNN

In [66]:
start_time = time.time()

knn_classifier = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                      metric='minkowski', metric_params=None, n_jobs=None)

# Huấn luyện mô hình KNN với dữ liệu đã chuẩn hóa
knn_classifier.fit(train_features_mfccs, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_knn = knn_classifier.predict(val_features_mfccs)
val_accuracy_knn = accuracy_score(val_labels_mfccs, val_predictions_knn)
print(f"Validation Accuracy (KNN): {val_accuracy_knn * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_knn = knn_classifier.predict(test_features_mfccs)
test_accuracy_mfccs_knn = accuracy_score(test_labels_mfccs, test_predictions_knn)
print(f"Test Accuracy (KNN): {test_accuracy_mfccs_knn * 100:.2f}%")

Training time: 0.04 seconds
Validation Accuracy (KNN): 91.00%
Test Accuracy (KNN): 91.57%


In [67]:
start_time = time.time()

knn_classifier = KNeighborsClassifier(n_neighbors=5, weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                      metric='minkowski', metric_params=None, n_jobs=None)

# Huấn luyện mô hình KNN với dữ liệu đã chuẩn hóa
knn_classifier.fit(train_features_mfccs_scaled, train_labels_mfccs)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_knn = knn_classifier.predict(val_features_mfccs_scaled)
val_accuracy_knn = accuracy_score(val_labels_mfccs, val_predictions_knn)
print(f"Validation Accuracy (KNN): {val_accuracy_knn * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_knn = knn_classifier.predict(test_features_mfccs_scaled)
scale_test_accuracy_mfccs_knn = accuracy_score(test_labels_mfccs, test_predictions_knn)
print(f"Test Accuracy (KNN): {scale_test_accuracy_mfccs_knn * 100:.2f}%")

Training time: 0.03 seconds
Validation Accuracy (KNN): 91.44%
Test Accuracy (KNN): 92.75%


# FFTs (with pre-emphasis, framing, windowing)

In [68]:
def pre_emphasis(signal_in, pre_emph=0.97):
    """
    Áp dụng pre-emphasis để nhấn mạnh các tần số cao.
    """
    return np.append(signal_in[0], signal_in[1:] - pre_emph * signal_in[:-1])

def framing(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    """
    Chia tín hiệu thành các frame có kích thước và bước nhảy xác định.
    """
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(signal_in)
    num_frames = int(np.ceil(np.abs(signal_length - frame_length) / frame_step)) + 1

    pad_signal_length = num_frames * frame_step + frame_length
    z = np.zeros((pad_signal_length - signal_length))
    pad_signal = np.append(signal_in, z)

    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + \
              np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    return frames

def windowing(frames):
    """
    Áp dụng cửa sổ Hamming cho mỗi frame.
    """
    frame_length = frames.shape[1]
    hamming = np.hamming(frame_length)
    return frames * hamming

def fft_frames(frames, NFFT=512):
    """
    Tính FFT cho mỗi frame và lấy giá trị magnitude.
    """
    return np.absolute(np.fft.rfft(frames, NFFT))

def compute_fft_features(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01, NFFT=512, apply_log=True):
    """
    Tính toán đặc trưng FFT cho tín hiệu âm thanh:
      - Pre-emphasis, Framing, Windowing.
      - Tính FFT cho từng frame và lấy giá trị magnitude.
      - Trung bình các frame để có vector đặc trưng ổn định.
      - (Tùy chọn) Áp dụng log để giảm phạm vi giá trị.
      
    Trả về: vector đặc trưng có kích thước (NFFT/2+1,).
    """
    emphasized_signal = pre_emphasis(signal_in)
    frames = framing(emphasized_signal, sample_rate, frame_size, frame_stride)
    windowed_frames = windowing(frames)
    mag_frames = fft_frames(windowed_frames, NFFT)
    fft_feature = np.mean(mag_frames, axis=0)  # Trung bình theo các frame
    if apply_log:
        fft_feature = np.log(fft_feature + 1e-8)  # Thêm epsilon để tránh log(0)
    return fft_feature


In [93]:
import numpy as np

def pre_emphasis(signal_in, pre_emph=0.97):
    """
    Áp dụng pre-emphasis để nhấn mạnh các tần số cao.
    """
    return np.append(signal_in[0], signal_in[1:] - pre_emph * signal_in[:-1])

def framing(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    """
    Chia tín hiệu thành các frame có kích thước và bước nhảy xác định.
    """
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(signal_in)
    
    num_frames = int(np.ceil(float(np.abs(signal_length - frame_length)) / frame_step)) + 1

    pad_signal_length = num_frames * frame_step + frame_length
    pad_signal = np.pad(signal_in, (0, pad_signal_length - signal_length), mode='constant')

    indices = np.arange(0, frame_length) + np.arange(0, num_frames * frame_step, frame_step)[:, None]
    frames = pad_signal[indices.astype(np.int32)]
    return frames

def windowing(frames):
    """
    Áp dụng cửa sổ Hamming cho mỗi frame.
    """
    frame_length = frames.shape[1]
    hamming = np.hamming(frame_length)
    return frames * hamming

def fft_frames(frames, NFFT=512):
    """
    Tính FFT cho mỗi frame và lấy giá trị magnitude.
    """
    return np.abs(np.fft.fft(frames, NFFT))[:, :NFFT//2+1]  # Chỉ lấy giá trị magnitude dương

def compute_fft_features(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01, NFFT=512, apply_log=True):
    """
    Tính toán đặc trưng FFT cho tín hiệu âm thanh:
      - Pre-emphasis, Framing, Windowing.
      - Tính FFT cho từng frame và lấy giá trị magnitude.
      - Trung bình các frame để có vector đặc trưng ổn định.
      - (Tùy chọn) Áp dụng log để giảm phạm vi giá trị.
    Trả về: vector đặc trưng có kích thước (NFFT/2+1,).
    """
    emphasized_signal = pre_emphasis(signal_in)
    frames = framing(emphasized_signal, sample_rate, frame_size, frame_stride)
    windowed_frames = windowing(frames)
    mag_frames = fft_frames(windowed_frames, NFFT)
    fft_feature = np.mean(mag_frames, axis=0)  # Trung bình theo các frame
    
    # Áp dụng log nếu cần, đảm bảo không có log(0)
    if apply_log:
        fft_feature = np.log(np.maximum(fft_feature, 1e-8))  # Thêm epsilon để tránh log(0)
    
    return fft_feature


In [94]:
def load_fft_features_from_directory(directory, sample_rate=22050, NFFT=512, output_dir=None, dataset_type='train'):
    """
    Duyệt qua các file âm thanh trong thư mục và tính đặc trưng FFT cho mỗi file.
    Giả sử trong directory có hai thư mục con: 'Queen' và 'NonQueen'.
    
    Trả về:
      - features: mảng đặc trưng (mỗi đặc trưng có kích thước NFFT/2+1)
      - labels: nhãn tương ứng.
    """
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'fft3_features_{dataset_type}.pkl')
    labels_file = os.path.join(output_dir, f'fft3_labels_{dataset_type}.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file):
        print(f"Loading {dataset_type} data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []
        
        for label in ['Queen', 'NonQueen']:
            path = os.path.join(directory, label)
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                signal, sr = librosa.load(file_path, sr=sample_rate)
                fft_feature = compute_fft_features(signal, sr, NFFT=NFFT)
                features.append(fft_feature)
                labels.append(label)
        
        features = np.array(features)
        labels = np.array(labels)
        
        # Lưu dữ liệu vào file .pkl nếu chưa tồn tại
        if output_dir:
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)
        
        end_time = time.time()
        print(f"FFT extraction time: {end_time - start_time:.2f} seconds")
    
    return features, labels

In [95]:
train_features_fft, train_labels_fft = load_fft_features_from_directory(train_path, sample_rate=22050, NFFT=512, output_dir=output_dir, dataset_type='train')
val_features_fft, val_labels_fft = load_fft_features_from_directory(val_path, sample_rate=22050, NFFT=512, output_dir=output_dir, dataset_type='val')
test_features_fft, test_labels_fft = load_fft_features_from_directory(test_path, sample_rate=22050, NFFT=512, output_dir=output_dir, dataset_type='test')

Extracting train data...
FFT extraction time: 66.67 seconds
Extracting val data...
FFT extraction time: 9.76 seconds
Extracting test data...
FFT extraction time: 19.05 seconds


In [96]:
scaler = StandardScaler()
train_features_fft_scaled = scaler.fit_transform(train_features_fft)
val_features_fft_scaled = scaler.transform(val_features_fft)
test_features_fft_scaled = scaler.transform(test_features_fft)

FFTs - KNN

In [97]:
start_time = time.time()

knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(train_features_fft, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_knn = knn_classifier.predict(val_features_fft)
test_accuracy_fft_knn = knn_classifier.predict(test_features_fft)
print(f"KNN (FFT features) - Validation Accuracy: {accuracy_score(val_labels_fft, val_pred_knn)*100:.2f}%")
print(f"KNN (FFT features) - Test Accuracy: {accuracy_score(test_labels_fft, test_accuracy_fft_knn)*100:.2f}%")

Training time: 0.02 seconds
KNN (FFT features) - Validation Accuracy: 93.28%
KNN (FFT features) - Test Accuracy: 95.15%


94.25% to 95.15%, pre_emph = 0.98 => 95.28

In [98]:
start_time = time.time()

knn_classifier = KNeighborsClassifier(n_neighbors=5)
knn_classifier.fit(train_features_fft_scaled, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_knn = knn_classifier.predict(val_features_fft_scaled)
test_accuracy_fft_knn = knn_classifier.predict(test_features_fft_scaled)
print(f"KNN (FFT features) - Validation Accuracy: {accuracy_score(val_labels_fft, val_pred_knn)*100:.2f}%")
print(f"KNN (FFT features) - Test Accuracy: {accuracy_score(test_labels_fft, test_accuracy_fft_knn)*100:.2f}%")

Training time: 0.02 seconds
KNN (FFT features) - Validation Accuracy: 93.03%
KNN (FFT features) - Test Accuracy: 94.25%


94.25%, pre_emph = 0.98 => 94.42

FFTs - SVM

In [99]:
start_time = time.time()

svm_rbf_classifier = SVC(C=100, kernel='rbf', degree=3, gamma=0.1, coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features_fft, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features_fft)
val_accuracy_svm_rbf = accuracy_score(val_labels_fft, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features_fft)
test_accuracy_fft_svm = accuracy_score(test_labels_fft, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {test_accuracy_fft_svm * 100:.2f}%")

Training time: 10.43 seconds
Validation Accuracy (SVM with RBF Kernel): 97.71%
Test Accuracy (SVM with RBF Kernel): 97.47%


97.47%

In [100]:
start_time = time.time()

svm_rbf_classifier = SVC(C=100, kernel='rbf', degree=3, gamma=0.1, coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features_fft_scaled, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features_fft_scaled)
val_accuracy_svm_rbf = accuracy_score(val_labels_fft, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features_fft_scaled)
scale_test_accuracy_fft_svm = accuracy_score(test_labels_fft, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {scale_test_accuracy_fft_svm * 100:.2f}%")

Training time: 32.25 seconds
Validation Accuracy (SVM with RBF Kernel): 97.11%
Test Accuracy (SVM with RBF Kernel): 97.82%


97.82

FFTs - LR

In [87]:
start_time = time.time()

lr_classifier = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=10, fit_intercept=True, intercept_scaling=1, 
                                   class_weight=None, random_state=42, solver='lbfgs', max_iter=1500, multi_class='deprecated', 
                                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
lr_classifier.fit(train_features_fft, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_lr = lr_classifier.predict(val_features_fft)
test_accuracy_fft_lr = lr_classifier.predict(test_features_fft)
print(f"Logistic Regression (FFT features) - Validation Accuracy: {accuracy_score(val_labels_fft, val_pred_lr)*100:.2f}%")
print(f"Logistic Regression (FFT features) - Test Accuracy: {accuracy_score(test_labels_fft, test_accuracy_fft_lr)*100:.2f}%")


Training time: 8.09 seconds
Logistic Regression (FFT features) - Validation Accuracy: 86.07%
Logistic Regression (FFT features) - Test Accuracy: 86.33%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [88]:
start_time = time.time()

lr_classifier = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=10, fit_intercept=True, intercept_scaling=1, 
                                   class_weight=None, random_state=42, solver='saga', max_iter=1000, multi_class='deprecated', 
                                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

lr_classifier.fit(train_features_fft_scaled, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_lr = lr_classifier.predict(val_features_fft_scaled)
scale_test_accuracy_fft_lr = lr_classifier.predict(test_features_fft_scaled)
print(f"Logistic Regression (FFT features) - Validation Accuracy: {accuracy_score(val_labels_fft, val_pred_lr)*100:.2f}%")
print(f"Logistic Regression (FFT features) - Test Accuracy: {accuracy_score(test_labels_fft, scale_test_accuracy_fft_lr)*100:.2f}%")


Training time: 60.68 seconds
Logistic Regression (FFT features) - Validation Accuracy: 85.57%
Logistic Regression (FFT features) - Test Accuracy: 86.05%




Tuning from 85.97% to 86.05%

Using min max scaling -> 86.08%

But Non-scaling data working better -> 86.33%

FFTs - RF

In [90]:
start_time = time.time()

rf_classifier = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None, min_samples_split=2, 
                                       min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt', 
                                       max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, 
                                       n_jobs=None, random_state=42, verbose=0, warm_start=False, class_weight=None, 
                                       ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

rf_classifier.fit(train_features_fft, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_rf = rf_classifier.predict(val_features_fft)
test_accuracy_fft_rf = rf_classifier.predict(test_features_fft)
print(f"Random Forest (FFT features) - Validation Accuracy: {accuracy_score(val_labels_fft, val_pred_rf)*100:.2f}%")
print(f"Random Forest (FFT features) - Test Accuracy: {accuracy_score(test_labels_fft, test_accuracy_fft_rf)*100:.2f}%")

Training time: 32.89 seconds
Random Forest (FFT features) - Validation Accuracy: 93.58%
Random Forest (FFT features) - Test Accuracy: 94.67%


Not tuning yet

FFTs - ET

In [91]:
start_time = time.time()

et_classifier = ExtraTreesClassifier(n_estimators=200, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

# Huấn luyện mô hình Extra Trees với dữ liệu đã chuẩn hóa
et_classifier.fit(train_features_fft, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features_fft)
val_accuracy_et = accuracy_score(val_labels_fft, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features_fft)
test_accuracy_fft_et = accuracy_score(test_labels_fft, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {test_accuracy_fft_et * 100:.2f}%")


Training time: 102.19 seconds
Validation Accuracy (Extra Trees): 94.88%
Test Accuracy (Extra Trees): 95.73%


In [92]:
start_time = time.time()

et_classifier = ExtraTreesClassifier(n_estimators=200, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

# Huấn luyện mô hình Extra Trees với dữ liệu đã chuẩn hóa
et_classifier.fit(train_features_fft_scaled, train_labels_fft)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features_fft_scaled)
val_accuracy_et = accuracy_score(val_labels_fft, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features_fft_scaled)
scale_test_accuracy_fft_et = accuracy_score(test_labels_fft, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {scale_test_accuracy_fft_et * 100:.2f}%")


Training time: 102.91 seconds
Validation Accuracy (Extra Trees): 94.98%
Test Accuracy (Extra Trees): 95.80%


93.95% -> 95.80% beautifull

# MFCC

In [None]:
def compute_mfcc(file_path, n_mfcc=13):
    y, sr = librosa.load(file_path, sr=16000) 
    mfcc = librosa.feature.mfcc(y=y, sr=sr, S=None, n_mfcc=n_mfcc, dct_type=2, norm='ortho', lifter=0, mel_norm='slaney')
    return mfcc

In [None]:
def extract_mfcc_features(directory, sample_rate=22050, output_dir=None, dataset_type='train', reduce_dimension=True):
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'mfcc2_features_{dataset_type}.pkl')
    labels_file = os.path.join(output_dir, f'mfcc2_labels_{dataset_type}.pkl')
    data_file = os.path.join(output_dir, f'mfcc2_data_{dataset_type}.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file) and os.path.exists(data_file):
        print(f"Loading {dataset_type} data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
        data = joblib.load(data_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []
        data = []  # Thêm một biến để lưu dữ liệu đầu vào (có thể là tín hiệu, ví dụ như signal hoặc các thông tin khác)
        
        for label in ['Queen', 'NonQueen']:
            path = os.path.join(directory, label)
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                mfcc = compute_mfcc(file_path=file_path, n_mfcc=70)  # Chiết xuất MFCC cho mỗi tệp âm thanh
                features.append(mfcc)  # Lưu ma trận MFCC cho mỗi tệp âm thanh
                labels.append(label)
        
        features = np.array(features)
        labels = np.array(labels)
        data = np.array(data)  # Chuyển dữ liệu sang dạng numpy array nếu cần lưu
        
        # Nếu cần giảm chiều theo axis=2
        if reduce_dimension:
            features = np.mean(features, axis=2)
        
        if output_dir:
            # Lưu từng đối tượng riêng biệt cho train/val/test
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)
            joblib.dump(data, data_file)  # Lưu tín hiệu âm thanh hoặc dữ liệu
        
        end_time = time.time()
        print(f"MFCC extraction time: {end_time - start_time:.2f} seconds")
    
    return features, labels, data

# Gọi hàm với tham số reduce_dimension=True để giảm chiều ngay khi gọi hàm
train_features_mfcc, train_labels_mfcc, train_data_mfcc = extract_mfcc_features(train_path, output_dir=output_dir, dataset_type='train')
val_features_mfcc, val_labels_mfcc, val_data_mfcc = extract_mfcc_features(val_path, output_dir=output_dir, dataset_type='val')
test_features_mfcc, test_labels_mfcc, test_data_mfcc = extract_mfcc_features(test_path, output_dir=output_dir, dataset_type='test')

Loading train data from .pkl files...
Loading val data from .pkl files...
Loading test data from .pkl files...


In [103]:
train_features_mfcc.shape

(14066, 120)

In [104]:
scaler = StandardScaler()
train_features_mfcc_scaled = scaler.fit_transform(train_features_mfcc)
val_features_mfcc_scaled = scaler.transform(val_features_mfcc)
test_features_mfcc_scaled = scaler.transform(test_features_mfcc)

In [105]:
start_time = time.time()

svm_rbf_classifier = SVC(C=8.31, kernel='rbf', degree=3, gamma='auto', coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features_mfcc_scaled, train_labels_mfcc)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features_mfcc_scaled)
val_accuracy_svm_rbf = accuracy_score(val_labels_mfcc, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features_mfcc_scaled)
scale_test_accuracy_mfcc_svm = accuracy_score(test_labels_mfcc, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {scale_test_accuracy_mfcc_svm * 100:.2f}%")

Training time: 33.88 seconds
Validation Accuracy (SVM with RBF Kernel): 92.84%
Test Accuracy (SVM with RBF Kernel): 92.80%


98.55%

# STFTs

In [3]:
import numpy as np
import librosa
import os

def apply_preprocessing(signal_in, sample_rate):
    """
    Apply optional preprocessing steps on the signal:
      - Apply pre-emphasis filtering.
    """
    # Pre-emphasis filter: Boosting higher frequencies slightly
    pre_emphasis = 0.97
    signal_in = np.append(signal_in[0], signal_in[1:] - pre_emphasis * signal_in[:-1])
    return signal_in

def frame_signal(signal, frame_size, hop_length):
    """
    Convert a signal into overlapping frames.
    
    Args:
        signal: Input audio signal
        frame_size: Number of samples per frame
        hop_length: Number of samples between frames
    
    Returns:
        numpy.ndarray: Framed signal of shape (frame_length, num_frames)
    """
    # Calculate the number of frames
    num_frames = 1 + int((len(signal) - frame_size) / hop_length)
    
    # Create an empty array to store frames
    frames = np.zeros((frame_size, num_frames))
    
    # Frame the signal
    for i in range(num_frames):
        start = i * hop_length
        end = start + frame_size
        if end <= len(signal):
            frames[:, i] = signal[start:end]
        else:  # Zero padding for last frame if needed
            frames[:len(signal)-start, i] = signal[start:]
    
    return frames

def apply_window(frames, window_type='hann'):
    """
    Apply a window function to each frame.
    
    Args:
        frames: Framed signal of shape (frame_length, num_frames)
        window_type: Type of window function ('hann', 'hamming', 'blackman', etc.)
    
    Returns:
        numpy.ndarray: Windowed frames
    """
    # Create the window function
    frame_length = frames.shape[0]
    window = np.zeros(frame_length)
    
    if window_type == 'hann':
        window = np.hanning(frame_length)
    elif window_type == 'hamming':
        window = np.hamming(frame_length)
    elif window_type == 'blackman':
        window = np.blackman(frame_length)
    else:  # Default to Hann window
        window = np.hanning(frame_length)
    
    # Apply window to each frame (element-wise multiplication)
    return frames * window.reshape(-1, 1)

def compute_stfts_features(signal_in, sample_rate, n_fft=2048, hop_length=512, 
                          window_type='hann', apply_log=True):
    """
    Compute STFT features with explicitly separated steps:
      - Apply preprocessing (pre-emphasis)
      - Frame the signal into overlapping segments
      - Apply windowing to each frame
      - Compute STFT
      - Extract features with optional log scaling
    """
    # Apply preprocessing (e.g., pre-emphasis)
    signal_in = apply_preprocessing(signal_in, sample_rate)
    
    # Explicit framing and windowing (for demonstration, though librosa.stft does this internally)
    frames = frame_signal(signal_in, n_fft, hop_length)
    windowed_frames = apply_window(frames, window_type)
    
    # Note: In practice, we could use these windowed frames directly for FFT
    # but for compatibility with the original code, we'll use librosa.stft
    
    # Compute STFT with a window function to avoid spectral leakage
    stfts_matrix = np.abs(librosa.stft(signal_in, n_fft=n_fft, hop_length=hop_length, window=window_type))
    
    # Optional: Apply logarithmic scaling (dynamic range compression)
    if apply_log:
        stfts_matrix = np.log(stfts_matrix + 1e-10)  # More robust epsilon for log scaling

    # Feature extraction: Mean and additional energy-based features (e.g., variance)
    features_mean = np.mean(stfts_matrix, axis=1)
    features_variance = np.var(stfts_matrix, axis=1)
    
    # Combine mean and variance (you could also add other stats like median or skewness)
    features = np.concatenate([features_mean, features_variance], axis=0)

    # Normalize the features (optional)
    features = (features - np.mean(features)) / np.std(features)  # Z-score normalization

    return features

def process_audio_file(file_path, n_fft=2048, hop_length=512, window_type='hann'):
    """
    Process an audio file to extract STFT features.
    
    Args:
        file_path: Path to the audio file
        n_fft: FFT window size
        hop_length: Hop length between frames
        window_type: Type of window function to apply
        
    Returns:
        numpy.ndarray: Extracted features
    """
    # Load the audio file
    signal, sample_rate = librosa.load(file_path, sr=None)
    
    # Compute features
    features = compute_stfts_features(signal, sample_rate, n_fft, hop_length, window_type)
    
    return features

In [4]:
def load_stfts_features_from_directory(directory, sample_rate=22050, n_fft=2048, hop_length=512, output_dir=None, dataset_type='train'):
    """
    Load and compute stfts features from audio files in a directory, with improved accuracy.
    Assumes 'Queen' and 'NonQueen' subdirectories are present.
    """
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'stfts_features_{dataset_type}1.pkl')
    labels_file = os.path.join(output_dir, f'stfts_labels_{dataset_type}1.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file):
        print(f"Loading {dataset_type}1 data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []

        for label in ['Queen', 'NonQueen']:
            path = os.path.join(directory, label)
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                # Load audio file
                signal, sr = librosa.load(file_path, sr=sample_rate)

                # Compute stfts features
                stfts_feature = compute_stfts_features(signal, sr, n_fft=n_fft, hop_length=hop_length, apply_log=True)

                features.append(stfts_feature)
                labels.append(label)

        features = np.array(features)
        labels = np.array(labels)

        # Lưu dữ liệu vào file .pkl nếu chưa tồn tại
        if output_dir:
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)

        end_time = time.time()
        print(f"STFTs extraction time: {end_time - start_time:.2f} seconds")
    
    return features, labels

train_features_stfts, train_labels_stfts = load_stfts_features_from_directory(train_path, sample_rate=22050, n_fft=2048, hop_length=512, output_dir=output_dir, dataset_type='train')
val_features_stfts, val_labels_stfts = load_stfts_features_from_directory(val_path, sample_rate=22050, n_fft=2048, hop_length=512, output_dir=output_dir, dataset_type='val')
test_features_stfts, test_labels_stfts = load_stfts_features_from_directory(test_path, sample_rate=22050, n_fft=2048, hop_length=512, output_dir=output_dir, dataset_type='test')

Extracting train data...
STFTs extraction time: 241.89 seconds
Extracting val data...
STFTs extraction time: 34.09 seconds
Extracting test data...
STFTs extraction time: 68.79 seconds


In [5]:
scaler_stfts = StandardScaler()
train_features_stfts_scaled = scaler_stfts.fit_transform(train_features_stfts)
val_features_stfts_scaled   = scaler_stfts.transform(val_features_stfts)
test_features_stfts_scaled  = scaler_stfts.transform(test_features_stfts)


STFTs - KNN

In [6]:
start_time = time.time()

knn_classifier_stfts = KNeighborsClassifier(n_neighbors=3, weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                           metric='minkowski', metric_params=None, n_jobs=None)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

knn_classifier_stfts.fit(train_features_stfts, train_labels_stfts)
val_pred_stfts_knn = knn_classifier_stfts.predict(val_features_stfts)
test_pred_stfts_knn = knn_classifier_stfts.predict(test_features_stfts)

test_accuracy_stfts_knn = accuracy_score(test_labels_stfts, test_pred_stfts_knn)
print(f"KNN (STFTs) - Test Accuracy: {test_accuracy_stfts_knn*100:.2f}%")


Training time: 0.00 seconds
KNN (STFTs) - Test Accuracy: 93.30%


Old 95.10%

In [7]:
start_time = time.time()

knn_classifier_stfts = KNeighborsClassifier(n_neighbors=3, weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                           metric='minkowski', metric_params=None, n_jobs=None)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

knn_classifier_stfts.fit(train_features_stfts_scaled, train_labels_stfts)
val_pred_stfts_knn = knn_classifier_stfts.predict(val_features_stfts_scaled)
test_pred_stfts_knn = knn_classifier_stfts.predict(test_features_stfts_scaled)

test_accuracy_stfts_knn = accuracy_score(test_labels_stfts, test_pred_stfts_knn)
print(f"KNN (STFTs) - Test Accuracy: {test_accuracy_stfts_knn*100:.2f}%")


Training time: 0.00 seconds
KNN (STFTs) - Test Accuracy: 92.10%


Old 94.50%

STFTs - SVM

In [8]:
start_time = time.time()

svm_rbf_classifier = SVC(C=10, kernel='rbf', degree=3, gamma='scale', coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features_stfts_scaled, train_labels_stfts)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features_stfts_scaled)
val_accuracy_stfts_svm = accuracy_score(val_labels_stfts, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_stfts_svm * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features_stfts_scaled)
scale_test_accuracy_stfts_svm = accuracy_score(test_labels_stfts, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {scale_test_accuracy_stfts_svm * 100:.2f}%")

Training time: 245.16 seconds
Validation Accuracy (SVM with RBF Kernel): 96.32%
Test Accuracy (SVM with RBF Kernel): 95.58%


STFTs - LR

In [9]:
start_time = time.time()

lr_classifier_stfts = LogisticRegression(random_state=42, max_iter=1000, solver='liblinear', penalty= 'l2', C= 0.08858667904100823)
lr_classifier_stfts.fit(train_features_stfts_scaled, train_labels_stfts)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_stfts_lr = lr_classifier_stfts.predict(val_features_stfts_scaled)
test_pred_stfts_lr = lr_classifier_stfts.predict(test_features_stfts_scaled)

test_accuracy_stfts_lr = accuracy_score(test_labels_stfts, test_pred_stfts_lr)
print(f"Logistic Regression (STFTs) - Test Accuracy: {test_accuracy_stfts_lr*100:.2f}%")

Training time: 37.37 seconds
Logistic Regression (STFTs) - Test Accuracy: 88.65%


STFTs - RF

In [10]:
start_time = time.time()

rf_classifier_stfts = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None, 
                                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
                                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
                                       bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, 
                                       warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

rf_classifier_stfts.fit(train_features_stfts, train_labels_stfts)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_stfts_rf = rf_classifier_stfts.predict(val_features_stfts)
test_pred_stfts_rf = rf_classifier_stfts.predict(test_features_stfts)

test_accuracy_stfts_rf = accuracy_score(test_labels_stfts, test_pred_stfts_rf)
print(f"Random Forest (STFTs) - Test Accuracy: {test_accuracy_stfts_rf*100:.2f}%")


Training time: 169.32 seconds
Random Forest (STFTs) - Test Accuracy: 93.30%


STFTs - ET

In [None]:
start_time

et_classifier_stfts = ExtraTreesClassifier(n_estimators=100, random_state=42)
et_classifier_stfts.fit(train_features_stfts_scaled, train_labels_stfts)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_stfts_et = et_classifier_stfts.predict(val_features_stfts_scaled)
test_pred_stfts_et = et_classifier_stfts.predict(test_features_stfts_scaled)

test_accuracy_stfts_et = accuracy_score(test_labels_stfts, test_pred_stfts_et)
print(f"Extra Trees (STFTs) - Test Accuracy: {test_accuracy_stfts_et*100:.2f}%")


Training time: 180.13 seconds
Extra Trees (stfts) - Test Accuracy: 91.35%


# CQT

In [104]:
def compute_cqt_features(signal_in, sample_rate, hop_length=512, fmin=None, 
                         n_bins=84, bins_per_octave=12, apply_log=True):
    """
    Tính đặc trưng CQT của tín hiệu:
      - Sử dụng librosa.cqt để tính ma trận CQT, lấy giá trị magnitude.
      - (Tùy chọn) Áp dụng log scaling để giảm phạm vi giá trị.
      - Trung bình theo trục thời gian (axis=1) để thu được vector đặc trưng có kích thước (n_bins,).
      
    Nếu fmin không được chỉ định, librosa sẽ tự động sử dụng fmin mặc định.
    """
    cqt_matrix = np.abs(librosa.cqt(signal_in, sr=sample_rate, hop_length=hop_length,
                                    fmin=fmin, n_bins=n_bins, bins_per_octave=bins_per_octave))
    if apply_log:
        cqt_matrix = np.log(cqt_matrix + 1e-8)
    # Trung bình theo trục thời gian (tính trung bình các cột)
    features = np.mean(cqt_matrix, axis=1)
    return features


In [105]:
def load_cqt_features_from_directory(directory, sample_rate=22050, hop_length=512, 
                                     fmin=None, n_bins=84, bins_per_octave=12, output_dir=None, dataset_type='train'):
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'cqt_features_{dataset_type}.pkl')
    labels_file = os.path.join(output_dir, f'cqt_labels_{dataset_type}.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file):
        print(f"Loading {dataset_type} data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []
        
        for label in ['Queen', 'NonQueen']:
            path = os.path.join(directory, label)
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                signal, sr = librosa.load(file_path, sr=sample_rate)
                
                # Compute CQT features
                cqt_feature = compute_cqt_features(signal, sr, hop_length=hop_length, 
                                                   fmin=fmin, n_bins=n_bins, bins_per_octave=bins_per_octave, 
                                                   apply_log=True)
                
                features.append(cqt_feature)
                labels.append(label)

        features = np.array(features)
        labels = np.array(labels)
        
        # Lưu dữ liệu vào file .pkl nếu chưa tồn tại
        if output_dir:
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)
        
        end_time = time.time()
        print(f"CQT extraction time: {end_time - start_time:.2f} seconds")
    
    return features, labels

train_features_cqt, train_labels_cqt = load_cqt_features_from_directory(train_path, sample_rate=22050,
                                                                        hop_length=512, fmin=None, n_bins=84, bins_per_octave=12, 
                                                                        output_dir=output_dir, dataset_type='train')
val_features_cqt, val_labels_cqt = load_cqt_features_from_directory(val_path, sample_rate=22050,
                                                                    hop_length=512, fmin=None, n_bins=84, bins_per_octave=12, 
                                                                    output_dir=output_dir, dataset_type='val')
test_features_cqt, test_labels_cqt = load_cqt_features_from_directory(test_path, sample_rate=22050,
                                                                      hop_length=512, fmin=None, n_bins=84, bins_per_octave=12, 
                                                                      output_dir=output_dir, dataset_type='test')

Extracting train data...
CQT extraction time: 591.23 seconds
Extracting val data...
CQT extraction time: 85.39 seconds
Extracting test data...
CQT extraction time: 172.54 seconds


In [107]:
scaler_cqt = StandardScaler()
train_features_cqt_scaled = scaler_cqt.fit_transform(train_features_cqt)
val_features_cqt_scaled   = scaler_cqt.transform(val_features_cqt)
test_features_cqt_scaled  = scaler_cqt.transform(test_features_cqt)

CQT - KNN

In [108]:
start_time = time.time()

knn_classifier_cqt = KNeighborsClassifier(n_neighbors=1, weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                      metric='minkowski', metric_params=None, n_jobs=None)

knn_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_cqt_knn = knn_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_knn = knn_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_knn = accuracy_score(test_labels_cqt, test_pred_cqt_knn)
print(f"KNN (CQT) - Test Accuracy: {test_accuracy_cqt_knn*100:.2f}%")


Training time: 0.01 seconds
KNN (CQT) - Test Accuracy: 97.52%


CQT - SVM

In [109]:
start_time = time.time()

svm_classifier_cqt = SVC(C=100, kernel='rbf', gamma='auto', random_state=42)
svm_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_cqt_svm = svm_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_svm = svm_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_svm = accuracy_score(test_labels_cqt, test_pred_cqt_svm)
print(f"SVM (CQT) - Test Accuracy: {test_accuracy_cqt_svm*100:.2f}%")


Training time: 7.56 seconds
SVM (CQT) - Test Accuracy: 96.85%


CQT - LR

In [110]:
start_time = time.time()

lr_classifier_cqt =LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=1.0, 
                                      fit_intercept=True, intercept_scaling=1, 
                                      class_weight=None, random_state=None, solver='lbfgs', 
                                      max_iter=500, multi_class='deprecated', verbose=0, 
                                      warm_start=False, n_jobs=None, l1_ratio=None)

lr_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_cqt_lr = lr_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_lr = lr_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_lr = accuracy_score(test_labels_cqt, test_pred_cqt_lr)
print(f"Logistic Regression (CQT) - Test Accuracy: {test_accuracy_cqt_lr*100:.2f}%")


Training time: 0.59 seconds
Logistic Regression (CQT) - Test Accuracy: 83.60%


CQT - RF

In [111]:
start_time = time.time()

rf_classifier_cqt = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None, 
                                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
                                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
                                       bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, 
                                       warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

rf_classifier_cqt.fit(train_features_cqt, train_labels_cqt)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_cqt_rf = rf_classifier_cqt.predict(val_features_cqt)
test_pred_cqt_rf = rf_classifier_cqt.predict(test_features_cqt)

test_accuracy_cqt_rf = accuracy_score(test_labels_cqt, test_pred_cqt_rf)
print(f"Random Forest (CQT) - Test Accuracy: {test_accuracy_cqt_rf*100:.2f}%")


Training time: 29.00 seconds
Random Forest (CQT) - Test Accuracy: 95.45%


CQT - ET

In [112]:
start_time = time.time()

et_classifier_cqt = ExtraTreesClassifier(n_estimators=200, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

et_classifier_cqt.fit(train_features_cqt_scaled, train_labels_cqt)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_cqt_et = et_classifier_cqt.predict(val_features_cqt_scaled)
test_pred_cqt_et = et_classifier_cqt.predict(test_features_cqt_scaled)

test_accuracy_cqt_et = accuracy_score(test_labels_cqt, test_pred_cqt_et)
print(f"Extra Trees (CQT) - Test Accuracy: {test_accuracy_cqt_et*100:.2f}%")

Training time: 30.03 seconds
Extra Trees (CQT) - Test Accuracy: 95.53%


# Chroma + Sprectral centroid

In [115]:
def compute_chroma_and_centroid_features(signal_in, sample_rate, n_fft=2048, hop_length=512, n_chroma=12):
    """
    Tính cả đặc trưng Chroma và Spectral Centroid của tín hiệu:
      - Sử dụng librosa.feature.chroma_stft để tính ma trận chroma.
      - Sử dụng librosa.feature.spectral_centroid để tính Spectral Centroid.
      - Trung bình theo trục thời gian (axis=1) để thu được vector đặc trưng cho Chroma.
      - Trung bình theo trục thời gian (axis=1) để thu được vector đặc trưng cho Spectral Centroid.
      - Trả về vector đặc trưng kết hợp (chroma + centroid).
    """
    # Tính đặc trưng Chroma
    chroma = librosa.feature.chroma_stft(y=signal_in, sr=sample_rate, n_fft=n_fft, 
                                           hop_length=hop_length, n_chroma=n_chroma)
    chroma_feature = np.mean(chroma, axis=1)

    # Tính Spectral Centroid
    centroid = librosa.feature.spectral_centroid(y=signal_in, sr=sample_rate, n_fft=n_fft, 
                                                 hop_length=hop_length)
    centroid_feature = np.mean(centroid, axis=1)

    # Kết hợp các đặc trưng vào một vector duy nhất
    combined_feature = np.concatenate((chroma_feature, centroid_feature), axis=0)
    return combined_feature

In [116]:
def load_combined_features_from_directory(directory, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12, output_dir=None, dataset_type='train'):
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'combined_features_{dataset_type}.pkl')
    labels_file = os.path.join(output_dir, f'combined_labels_{dataset_type}.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file):
        print(f"Loading {dataset_type} data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []

        for label in ['Queen', 'NonQueen']:
            path = os.path.join(directory, label)
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                signal, sr = librosa.load(file_path, sr=sample_rate)
                
                # Tính toán các đặc trưng kết hợp (chroma + centroid)
                combined_feature = compute_chroma_and_centroid_features(signal, sr, n_fft=n_fft, 
                                                                        hop_length=hop_length, n_chroma=n_chroma)
                
                features.append(combined_feature)
                labels.append(label)

        features = np.array(features)
        labels = np.array(labels)
        
        # Lưu dữ liệu vào file .pkl nếu chưa tồn tại
        if output_dir:
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)
        
        end_time = time.time()
        print(f"Combined features extraction time: {end_time - start_time:.2f} seconds")
    
    return features, labels

train_features_combined, train_labels_combined = load_combined_features_from_directory(train_path, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12, output_dir=output_dir, dataset_type='train')
val_features_combined, val_labels_combined = load_combined_features_from_directory(val_path, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12, output_dir=output_dir, dataset_type='val')
test_features_combined, test_labels_combined = load_combined_features_from_directory(test_path, sample_rate=22050, n_fft=2048, hop_length=512, n_chroma=12, output_dir=output_dir, dataset_type='test')

Extracting train data...
Combined features extraction time: 197.54 seconds
Extracting val data...
Combined features extraction time: 28.23 seconds
Extracting test data...
Combined features extraction time: 56.15 seconds


In [117]:
scaler_combined = StandardScaler()
train_features_combined_scaled = scaler_combined.fit_transform(train_features_combined)
val_features_combined_scaled   = scaler_combined.transform(val_features_combined)
test_features_combined_scaled  = scaler_combined.transform(test_features_combined)

Chroma + SC - KNN

In [126]:
start_time = time.time()

knn_classifier_combined = KNeighborsClassifier(n_neighbors=9, weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                      metric='minkowski', metric_params=None, n_jobs=None)

knn_classifier_combined.fit(train_features_combined_scaled, train_labels_combined)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_combined_knn = knn_classifier_combined.predict(val_features_combined_scaled)
test_pred_combined_knn = knn_classifier_combined.predict(test_features_combined_scaled)

test_accuracy_combined_knn = accuracy_score(test_labels_combined, test_pred_combined_knn)
print(f"KNN (combined) - Test Accuracy: {test_accuracy_combined_knn*100:.2f}%")

Training time: 0.03 seconds
KNN (combined) - Test Accuracy: 79.53%


Chroma + SC - SVM

In [141]:
start_time = time.time()

svm_classifier_combined = SVC(C=50, kernel='rbf', gamma='auto', random_state=42)
svm_classifier_combined.fit(train_features_combined_scaled, train_labels_combined)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_combined_svm = svm_classifier_combined.predict(val_features_combined_scaled)
test_pred_combined_svm = svm_classifier_combined.predict(test_features_combined_scaled)

test_accuracy_combined_svm = accuracy_score(test_labels_combined, test_pred_combined_svm)
print(f"SVM (combined) - Test Accuracy: {test_accuracy_combined_svm*100:.2f}%")

Training time: 14.54 seconds
SVM (combined) - Test Accuracy: 81.47%


Chroma + SC - LR

In [160]:
start_time = time.time()

lr_classifier_combined =LogisticRegression(penalty='l2', dual=False, tol=0.001, C=1, 
                                      fit_intercept=True, intercept_scaling=1, 
                                      class_weight=None, random_state=None, solver='lbfgs', 
                                      max_iter=500, multi_class='deprecated', verbose=0, 
                                      warm_start=False, n_jobs=None, l1_ratio=None)

lr_classifier_combined.fit(train_features_combined_scaled, train_labels_combined)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_combined_lr = lr_classifier_combined.predict(val_features_combined_scaled)
test_pred_combined_lr = lr_classifier_combined.predict(test_features_combined_scaled)

test_accuracy_combined_lr = accuracy_score(test_labels_combined, test_pred_combined_lr)
print(f"Logistic Regression (combined) - Test Accuracy: {test_accuracy_combined_lr*100:.2f}%")


Training time: 0.06 seconds
Logistic Regression (combined) - Test Accuracy: 62.62%


Chroma + SC - RF

In [163]:
start_time = time.time()

rf_classifier_combined = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None, 
                                       min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, 
                                       max_features='sqrt', max_leaf_nodes=None, min_impurity_decrease=0.0, 
                                       bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, 
                                       warm_start=False, class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

rf_classifier_combined.fit(train_features_combined, train_labels_combined)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_combined_rf = rf_classifier_combined.predict(val_features_combined)
test_pred_combined_rf = rf_classifier_combined.predict(test_features_combined)

test_accuracy_combined_rf = accuracy_score(test_labels_combined, test_pred_combined_rf)
print(f"Random Forest (combined) - Test Accuracy: {test_accuracy_combined_rf*100:.2f}%")


Training time: 10.30 seconds
Random Forest (combined) - Test Accuracy: 81.27%


Chroma + SC - ET

In [169]:
start_time = time.time()

et_classifier_combined = ExtraTreesClassifier(n_estimators=300, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

et_classifier_combined.fit(train_features_combined_scaled, train_labels_combined)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_combined_et = et_classifier_combined.predict(val_features_combined_scaled)
test_pred_combined_et = et_classifier_combined.predict(test_features_combined_scaled)

test_accuracy_combined_et = accuracy_score(test_labels_combined, test_pred_combined_et)
print(f"Extra Trees (combined) - Test Accuracy: {test_accuracy_combined_et*100:.2f}%")

Training time: 10.98 seconds
Extra Trees (combined) - Test Accuracy: 81.90%


In [3]:
def compute_chroma_energy(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    stft = librosa.stft(signal_in, n_fft=int(frame_size * sample_rate), hop_length=int(frame_stride * sample_rate))
    chroma = librosa.feature.chroma_stft(S=np.abs(stft), sr=sample_rate)
    chroma_energy = np.sum(chroma**2, axis=1)
    return chroma_energy

def compute_rmse(signal_in, sample_rate=22050, frame_size=0.025, frame_stride=0.01):
    rmse = librosa.feature.rms(y=signal_in)
    return rmse[0]

def compute_zcr(signal_in, sample_rate=22050, frame_size=0.025, frame_stride=0.01):
    zcr = librosa.feature.zero_crossing_rate(signal_in, frame_length=int(frame_size * sample_rate), hop_length=int(frame_stride * sample_rate))
    return zcr[0]

def compute_spectral_flux(signal_in, sample_rate=22050, frame_size=0.025, frame_stride=0.01):
    stft = librosa.stft(signal_in, n_fft=int(frame_size * sample_rate), hop_length=int(frame_stride * sample_rate))
    mag_frames = np.abs(stft)
    flux = np.diff(mag_frames, axis=1)
    spectral_flux = np.sum(flux, axis=0)
    return spectral_flux

In [4]:
def extract_features(directory, sample_rate=22050, output_dir=None, dataset_type='train'):
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'features_{dataset_type}.pkl')
    labels_file = os.path.join(output_dir, f'labels_{dataset_type}.pkl')
    data_file = os.path.join(output_dir, f'data_{dataset_type}.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file) and os.path.exists(data_file):
        print(f"Loading {dataset_type} data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
        data = joblib.load(data_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []
        data = []  # Thêm một biến để lưu dữ liệu đầu vào (có thể là tín hiệu, ví dụ như signal hoặc các thông tin khác)
        
        for label in ['Queen', 'NonQueen']:
            path = os.path.join(directory, label)
            for file in os.listdir(path):
                file_path = os.path.join(path, file)
                signal, sr = librosa.load(file_path, sr=sample_rate)
                
                # Tính toán các đặc trưng
                chroma_energy = compute_chroma_energy(signal, sr)
                rmse = compute_rmse(signal, sr)
                zcr = compute_zcr(signal, sr)
                spectral_flux = compute_spectral_flux(signal, sr)
                
                # Tính trung bình các đặc trưng nếu cần
                feature = np.hstack([chroma_energy, rmse, zcr, spectral_flux])
                
                features.append(feature)
                labels.append(label)
                data.append(signal)  # Lưu tín hiệu âm thanh gốc hoặc dữ liệu khác nếu cần
        
        features = np.array(features)
        labels = np.array(labels)
        data = np.array(data)  # Chuyển dữ liệu sang dạng numpy array nếu cần lưu
        
        if output_dir:
            # Lưu từng đối tượng riêng biệt cho train/val/test
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)
            joblib.dump(data, data_file)  # Lưu tín hiệu âm thanh hoặc dữ liệu
        
        end_time = time.time()
        print(f"Feature extraction time: {end_time - start_time:.2f} seconds")
    
    return features, labels, data

train_features, train_labels, train_data = extract_features(train_path, output_dir=output_dir, dataset_type='train')
val_features, val_labels, val_data = extract_features(val_path, output_dir=output_dir, dataset_type='val')
test_features, test_labels, test_data = extract_features(test_path, output_dir=output_dir, dataset_type='test')


Loading train data from .pkl files...
Loading val data from .pkl files...
Loading test data from .pkl files...


In [5]:
scaler_cafe = StandardScaler()
train_features_cafe_scaled = scaler_cafe.fit_transform(train_features)
val_features_cafe_scaled   = scaler_cafe.transform(val_features)
test_features_cafe_scaled  = scaler_cafe.transform(test_features)

In [186]:
start_time = time.time()

knn_classifier_cafe = KNeighborsClassifier(n_neighbors=13, weights='uniform', algorithm='auto', leaf_size=30, p=2, 
                                      metric='minkowski', metric_params=None, n_jobs=None)

knn_classifier_cafe.fit(train_features_cafe_scaled, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_cafe_knn = knn_classifier_cafe.predict(val_features_cafe_scaled)
test_pred_cafe_knn = knn_classifier_cafe.predict(test_features_cafe_scaled)

test_accuracy_cafe_knn = accuracy_score(test_labels, test_pred_cafe_knn)
print(f"KNN (cafe) - Test Accuracy: {test_accuracy_cafe_knn*100:.2f}%")

Training time: 0.02 seconds
KNN (cafe) - Test Accuracy: 63.45%


In [7]:
start_time = time.time()

svm_classifier_cafe = SVC(C=10, kernel='rbf', gamma='auto', random_state=42)
svm_classifier_cafe.fit(train_features_cafe_scaled, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_cafe_svm = svm_classifier_cafe.predict(val_features_cafe_scaled)
test_pred_cafe_svm = svm_classifier_cafe.predict(test_features_cafe_scaled)

test_accuracy_cafe_svm = accuracy_score(test_labels, test_pred_cafe_svm)
print(f"SVM (cafe) - Test Accuracy: {test_accuracy_cafe_svm*100:.2f}%")

Training time: 273.19 seconds
SVM (cafe) - Test Accuracy: 76.53%


# FFT + CQT

In [51]:
def pre_emphasis(signal_in, pre_emph=0.97):
    """
    Áp dụng pre-emphasis để nhấn mạnh các tần số cao.
    
    Args:
        signal_in: Tín hiệu đầu vào
        pre_emph: Hệ số pre-emphasis, mặc định là 0.97
        
    Returns:
        Tín hiệu sau khi áp dụng pre-emphasis
    """
    return np.append(signal_in[0], signal_in[1:] - pre_emph * signal_in[:-1])

def framing(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01):
    """
    Chia tín hiệu thành các frame có kích thước và bước nhảy xác định.
    """
    frame_length = int(round(frame_size * sample_rate))
    frame_step = int(round(frame_stride * sample_rate))
    signal_length = len(signal_in)
    num_frames = int(np.ceil(np.abs(signal_length - frame_length) / frame_step)) + 1

    pad_signal_length = num_frames * frame_step + frame_length
    z = np.zeros((pad_signal_length - signal_length))
    pad_signal = np.append(signal_in, z)

    indices = np.tile(np.arange(0, frame_length), (num_frames, 1)) + \
              np.tile(np.arange(0, num_frames * frame_step, frame_step), (frame_length, 1)).T
    frames = pad_signal[indices.astype(np.int32, copy=False)]
    return frames

def windowing(frames):
    """
    Áp dụng cửa sổ Hamming cho mỗi frame.
    """
    frame_length = frames.shape[1]
    hamming = np.hamming(frame_length)
    return frames * hamming

def fft_frames(frames, NFFT=512):
    """
    Tính FFT cho mỗi frame và lấy giá trị magnitude.
    """
    return np.absolute(np.fft.rfft(frames, NFFT))

def compute_fft_features(signal_in, sample_rate, frame_size=0.025, frame_stride=0.01, NFFT=512, apply_log=True):
    """
    Tính toán đặc trưng FFT cho tín hiệu âm thanh:
      - Pre-emphasis, Framing, Windowing.
      - Tính FFT cho từng frame và lấy giá trị magnitude.
      - Trung bình các frame để có vector đặc trưng ổn định.
      - (Tùy chọn) Áp dụng log để giảm phạm vi giá trị.
      
    Trả về: vector đặc trưng có kích thước (NFFT/2+1,).
    """
    emphasized_signal = pre_emphasis(signal_in)
    frames = framing(emphasized_signal, sample_rate, frame_size, frame_stride)
    windowed_frames = windowing(frames)
    mag_frames = fft_frames(windowed_frames, NFFT)
    fft_feature = np.mean(mag_frames, axis=0)  # Trung bình theo các frame
    if apply_log:
        fft_feature = np.log(fft_feature + 1e-8)  # Thêm epsilon để tránh log(0)
    return fft_feature

def compute_cqt_features(signal_in, sample_rate, fmin=None, 
                         n_bins=84, bins_per_octave=12, apply_log=True):
    """
    Tính đặc trưng CQT của tín hiệu:
      - Sử dụng librosa.cqt để tính ma trận CQT, lấy giá trị magnitude.
      - (Tùy chọn) Áp dụng log scaling để giảm phạm vi giá trị.
      - Trung bình theo trục thời gian (axis=1) để thu được vector đặc trưng có kích thước (n_bins,).
      
    Args:
        signal_in: Tín hiệu đầu vào
        sample_rate: Tần số lấy mẫu
        fmin: Tần số thấp nhất (Hz). Nếu None, librosa sẽ sử dụng mặc định
        n_bins: Số lượng bin tần số
        bins_per_octave: Số lượng bin cho mỗi quãng 8
        apply_log: Áp dụng logarit cho đặc trưng
        
    Returns:
        Vector đặc trưng CQT có kích thước (n_bins,)
    """
    cqt_matrix = np.abs(librosa.cqt(signal_in, sr=sample_rate,
                                    fmin=fmin, n_bins=n_bins, bins_per_octave=bins_per_octave))
    
    # Trung bình theo trục thời gian (tính trung bình các cột)
    cqt_feature = np.mean(cqt_matrix, axis=1)
    
    # Áp dụng logarithm (nếu cần)
    if apply_log:
        cqt_feature = np.log(cqt_feature + 1e-8)  # Thêm epsilon để tránh log(0)
    
    return cqt_feature

def compute_features(signal_in, sample_rate, frame_size=0.025,
                     NFFT=512,
                     fmin=None, n_bins=84, bins_per_octave=12, apply_log=True):
    """
    Tính toán và kết hợp đặc trưng FFT và CQT cho tín hiệu âm thanh.
    
    Args:
        signal_in: Tín hiệu đầu vào
        sample_rate: Tần số lấy mẫu
        frame_size: Kích thước frame cho FFT (giây hoặc số mẫu nếu > 1)
        NFFT: Số điểm FFT
        fmin: Tần số thấp nhất (Hz) cho CQT
        n_bins: Số lượng bin tần số cho CQT
        bins_per_octave: Số lượng bin cho mỗi quãng 8 cho CQT
        apply_log: Áp dụng logarit cho đặc trưng
        
    Returns:
        Vector đặc trưng kết hợp FFT và CQT
    """
    # Tính đặc trưng FFT
    fft_feature = compute_fft_features(
        signal_in, sample_rate, frame_size=frame_size,
        NFFT=NFFT,
        apply_log=apply_log
    )
    
    # Tính đặc trưng CQT
    cqt_feature = compute_cqt_features(
        signal_in, sample_rate, fmin=fmin,
        n_bins=n_bins, bins_per_octave=bins_per_octave,
        apply_log=apply_log
    )
    
    # Kết hợp hai đặc trưng
    combined_feature = np.concatenate((fft_feature, cqt_feature))
    
    return combined_feature

def load_features_from_directory(directory, sample_rate=22050, NFFT=512, 
                                 frame_size=0.025,
                                 output_dir=None, 
                                 dataset_type='train',
                                 fmin=None, n_bins=84, bins_per_octave=12):
    """
    Duyệt qua các file âm thanh trong thư mục và tính đặc trưng kết hợp FFT và CQT cho mỗi file.
    
    Args:
        directory: Thư mục chứa dữ liệu âm thanh
        sample_rate: Tần số lấy mẫu
        NFFT: Số điểm FFT
        frame_size: Kích thước frame (giây hoặc số mẫu nếu > 1)
        output_dir: Thư mục đầu ra để lưu đặc trưng
        dataset_type: Loại tập dữ liệu ('train', 'test', 'val')
        fmin: Tần số thấp nhất (Hz) cho CQT
        n_bins: Số lượng bin tần số cho CQT
        bins_per_octave: Số lượng bin cho mỗi quãng 8 cho CQT
        
    Returns:
        features: Mảng đặc trưng kết hợp
        labels: Nhãn tương ứng
    """
    # Kiểm tra nếu file đã tồn tại, thì load lại
    features_file = os.path.join(output_dir, f'combined_features2_{dataset_type}_nfft{NFFT}_bins{n_bins}.pkl')
    labels_file = os.path.join(output_dir, f'combined_labels2_{dataset_type}_nfft{NFFT}_bins{n_bins}.pkl')
    
    if os.path.exists(features_file) and os.path.exists(labels_file):
        print(f"Loading {dataset_type} data from .pkl files...")
        features = joblib.load(features_file)
        labels = joblib.load(labels_file)
    else:
        print(f"Extracting {dataset_type} data...")
        start_time = time.time()
        labels = []
        features = []
        
        for label in ['NonQueen', 'Queen']:
            path = os.path.join(directory, label)
            if not os.path.exists(path):
                print(f"Warning: Path {path} does not exist. Skipping.")
                continue
                
            for file in os.listdir(path):
                if not file.endswith(('.wav', '.mp3', '.flac', '.ogg')):
                    continue
                    
                file_path = os.path.join(path, file)
                try:
                    signal, sr = librosa.load(file_path, sr=sample_rate)
                    
                    # Tính toán đặc trưng kết hợp FFT và CQT
                    combined_feature = compute_features(
                        signal, sr, frame_size=frame_size,
                        NFFT=NFFT,
                        fmin=fmin, n_bins=n_bins, 
                        bins_per_octave=bins_per_octave
                    )
                    
                    features.append(combined_feature)
                    labels.append(label)
                except Exception as e:
                    print(f"Error processing file {file_path}: {str(e)}")
        
        features = np.array(features)
        labels = np.array(labels)
        
        # Lưu dữ liệu vào file .pkl nếu chưa tồn tại
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
            joblib.dump(features, features_file)
            joblib.dump(labels, labels_file)
        
        end_time = time.time()
        print(f"Feature extraction time: {end_time - start_time:.2f} seconds")
        print(f"Features shape: {features.shape}, Labels shape: {labels.shape}")
    
    return features, labels

# Trích xuất đặc trưng kết hợp cho tập dữ liệu
train_features, train_labels = load_features_from_directory(
    train_path, sample_rate=22050, NFFT=512, fmin=None, n_bins=96, bins_per_octave=12,
    output_dir=output_dir, dataset_type='train'
)

val_features, val_labels = load_features_from_directory(
    val_path, sample_rate=22050, NFFT=512, fmin=None, n_bins=96, bins_per_octave=12,
    output_dir=output_dir, dataset_type='val'
)

test_features, test_labels = load_features_from_directory(
    test_path, sample_rate=22050, NFFT=512, fmin=None, n_bins=96, bins_per_octave=12,
    output_dir=output_dir, dataset_type='test'
)

Extracting train data...
Feature extraction time: 685.76 seconds
Features shape: (14066, 353), Labels shape: (14066,)
Extracting val data...
Feature extraction time: 97.72 seconds
Features shape: (2010, 353), Labels shape: (2010,)
Extracting test data...
Feature extraction time: 192.34 seconds
Features shape: (4000, 353), Labels shape: (4000,)


In [52]:
train_features.shape[1]

353

In [53]:
scaler = StandardScaler()
train_features_scaled = scaler.fit_transform(train_features)
val_features_scaled   = scaler.transform(val_features)
test_features_scaled  = scaler.transform(test_features)

In [54]:
start_time = time.time()

knn_classifier = KNeighborsClassifier(n_neighbors=1)
knn_classifier.fit(train_features, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_knn = knn_classifier.predict(val_features)
test_accuracy_knn = knn_classifier.predict(test_features)
print(f"KNN (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_knn)*100:.2f}%")
print(f"KNN (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_knn)*100:.2f}%")

Training time: 0.02 seconds
KNN (FFT features) - Validation Accuracy: 96.97%
KNN (FFT features) - Test Accuracy: 97.15%


In [55]:
start_time = time.time()

knn_classifier = KNeighborsClassifier(n_neighbors=1)
knn_classifier.fit(train_features_scaled, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_knn = knn_classifier.predict(val_features_scaled)
test_accuracy_knn = knn_classifier.predict(test_features_scaled)
print(f"KNN (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_knn)*100:.2f}%")
print(f"KNN (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_knn)*100:.2f}%")

Training time: 0.01 seconds
KNN (FFT features) - Validation Accuracy: 96.87%
KNN (FFT features) - Test Accuracy: 97.05%


In [56]:
start_time = time.time()

svm_rbf_classifier = SVC(C=100, kernel='rbf', degree=3, gamma=0.1, coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

X_combined = np.vstack((train_features, val_features))
y_combined = np.concatenate((train_labels, val_labels))

# Huấn luyện mô hình SVM trên dữ liệu kết hợp
svm_rbf_classifier.fit(X_combined, y_combined)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features)
test_accuracy_svm = accuracy_score(test_labels, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {test_accuracy_svm * 100:.2f}%")

Training time: 81.22 seconds
Test Accuracy (SVM with RBF Kernel): 98.60%


In [57]:
start_time = time.time()

svm_rbf_classifier = SVC(C=100, kernel='rbf', degree=3, gamma=0.1, coef0=0.0, shrinking=True, 
                         probability=False, tol=0.001, cache_size=200, class_weight=None, verbose=False, 
                         max_iter=-1, decision_function_shape='ovr', break_ties=False, random_state=42)

# Huấn luyện mô hình SVM
svm_rbf_classifier.fit(train_features_scaled, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_svm_rbf = svm_rbf_classifier.predict(val_features_scaled)
val_accuracy_svm_rbf = accuracy_score(val_labels, val_predictions_svm_rbf)
print(f"Validation Accuracy (SVM with RBF Kernel): {val_accuracy_svm_rbf * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_svm_rbf = svm_rbf_classifier.predict(test_features_scaled)
scale_test_accuracy_svm = accuracy_score(test_labels, test_predictions_svm_rbf)
print(f"Test Accuracy (SVM with RBF Kernel): {scale_test_accuracy_svm * 100:.2f}%")

Training time: 115.15 seconds
Validation Accuracy (SVM with RBF Kernel): 97.46%
Test Accuracy (SVM with RBF Kernel): 98.00%


In [58]:
start_time = time.time()

lr_classifier = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=10, fit_intercept=True, intercept_scaling=1, 
                                   class_weight=None, random_state=42, solver='lbfgs', max_iter=1500, multi_class='deprecated', 
                                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)
lr_classifier.fit(train_features, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_lr = lr_classifier.predict(val_features)
test_accuracy_lr = lr_classifier.predict(test_features)
print(f"Logistic Regression (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_lr)*100:.2f}%")
print(f"Logistic Regression (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_lr)*100:.2f}%")

Training time: 9.04 seconds
Logistic Regression (FFT features) - Validation Accuracy: 88.51%
Logistic Regression (FFT features) - Test Accuracy: 88.08%


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [59]:
start_time = time.time()

lr_classifier = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=10, fit_intercept=True, intercept_scaling=1, 
                                   class_weight=None, random_state=42, solver='saga', max_iter=1000, multi_class='deprecated', 
                                   verbose=0, warm_start=False, n_jobs=None, l1_ratio=None)

lr_classifier.fit(train_features_scaled, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_lr = lr_classifier.predict(val_features_scaled)
scale_test_accuracy_lr = lr_classifier.predict(test_features_scaled)
print(f"Logistic Regression (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_lr)*100:.2f}%")
print(f"Logistic Regression (FFT features) - Test Accuracy: {accuracy_score(test_labels, scale_test_accuracy_lr)*100:.2f}%")

Training time: 85.66 seconds
Logistic Regression (FFT features) - Validation Accuracy: 88.21%
Logistic Regression (FFT features) - Test Accuracy: 88.67%




In [60]:
start_time = time.time()

rf_classifier = RandomForestClassifier(n_estimators=100, criterion='gini', max_depth=None, min_samples_split=2, 
                                       min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='sqrt', 
                                       max_leaf_nodes=None, min_impurity_decrease=0.0, bootstrap=True, oob_score=False, 
                                       n_jobs=None, random_state=42, verbose=0, warm_start=False, class_weight=None, 
                                       ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

rf_classifier.fit(train_features, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

val_pred_rf = rf_classifier.predict(val_features)
test_accuracy_rf = rf_classifier.predict(test_features)
print(f"Random Forest (FFT features) - Validation Accuracy: {accuracy_score(val_labels, val_pred_rf)*100:.2f}%")
print(f"Random Forest (FFT features) - Test Accuracy: {accuracy_score(test_labels, test_accuracy_rf)*100:.2f}%")

Training time: 42.03 seconds
Random Forest (FFT features) - Validation Accuracy: 94.63%
Random Forest (FFT features) - Test Accuracy: 95.12%


In [61]:
start_time = time.time()

et_classifier = ExtraTreesClassifier(n_estimators=200, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

et_classifier.fit(train_features, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features)
val_accuracy_et = accuracy_score(val_labels, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features)
test_accuracy_et = accuracy_score(test_labels, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {test_accuracy_et * 100:.2f}%")

Training time: 254.77 seconds
Validation Accuracy (Extra Trees): 95.92%
Test Accuracy (Extra Trees): 96.55%


In [62]:
start_time = time.time()

et_classifier = ExtraTreesClassifier(n_estimators=200, criterion='gini', max_depth=30, min_samples_split=2, min_samples_leaf=1, 
                                     min_weight_fraction_leaf=0.0, max_features=None, max_leaf_nodes=None, min_impurity_decrease=0.0,
                                     bootstrap=False, oob_score=False, n_jobs=None, random_state=42, verbose=0, warm_start=False, 
                                     class_weight=None, ccp_alpha=0.0, max_samples=None, monotonic_cst=None)

# Huấn luyện mô hình Extra Trees với dữ liệu đã chuẩn hóa
et_classifier.fit(train_features_scaled, train_labels)

end_time = time.time()
print(f"Training time: {end_time - start_time:.2f} seconds")

# Đánh giá mô hình trên bộ validation
val_predictions_et = et_classifier.predict(val_features_scaled)
val_accuracy_et = accuracy_score(val_labels, val_predictions_et)
print(f"Validation Accuracy (Extra Trees): {val_accuracy_et * 100:.2f}%")

# Đánh giá mô hình trên bộ testing
test_predictions_et = et_classifier.predict(test_features_scaled)
scale_test_accuracy_et = accuracy_score(test_labels, test_predictions_et)
print(f"Test Accuracy (Extra Trees): {scale_test_accuracy_et * 100:.2f}%")



Training time: 157.91 seconds
Validation Accuracy (Extra Trees): 95.97%
Test Accuracy (Extra Trees): 96.58%


In [63]:
from sklearn.ensemble import GradientBoostingClassifier

start_time = time.time()

gb_classifier = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, 
                            max_depth=3, min_samples_split=2,
                            min_samples_leaf=1, subsample=1.0,
                            max_features='sqrt', random_state=42)

gb_classifier.fit(train_features_scaled, train_labels)

end_time = time.time()
print(f"Training time (Gradient Boosting): {end_time - start_time:.2f} seconds")

val_predictions_gb = gb_classifier.predict(val_features_scaled)
val_accuracy_gb = accuracy_score(val_labels, val_predictions_gb)
print(f"Validation Accuracy (Gradient Boosting): {val_accuracy_gb * 100:.2f}%")

test_predictions_gb = gb_classifier.predict(test_features_scaled)
test_accuracy_gb = accuracy_score(test_labels, test_predictions_gb)
print(f"Test Accuracy (Gradient Boosting): {test_accuracy_gb * 100:.2f}%")
print("-" * 50)

Training time (Gradient Boosting): 14.44 seconds
Validation Accuracy (Gradient Boosting): 90.00%
Test Accuracy (Gradient Boosting): 91.55%
--------------------------------------------------


In [64]:
from sklearn.preprocessing import LabelEncoder
import time
import xgboost as xgb
from sklearn.metrics import accuracy_score

# Chuyển đổi các nhãn từ chuỗi sang số
le = LabelEncoder()
train_labels_enc = le.fit_transform(train_labels)
val_labels_enc = le.transform(val_labels)
test_labels_enc = le.transform(test_labels)

start_time = time.time()

xgb_classifier = xgb.XGBClassifier(
    n_estimators=200,
    max_depth=30,
    random_state=42,
    use_label_encoder=False,    # Tắt cảnh báo về label encoder
    eval_metric='logloss'       # Chỉ định hàm mất mát
)
xgb_classifier.fit(train_features, train_labels_enc)

end_time = time.time()
print(f"Training time (XGBoost): {end_time - start_time:.2f} seconds")

# Đánh giá trên bộ validation
val_predictions_xgb = xgb_classifier.predict(val_features)
val_accuracy_xgb = accuracy_score(val_labels_enc, val_predictions_xgb)
print(f"Validation Accuracy (XGBoost): {val_accuracy_xgb * 100:.2f}%")

# Đánh giá trên bộ testing
test_predictions_xgb = xgb_classifier.predict(test_features)
test_accuracy_xgb = accuracy_score(test_labels_enc, test_predictions_xgb)
print(f"Test Accuracy (XGBoost): {test_accuracy_xgb * 100:.2f}%")


Parameters: { "use_label_encoder" } are not used.



Training time (XGBoost): 37.49 seconds
Validation Accuracy (XGBoost): 96.92%
Test Accuracy (XGBoost): 97.12%


In [65]:
import lightgbm as lgb
import time
from sklearn.metrics import accuracy_score

start_time = time.time()

lgbm_classifier = lgb.LGBMClassifier(
    n_estimators=200,
    max_depth=30,
    random_state=42
)
lgbm_classifier.fit(train_features, train_labels)

end_time = time.time()
print(f"\nTraining time (LightGBM): {end_time - start_time:.2f} seconds")

# Đánh giá trên bộ validation
val_predictions_lgbm = lgbm_classifier.predict(val_features)
val_accuracy_lgbm = accuracy_score(val_labels, val_predictions_lgbm)
print(f"Validation Accuracy (LightGBM): {val_accuracy_lgbm * 100:.2f}%")

# Đánh giá trên bộ testing
test_predictions_lgbm = lgbm_classifier.predict(test_features)
test_accuracy_lgbm = accuracy_score(test_labels, test_predictions_lgbm)
print(f"Test Accuracy (LightGBM): {test_accuracy_lgbm * 100:.2f}%")


[LightGBM] [Info] Number of positive: 7000, number of negative: 7066
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.048210 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 90015
[LightGBM] [Info] Number of data points in the train set: 14066, number of used features: 353
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.497654 -> initscore=-0.009384
[LightGBM] [Info] Start training from score -0.009384

Training time (LightGBM): 2.54 seconds
Validation Accuracy (LightGBM): 97.21%
Test Accuracy (LightGBM): 97.10%
