In [1]:
import os
import numpy as np
import librosa
from tqdm import tqdm  # Import tqdm for progress bars

# Feature extraction function
def extract_features(fake_root_dirs, real_root_dir, max_length=500):
    features = []
    labels = []
    
    def process_audio_file(file_path, label):
        try:
            # Load audio file
            audio, _ = librosa.load(file_path, sr=16000)
            # Extract features (Mel-Frequency Cepstral Coefficients)
            mfccs = librosa.feature.mfcc(y=audio, sr=16000, n_mfcc=40)
            # Pad or trim to the fixed length
            if mfccs.shape[1] < max_length:
                mfccs = np.pad(mfccs, ((0, 0), (0, max_length - mfccs.shape[1])), mode='constant')
            else:
                mfccs = mfccs[:, :max_length]
            features.append(mfccs)
            labels.append(label)
        except Exception as e:
            print(f"Error with file: {file_path} -> {str(e)}")

    # Process fake files from multiple directories with progress bar
    for fake_root_dir in fake_root_dirs:
        for file in tqdm(os.listdir(fake_root_dir), desc=f'Processing fake files from {fake_root_dir}'):
            file_path = os.path.join(fake_root_dir, file)
            process_audio_file(file_path, 1)  # Label 1 for fake

    # Process real files with progress bar
    for file in tqdm(os.listdir(real_root_dir), desc='Processing real files'):
        file_path = os.path.join(real_root_dir, file)
        process_audio_file(file_path, 0)  # Label 0 for real

    return np.array(features), np.array(labels)


In [2]:
# Specify fake and real directories
fake_root_dirs = [
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan_large',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_waveglow'
]
real_root_dir = '/kaggle/input/the-lj-speech-dataset/LJSpeech-1.1/wavs'

# Extract features for fake and real audio
x, y = extract_features(fake_root_dirs, real_root_dir)

# Flatten the 2D MFCC arrays for XGBoost (XGBoost expects 2D input)
x = x.reshape(x.shape[0], -1)

# Model Training and Evaluation Code
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, classification_report

# Split the data with 30% for testing
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=42)

# Initialize XGBoost classifier
model = XGBClassifier(n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42)

# Train the model
model.fit(xtrain, ytrain)

# Predict on the test set
ypred = model.predict(xtest)

# Evaluate the model
accuracy = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy:.4f}")

# Detailed classification report
print(classification_report(ytest, ypred))


Processing fake files from /kaggle/input/wavefake-test/generated_audio/ljspeech_melgan: 100%|██████████| 13100/13100 [07:58<00:00, 27.39it/s]
Processing fake files from /kaggle/input/wavefake-test/generated_audio/ljspeech_melgan_large: 100%|██████████| 13100/13100 [08:05<00:00, 26.97it/s]
Processing fake files from /kaggle/input/wavefake-test/generated_audio/ljspeech_waveglow: 100%|██████████| 13100/13100 [08:08<00:00, 26.84it/s]
Processing real files: 100%|██████████| 13100/13100 [08:21<00:00, 26.14it/s]


Accuracy: 0.7789
              precision    recall  f1-score   support

           0       0.83      0.14      0.24      3904
           1       0.78      0.99      0.87     11816

    accuracy                           0.78     15720
   macro avg       0.80      0.56      0.55     15720
weighted avg       0.79      0.78      0.71     15720



In [4]:
import pickle

# Save the model to a file
with open('xgboost_model.pkl', 'wb') as f:
    pickle.dump(model, f)


In [5]:
# Load the model from the file
with open('xgboost_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

# Use the loaded model to make predictions
ypred_loaded = loaded_model.predict(xtest)
