In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report

# Feature extraction function
def extract_features(fake_root_dirs, real_root_dir, max_length=500):
    features = []
    labels = []
    
    def process_audio_file(file_path, label):
        try:
            # Load audio file
            audio, _ = librosa.load(file_path, sr=16000)
            # Extract features (Mel-Frequency Cepstral Coefficients)
            mfccs = librosa.feature.mfcc(y=audio, sr=16000, n_mfcc=40)
            # Pad or trim to the fixed length
            if mfccs.shape[1] < max_length:
                mfccs = np.pad(mfccs, ((0, 0), (0, max_length - mfccs.shape[1])), mode='constant')
            else:
                mfccs = mfccs[:, :max_length]
            features.append(mfccs)
            labels.append(label)
        except Exception as e:
            print(f"Error with file: {file_path} -> {str(e)}")

    # Process fake files from multiple directories
    for fake_root_dir in fake_root_dirs:
        for file in os.listdir(fake_root_dir):
            file_path = os.path.join(fake_root_dir, file)
            process_audio_file(file_path, 1)  # Label 1 for fake

    # Process real files
    for file in os.listdir(real_root_dir):
        file_path = os.path.join(real_root_dir, file)
        process_audio_file(file_path, 0)  # Label 0 for real

    return np.array(features), np.array(labels)

# Specify fake and real directories
fake_root_dirs = [
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan_large',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_waveglow'
]
real_root_dir = '/kaggle/input/the-lj-speech-dataset/LJSpeech-1.1/wavs'

# Extract features for fake and real audio
x, y = extract_features(fake_root_dirs, real_root_dir)

# Flatten the 2D MFCC arrays for Random Forest (Random Forest expects 2D input)
x = x.reshape(x.shape[0], -1)

# Split the data with 30% for testing
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=42)

# Initialize Random Forest classifier
model = RandomForestClassifier(n_estimators=100, max_depth=6, random_state=42)

# Train the model
model.fit(xtrain, ytrain)

# Predict on the test set
ypred = model.predict(xtest)

# Evaluate the model
accuracy = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy:.4f}")

# Detailed classification report
print(classification_report(ytest, ypred))


Accuracy: 0.7517
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      3904
           1       0.75      1.00      0.86     11816

    accuracy                           0.75     15720
   macro avg       0.38      0.50      0.43     15720
weighted avg       0.56      0.75      0.65     15720



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [4]:
import joblib  # Ensure joblib is imported# Save the trained model using joblib
joblib.dump(model, '/kaggle/working/random_forest_model.pkl')

['/kaggle/working/random_forest_model.pkl']

In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import gc

# Feature extraction function with log-mel spectrograms
def extract_features(fake_root_dirs, real_root_dir, max_length=300, n_mels=40, sample_rate=8000, batch_size=32):
    features = []
    labels = []
    
    def process_audio_file(file_path, label):
        try:
            # Load audio file
            audio, _ = librosa.load(file_path, sr=sample_rate)
            # Extract log-mel spectrogram
            mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
            log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
            # Pad or trim to the fixed length
            if log_mel_spectrogram.shape[1] < max_length:
                log_mel_spectrogram = np.pad(log_mel_spectrogram, ((0, 0), (0, max_length - log_mel_spectrogram.shape[1])), mode='constant')
            else:
                log_mel_spectrogram = log_mel_spectrogram[:, :max_length]
            features.append(log_mel_spectrogram)
            labels.append(label)
        except Exception as e:
            print(f"Error with file: {file_path} -> {str(e)}")

    # Process fake files from multiple directories in batches
    for fake_root_dir in fake_root_dirs:
        files = os.listdir(fake_root_dir)
        for i in range(0, len(files), batch_size):
            batch_files = files[i:i + batch_size]
            for file in batch_files:
                file_path = os.path.join(fake_root_dir, file)
                process_audio_file(file_path, 1)  # Label 1 for fake

    # Process real files in batches
    real_files = os.listdir(real_root_dir)
    for i in range(0, len(real_files), batch_size):
        batch_files = real_files[i:i + batch_size]
        for file in batch_files:
            file_path = os.path.join(real_root_dir, file)
            process_audio_file(file_path, 0)  # Label 0 for real

    # Convert features and labels to numpy arrays
    return np.array(features), np.array(labels)

# Specify fake and real directories
fake_root_dirs = [
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan_large',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_waveglow'
]
real_root_dir = '/kaggle/input/the-lj-speech-dataset/LJSpeech-1.1/wavs'

# Extract features for fake and real audio
x, y = extract_features(fake_root_dirs, real_root_dir)

# Flatten the 2D log-mel spectrogram arrays for Random Forest
x = x.reshape(x.shape[0], -1)

# Split the data with 30% for testing
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=42)

# Initialize Random Forest classifier
model = RandomForestClassifier(n_estimators=100, max_depth=6, random_state=42)

# Train the model
model.fit(xtrain, ytrain)

# Predict on the test set
ypred = model.predict(xtest)

# Evaluate the model
accuracy = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy:.4f}")

# Detailed classification report
print(classification_report(ytest, ypred))

# Free up memory
gc.collect()


Accuracy: 0.7517
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      3904
           1       0.75      1.00      0.86     11816

    accuracy                           0.75     15720
   macro avg       0.38      0.50      0.43     15720
weighted avg       0.56      0.75      0.65     15720



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


0

In [2]:
import joblib  # Ensure joblib is imported# Save the trained model using joblib
joblib.dump(model, '/kaggle/working/random_forest_model.pkl')

['/kaggle/working/random_forest_model.pkl']

In [3]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import gc

# Data augmentation function
def add_noise(audio, noise_factor=0.005):
    noise = np.random.randn(len(audio))
    augmented_data = audio + noise_factor * noise
    return np.clip(augmented_data, -1.0, 1.0)

# Feature extraction function with log-mel spectrograms
def extract_features(fake_root_dirs, real_root_dir, max_length=300, n_mels=40, sample_rate=8000, batch_size=32):
    features = []
    labels = []
    
    def process_audio_file(file_path, label):
        try:
            # Load audio file
            audio, _ = librosa.load(file_path, sr=sample_rate)
            audio = add_noise(audio)  # Add noise for augmentation
            
            # Extract log-mel spectrogram
            mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
            log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

            # Pad or trim to the fixed length
            if log_mel_spectrogram.shape[1] < max_length:
                log_mel_spectrogram = np.pad(log_mel_spectrogram, ((0, 0), (0, max_length - log_mel_spectrogram.shape[1])), mode='constant')
            else:
                log_mel_spectrogram = log_mel_spectrogram[:, :max_length]
            
            features.append(log_mel_spectrogram)
            labels.append(label)
        except Exception as e:
            print(f"Error with file: {file_path} -> {str(e)}")

    # Process fake files from multiple directories in batches
    for fake_root_dir in fake_root_dirs:
        files = os.listdir(fake_root_dir)
        for i in range(0, len(files), batch_size):
            batch_files = files[i:i + batch_size]
            for file in batch_files:
                file_path = os.path.join(fake_root_dir, file)
                process_audio_file(file_path, 1)  # Label 1 for fake

    # Process real files in batches
    real_files = os.listdir(real_root_dir)
    for i in range(0, len(real_files), batch_size):
        batch_files = real_files[i:i + batch_size]
        for file in batch_files:
            file_path = os.path.join(real_root_dir, file)
            process_audio_file(file_path, 0)  # Label 0 for real

    return np.array(features), np.array(labels)

# Specify fake and real directories
fake_root_dirs = [
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan_large',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_waveglow'
]
real_root_dir = '/kaggle/input/the-lj-speech-dataset/LJSpeech-1.1/wavs'

# Extract features for fake and real audio
x, y = extract_features(fake_root_dirs, real_root_dir)

# Flatten the 2D log-mel spectrogram arrays for Random Forest
x = x.reshape(x.shape[0], -1)

# Split the data with 30% for testing
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=42)

# Initialize Random Forest classifier
model = RandomForestClassifier(n_estimators=200, max_depth=10, random_state=42)  # Adjusted parameters

# Train the model
model.fit(xtrain, ytrain)

# Predict on the test set
ypred = model.predict(xtest)

# Evaluate the model
accuracy = accuracy_score(ytest, ypred)
print(f"Accuracy: {accuracy:.4f}")

# Detailed classification report
print(classification_report(ytest, ypred))

# Free up memory
gc.collect()


Accuracy: 0.7513
              precision    recall  f1-score   support

           0       0.00      0.00      0.00      3904
           1       0.75      1.00      0.86     11816

    accuracy                           0.75     15720
   macro avg       0.38      0.50      0.43     15720
weighted avg       0.56      0.75      0.64     15720



54

In [4]:
import joblib  # Ensure joblib is imported# Save the trained model using joblib
joblib.dump(model, '/kaggle/working/random_forest_model.pkl')

['/kaggle/working/random_forest_model.pkl']

In [1]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import soundfile as sf
import gc


In [2]:
def add_noise(audio, noise_factor=0.005):
    noise = np.random.randn(len(audio))
    augmented_data = audio + noise_factor * noise
    return np.clip(augmented_data, -1.0, 1.0)


In [3]:
def preprocess_audio_file(file_path, sample_rate=8000):  # Reduced sample rate
    try:
        if os.path.getsize(file_path) == 0:  # Skip 0-bit files
            return None
        audio, sr = sf.read(file_path)
        if len(audio) < sample_rate:  # Zero-pad if less than sample_rate
            audio = np.pad(audio, (0, sample_rate - len(audio)), mode='constant')
        return audio
    except Exception as e:
        print(f"Error with file: {file_path} -> {str(e)}")
        return None


In [7]:
# Extract features
def extract_features_combined(fake_root_dirs, real_root_dir, max_length=300, n_mels=40, sample_rate=8000, batch_size=16):
    features = []
    labels = []

    def process_audio_file(file_path, label):
        audio = preprocess_audio_file(file_path, sample_rate)
        if audio is None:  # Skip invalid or empty files
            return

        # Add noise for data augmentation
        audio = add_noise(audio)

        # Extract features
        mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sample_rate, n_mels=n_mels)
        log_mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)
        spectrogram = np.abs(librosa.stft(audio))
        chroma = librosa.feature.chroma_stft(y=audio, sr=sample_rate)
        
        # Adjusted parameters for spectral_contrast
        spectral_contrast = librosa.feature.spectral_contrast(y=audio, sr=sample_rate, n_bands=4, fmin=50.0)

        # Padding or trimming the features to match max_length
        def pad_or_trim(feature):
            if feature.shape[1] < max_length:
                return np.pad(feature, ((0, 0), (0, max_length - feature.shape[1])), mode='constant')
            else:
                return feature[:, :max_length]

        log_mel_spectrogram = pad_or_trim(log_mel_spectrogram)
        spectrogram = pad_or_trim(spectrogram)
        chroma = pad_or_trim(chroma)
        spectral_contrast = pad_or_trim(spectral_contrast)

        # Concatenate all features
        combined_features = np.concatenate((log_mel_spectrogram, spectrogram, chroma, spectral_contrast), axis=0)
        features.append(combined_features.astype(np.float32))  # Use float32
        labels.append(label)

    # Process fake files from multiple directories
    for fake_root_dir in fake_root_dirs:
        files = list(set(os.listdir(fake_root_dir)))  # Remove duplicates
        for i in range(0, len(files), batch_size):
            batch_files = files[i:i + batch_size]
            for file in batch_files:
                file_path = os.path.join(fake_root_dir, file)
                process_audio_file(file_path, 1)  # Label 1 for fake

    # Process real files
    real_files = list(set(os.listdir(real_root_dir)))  # Remove duplicates
    for i in range(0, len(real_files), batch_size):
        batch_files = real_files[i:i + batch_size]
        for file in batch_files:
            file_path = os.path.join(real_root_dir, file)
            process_audio_file(file_path, 0)  # Label 0 for real

    return np.array(features), np.array(labels)


In [8]:
def train_and_evaluate_model(x, y):
    # Flatten the features
    x = x.reshape(x.shape[0], -1)

    # Normalize data
    scaler = StandardScaler()
    x = scaler.fit_transform(x)

    # Split data
    xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.3, random_state=42)

    # Define model and parameter grid
    param_dist = {
        'n_estimators': [100, 200],
        'max_depth': [None, 10],
        'min_samples_split': [2, 5],
        'min_samples_leaf': [1, 2]
    }

    # RandomizedSearchCV
    random_search = RandomizedSearchCV(RandomForestClassifier(random_state=42), param_distributions=param_dist, n_iter=5, cv=3, n_jobs=-1)

    # Train model
    random_search.fit(xtrain, ytrain)

    # Get best model
    best_model = random_search.best_estimator_

    # Predict
    ypred = best_model.predict(xtest)

    # Evaluate
    accuracy = accuracy_score(ytest, ypred)
    print(f"Accuracy: {accuracy:.4f}")
    print(classification_report(ytest, ypred))

    # Free up memory
    del x, y, xtrain, xtest, ytrain, ytest, ypred, best_model, random_search
    gc.collect()


In [None]:
if __name__ == "__main__":
    # Specify directories
    fake_root_dirs = [
        '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan',
        '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan_large',
        '/kaggle/input/wavefake-test/generated_audio/ljspeech_waveglow'
    ]
    real_root_dir = '/kaggle/input/the-lj-speech-dataset/LJSpeech-1.1/wavs'

    # Extract features
    x, y = extract_features_combined(fake_root_dirs, real_root_dir)

    # Train and evaluate model
    train_and_evaluate_model(x, y)


In [1]:
import os
import numpy as np
import librosa
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  # Import Random Forest
from sklearn.metrics import accuracy_score, classification_report
import gc
from concurrent.futures import ProcessPoolExecutor

# Function to process a single audio file
def process_audio_file(file_info, max_length=100):
    file_path, label = file_info
    try:
        audio, _ = librosa.load(file_path, sr=4000)  # Downsample to 4000 Hz
        stft = librosa.stft(audio, n_fft=256, hop_length=128)  # Short-Time Fourier Transform
        spectrogram = np.abs(stft)

        if spectrogram.shape[1] < max_length:
            spectrogram = np.pad(spectrogram, ((0, 0), (0, max_length - spectrogram.shape[1])), mode='constant')
        else:
            spectrogram = spectrogram[:, :max_length]

        return spectrogram.astype(np.float32), label
    except Exception as e:
        print(f"Error with file: {file_path} -> {str(e)}")
        return None, None

# Generator to yield audio files and labels
def audio_file_generator(fake_root_dirs, real_root_dir):
    for fake_root_dir in fake_root_dirs:
        for file in os.listdir(fake_root_dir):
            file_path = os.path.join(fake_root_dir, file)
            yield file_path, 1  # Label 1 for fake
    for file in os.listdir(real_root_dir):
        file_path = os.path.join(real_root_dir, file)
        yield file_path, 0  # Label 0 for real

# Feature extraction for spectrogram
def extract_spectrogram_features(fake_root_dirs, real_root_dir, max_length=250, max_workers=4):
    features = []
    labels = []
    total_files = sum(len(os.listdir(d)) for d in fake_root_dirs) + len(os.listdir(real_root_dir))

    with ProcessPoolExecutor(max_workers=max_workers) as executor:
        results = list(tqdm(executor.map(process_audio_file, 
                                          audio_file_generator(fake_root_dirs, real_root_dir)), 
                            total=total_files, desc='Processing audio files'))

    for spectrogram, label in results:
        if spectrogram is not None:
            features.append(spectrogram)
            labels.append(label)

    del results
    gc.collect()

    return np.array(features), np.array(labels)

# Specify fake and real directories
fake_root_dirs = [
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_melgan_large',
    '/kaggle/input/wavefake-test/generated_audio/ljspeech_waveglow'
]
real_root_dir = '/kaggle/input/the-lj-speech-dataset/LJSpeech-1.1/wavs'

# Extract features for spectrogram
x_spectrogram, y_spectrogram = extract_spectrogram_features(fake_root_dirs, real_root_dir)

# Flatten the 2D spectrogram arrays for Random Forest
x_spectrogram = x_spectrogram.reshape(x_spectrogram.shape[0], -1)

# Train and evaluate Spectrogram model with Random Forest
xtrain_s, xtest_s, ytrain_s, ytest_s = train_test_split(x_spectrogram, y_spectrogram, test_size=0.3, random_state=42)
model_spectrogram = RandomForestClassifier(n_estimators=100, max_depth=6, random_state=42)  # Use Random Forest
model_spectrogram.fit(xtrain_s, ytrain_s)
ypred_s = model_spectrogram.predict(xtest_s)

# Evaluate the model
accuracy_s = accuracy_score(ytest_s, ypred_s)
print(f"Spectrogram Model Accuracy with Random Forest: {accuracy_s:.4f}")
print(classification_report(ytest_s, ypred_s))


Processing audio files: 100%|██████████| 52400/52400 [04:56<00:00, 176.93it/s]


Spectrogram Model Accuracy with Random Forest: 0.8541
              precision    recall  f1-score   support

           0       1.00      0.41      0.58      3904
           1       0.84      1.00      0.91     11816

    accuracy                           0.85     15720
   macro avg       0.92      0.71      0.75     15720
weighted avg       0.88      0.85      0.83     15720



In [4]:
import joblib  # Ensure joblib is imported

# Save the trained Random Forest model
joblib.dump(model_spectrogram, '/kaggle/working/random.pkl')


['/kaggle/working/random.pkl']