In [32]:
import pandas as pd

import os
import sys

import numpy as np
import tensorflow as tf

tf.random.set_seed(42)
np.random.seed(42)

Dataset =  "D:\model\stegano\dataset\for-2seconds"

  Dataset =  "D:\model\stegano\dataset\for-2seconds"


In [36]:
import librosa
from pydub import AudioSegment

def preprocess_data(dataset_path):
    X = []
    y = []

    for variant in os.listdir(dataset_path):
        variant_path = os.path.join(dataset_path, variant)
        for classesz in os.listdir(variant_path):
            classes_path = os.path.join(variant_path, classesz)
            for audio in os.listdir(classes_path):
                audio_path = os.path.join(classes_path, audio)
                
                try:
                    raw_audio = AudioSegment.from_file(audio_path)
                    
                    samples = np.array(raw_audio.get_array_of_samples(), dtype='float32')
                    trimmed, _ = librosa.effects.trim(samples, top_db=25)
                    padding = max(0, 50000 - len(trimmed))
                    padded = np.pad(trimmed, (0, padding), 'constant')

                    X.append(padded)
                    y.append(classesz)
                                        
                except Exception as e:
                    print(f"Processing error {audio_path}: {e}")
    
    return X, y

Dataset = r"D:\model\stegano\dataset\for-2seconds"
X, y = preprocess_data(Dataset)

In [37]:
def encode_labels(y):
    encoded_labels = []
    for label in y:
        if label == 'real':
            encoded_labels.append(1)
        elif label == 'fake':
            encoded_labels.append(0)
    return encoded_labels

y_encoded = encode_labels(y)

In [38]:
X = np.array(X)
y = np.array(y_encoded)

In [39]:
def extract_features(X):
    zcr_list = []
    rms_list = []
    mfccs_list = []

    FRAME_LENGTH = 2048
    HOP_LENGTH = 512

    for audio in X:
        zcr = librosa.feature.zero_crossing_rate(audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
        rms = librosa.feature.rms(y=audio, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
        mfccs = librosa.feature.mfcc(y=audio, sr=44100, n_mfcc=13, hop_length=HOP_LENGTH)

        zcr_list.append(zcr)
        rms_list.append(rms)
        mfccs_list.append(mfccs)

    return zcr_list, rms_list, mfccs_list

zcr_features, rms_features, mfccs_features = extract_features(X)

In [40]:
def combine_features(zcr_list, rms_list, mfccs_list):
    zcr_features = np.swapaxes(zcr_list, 1, 2)
    rms_features = np.swapaxes(rms_list, 1, 2)
    mfccs_features = np.swapaxes(mfccs_list, 1, 2)

    X_features = np.concatenate((zcr_features, rms_features, mfccs_features), axis=2)

    return X_features

X_features = combine_features(zcr_features, rms_features, mfccs_features)


In [56]:
from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau


def train_model(X_features, y):
    X_train, X_to_split, y_train, y_to_split = train_test_split(X_features, y, test_size=0.3, random_state=1)
    X_val, X_test, y_val, y_test = train_test_split(X_to_split, y_to_split, test_size=0.4, random_state=1)

    y_train_class = to_categorical(y_train, 2)
    y_val_class = to_categorical(y_val, 2)

    model = Sequential()
    model.add(LSTM(64, return_sequences=True, input_shape=(X_features.shape[1:3])))
    model.add(LSTM(64))
    model.add(Dense(2, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='RMSProp', metrics=['categorical_accuracy'])

    early_stopping = EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='min', restore_best_weights=True)
    checkpoint = ModelCheckpoint('CustomModel.keras', monitor='val_loss', verbose=1, save_best_only=True, mode='min')
    rlrop = ReduceLROnPlateau(monitor='val_categorical_accuracy', factor=0.02, patience=100)

    history = model.fit(X_train, y_train_class, epochs=70, batch_size=6, validation_data=(X_val, y_val_class), callbacks=[rlrop, early_stopping, checkpoint])

    return model, history, X_test, y_test

model, history, X_test, y_test = train_model(X_features, y)

Epoch 1/70


  super().__init__(**kwargs)


[1m2084/2085[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 45ms/step - categorical_accuracy: 0.7103 - loss: 0.5724
Epoch 1: val_loss improved from inf to 0.43492, saving model to CustomModel.keras
[1m2085/2085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m101s[0m 47ms/step - categorical_accuracy: 0.7103 - loss: 0.5724 - val_categorical_accuracy: 0.8063 - val_loss: 0.4349 - learning_rate: 0.0010
Epoch 2/70
[1m2085/2085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 45ms/step - categorical_accuracy: 0.7836 - loss: 0.4727
Epoch 2: val_loss improved from 0.43492 to 0.40549, saving model to CustomModel.keras
[1m2085/2085[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 47ms/step - categorical_accuracy: 0.7836 - loss: 0.4727 - val_categorical_accuracy: 0.8119 - val_loss: 0.4055 - learning_rate: 0.0010
Epoch 3/70
[1m2084/2085[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 45ms/step - categorical_accuracy: 0.8170 - loss: 0.4211
Epoch 3: val_loss improved from

In [57]:
def test_model(model, X_test, y_test):
    y_test_class = to_categorical(y_test, 2)
    test_loss, test_accuracy = model.evaluate(X_test, y_test_class, verbose=0)
    
    print(f'Test Loss: {test_loss}')
    print(f'Test Accuracy: {test_accuracy}')

test_model(model, X_test, y_test)

Test Loss: 0.1674717366695404
Test Accuracy: 0.9310023188591003


In [58]:
model.save('deepfakedetectioncyber.keras')

In [59]:
import numpy as np
import librosa
from pydub import AudioSegment
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

def test_existing_model(model, audio_path, is_directory=False):
    """
    Test the existing trained model with new audio file(s).
    
    Parameters:
    - model: The trained model object (already in memory)
    - audio_path: Path to a single audio file or directory of audio files
    - is_directory: Boolean indicating if audio_path is a directory
    
    Returns:
    - Results of the predictions
    """
    if is_directory:
        return batch_test_existing_model(model, audio_path)
    else:
        return test_single_audio_existing_model(model, audio_path)

def preprocess_single_audio(audio_path):
    """
    Preprocess a single audio file the same way as in training.
    
    Parameters:
    - audio_path: Path to the audio file
    
    Returns:
    - Feature vector ready for model input
    """
    try:
        # Load and preprocess audio
        raw_audio = AudioSegment.from_file(audio_path)
        samples = np.array(raw_audio.get_array_of_samples(), dtype='float32')
        trimmed, _ = librosa.effects.trim(samples, top_db=25)
        padding = max(0, 50000 - len(trimmed))
        padded = np.pad(trimmed, (0, padding), 'constant')
        
        # Extract features
        FRAME_LENGTH = 2048
        HOP_LENGTH = 512
        
        # Extract ZCR
        zcr = librosa.feature.zero_crossing_rate(padded, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
        zcr = np.expand_dims(zcr, axis=0)
        
        # Extract RMS
        rms = librosa.feature.rms(y=padded, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
        rms = np.expand_dims(rms, axis=0)
        
        # Extract MFCCs
        mfccs = librosa.feature.mfcc(y=padded, sr=44100, n_mfcc=13, hop_length=HOP_LENGTH)
        mfccs = np.expand_dims(mfccs, axis=0)
        
        # Combine features
        zcr_features = np.swapaxes(zcr, 1, 2)
        rms_features = np.swapaxes(rms, 1, 2)
        mfccs_features = np.swapaxes(mfccs, 1, 2)
        
        X_features = np.concatenate((zcr_features, rms_features, mfccs_features), axis=2)
        
        return X_features
        
    except Exception as e:
        print(f"Error processing audio {audio_path}: {e}")
        return None

def test_single_audio_existing_model(model, audio_path):
    """
    Test a single audio file with the existing model.
    
    Parameters:
    - model: The trained model object
    - audio_path: Path to the audio file
    
    Returns:
    - predicted_class: 'real' or 'fake'
    - confidence: Confidence score
    """
    X_features = preprocess_single_audio(audio_path)
    
    if X_features is None:
        return None, None
    
    # Make prediction
    prediction = model.predict(X_features, verbose=0)[0]
    
    # Get class and confidence
    predicted_class_idx = np.argmax(prediction)
    confidence = prediction[predicted_class_idx]
    
    # Map class index to label
    predicted_class = 'real' if predicted_class_idx == 1 else 'fake'
    
    print(f"Audio: {os.path.basename(audio_path)}")
    print(f"Prediction: {predicted_class}")
    print(f"Confidence: {confidence:.4f}")
    
    return predicted_class, confidence

def batch_test_existing_model(model, directory_path):
    """
    Test a directory of audio files with the existing model.
    
    Parameters:
    - model: The trained model object
    - directory_path: Path to the directory containing audio files
    
    Returns:
    - results: List of dictionaries with prediction results
    """
    results = []
    true_labels = []
    predicted_labels = []
    
    # Check if the directory has real/fake subdirectories for evaluation
    has_ground_truth = False
    if 'real' in os.listdir(directory_path) and 'fake' in os.listdir(directory_path):
        has_ground_truth = True
    
    if has_ground_truth:
        # Process files with known ground truth
        for class_name in ['real', 'fake']:
            class_dir = os.path.join(directory_path, class_name)
            for file in os.listdir(class_dir):
                if file.endswith(('.wav', '.mp3', '.m4a', '.flac')):
                    audio_path = os.path.join(class_dir, file)
                    predicted_class, confidence = test_single_audio_existing_model(model, audio_path)
                    
                    if predicted_class is not None:
                        results.append({
                            'file': file,
                            'true_class': class_name,
                            'prediction': predicted_class,
                            'confidence': float(confidence),
                            'correct': class_name == predicted_class
                        })
                        
                        # For confusion matrix
                        true_labels.append(1 if class_name == 'real' else 0)
                        predicted_labels.append(1 if predicted_class == 'real' else 0)
    else:
        # Process files without known ground truth
        for file in os.listdir(directory_path):
            if file.endswith(('.wav', '.mp3', '.m4a', '.flac')):
                audio_path = os.path.join(directory_path, file)
                predicted_class, confidence = test_single_audio_existing_model(model, audio_path)
                
                if predicted_class is not None:
                    results.append({
                        'file': file,
                        'prediction': predicted_class,
                        'confidence': float(confidence)
                    })
    
    # Print summary statistics
    print("\nSummary:")
    print(f"Total files analyzed: {len(results)}")
    
    real_preds = [r for r in results if r['prediction'] == 'real']
    fake_preds = [r for r in results if r['prediction'] == 'fake']
    print(f"Predicted as real: {len(real_preds)} ({len(real_preds)/len(results)*100:.1f}%)")
    print(f"Predicted as fake: {len(fake_preds)} ({len(fake_preds)/len(results)*100:.1f}%)")
    print(f"Average confidence: {np.mean([r['confidence'] for r in results]):.4f}")
    
    # If we have ground truth, calculate accuracy metrics
    if has_ground_truth:
        correct_count = sum(1 for r in results if r.get('correct', False))
        accuracy = correct_count / len(results)
        print(f"Accuracy: {accuracy:.4f} ({correct_count}/{len(results)})")
        
        # Display confusion matrix
        cm = confusion_matrix(true_labels, predicted_labels)
        print("\nConfusion Matrix:")
        print(cm)
        
        # Display classification report
        target_names = ['fake', 'real']
        print("\nClassification Report:")
        print(classification_report(true_labels, predicted_labels, target_names=target_names))
        
        # Plot confusion matrix
        plt.figure(figsize=(8, 6))
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        tick_marks = np.arange(2)
        plt.xticks(tick_marks, target_names)
        plt.yticks(tick_marks, target_names)
        
        # Add text annotations to the confusion matrix
        thresh = cm.max() / 2
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                        ha="center", va="center",
                        color="white" if cm[i, j] > thresh else "black")
        
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()
        plt.show()
    
    return results



test_existing_model(model, r"dataset/for-norm/training/fake/file66.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav", is_directory=False)

# Directory test
# test_existing_model(model, "path/to/test_directory", is_directory=True)

Audio: file66.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav
Prediction: fake
Confidence: 0.9979


('fake', np.float32(0.9978618))

In [4]:
import numpy as np
import librosa
from pydub import AudioSegment
import os
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report
from tensorflow.keras.models import load_model

def load_saved_model(model_path):
    """
    Load a saved Keras model from disk.
    
    Parameters:
    - model_path: Path to the .keras model file
    
    Returns:
    - The loaded model object
    """
    try:
        model = load_model(model_path)
        print(f"Model successfully loaded from {model_path}")
        return model
    except Exception as e:
        print(f"Error loading model: {e}")
        return None

def preprocess_single_audio(audio_path):
    """
    Preprocess a single audio file the same way as in training.
    
    Parameters:
    - audio_path: Path to the audio file
    
    Returns:
    - Feature vector ready for model input
    """
    try:
        # Load and preprocess audio
        raw_audio = AudioSegment.from_file(audio_path)
        samples = np.array(raw_audio.get_array_of_samples(), dtype='float32')
        trimmed, _ = librosa.effects.trim(samples, top_db=25)
        padding = max(0, 50000 - len(trimmed))
        padded = np.pad(trimmed, (0, padding), 'constant')
        
        # Extract features
        FRAME_LENGTH = 2048
        HOP_LENGTH = 512
        
        # Extract ZCR
        zcr = librosa.feature.zero_crossing_rate(padded, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
        zcr = np.expand_dims(zcr, axis=0)
        
        # Extract RMS
        rms = librosa.feature.rms(y=padded, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
        rms = np.expand_dims(rms, axis=0)
        
        # Extract MFCCs
        mfccs = librosa.feature.mfcc(y=padded, sr=44100, n_mfcc=13, hop_length=HOP_LENGTH)
        mfccs = np.expand_dims(mfccs, axis=0)
        
        # Combine features
        zcr_features = np.swapaxes(zcr, 1, 2)
        rms_features = np.swapaxes(rms, 1, 2)
        mfccs_features = np.swapaxes(mfccs, 1, 2)
        
        X_features = np.concatenate((zcr_features, rms_features, mfccs_features), axis=2)
        
        return X_features
        
    except Exception as e:
        print(f"Error processing audio {audio_path}: {e}")
        return None

def test_single_audio(model, audio_path):
    """
    Test a single audio file with the model.
    
    Parameters:
    - model: The loaded model object
    - audio_path: Path to the audio file
    
    Returns:
    - predicted_class: 'real' or 'fake'
    - confidence: Confidence score
    """
    X_features = preprocess_single_audio(audio_path)
    
    if X_features is None:
        return None, None
    
    # Make prediction
    prediction = model.predict(X_features, verbose=0)[0]
    
    # Get class and confidence
    predicted_class_idx = np.argmax(prediction)
    confidence = prediction[predicted_class_idx]
    
    # Map class index to label
    predicted_class = 'real' if predicted_class_idx == 1 else 'fake'
    
    print(f"Audio: {os.path.basename(audio_path)}")
    print(f"Prediction: {predicted_class}")
    print(f"Confidence: {confidence:.4f}")
    
    return predicted_class, confidence

def batch_test(model, directory_path):
    """
    Test a directory of audio files with the model.
    
    Parameters:
    - model: The loaded model object
    - directory_path: Path to the directory containing audio files
    
    Returns:
    - results: List of dictionaries with prediction results
    """
    results = []
    true_labels = []
    predicted_labels = []
    
    # Check if the directory has real/fake subdirectories for evaluation
    has_ground_truth = False
    if os.path.isdir(os.path.join(directory_path, 'real')) and os.path.isdir(os.path.join(directory_path, 'fake')):
        has_ground_truth = True
    
    if has_ground_truth:
        # Process files with known ground truth
        for class_name in ['real', 'fake']:
            class_dir = os.path.join(directory_path, class_name)
            for file in os.listdir(class_dir):
                if file.endswith(('.wav', '.mp3', '.m4a', '.flac')):
                    audio_path = os.path.join(class_dir, file)
                    predicted_class, confidence = test_single_audio(model, audio_path)
                    
                    if predicted_class is not None:
                        results.append({
                            'file': file,
                            'true_class': class_name,
                            'prediction': predicted_class,
                            'confidence': float(confidence),
                            'correct': class_name == predicted_class
                        })
                        
                        # For confusion matrix
                        true_labels.append(1 if class_name == 'real' else 0)
                        predicted_labels.append(1 if predicted_class == 'real' else 0)
    else:
        # Process files without known ground truth
        for file in os.listdir(directory_path):
            if file.endswith(('.wav', '.mp3', '.m4a', '.flac')):
                audio_path = os.path.join(directory_path, file)
                predicted_class, confidence = test_single_audio(model, audio_path)
                
                if predicted_class is not None:
                    results.append({
                        'file': file,
                        'prediction': predicted_class,
                        'confidence': float(confidence)
                    })
    
    # Print summary statistics
    print("\nSummary:")
    print(f"Total files analyzed: {len(results)}")
    
    real_preds = [r for r in results if r['prediction'] == 'real']
    fake_preds = [r for r in results if r['prediction'] == 'fake']
    print(f"Predicted as real: {len(real_preds)} ({len(real_preds)/len(results)*100:.1f}%)")
    print(f"Predicted as fake: {len(fake_preds)} ({len(fake_preds)/len(results)*100:.1f}%)")
    print(f"Average confidence: {np.mean([r['confidence'] for r in results]):.4f}")
    
    # If we have ground truth, calculate accuracy metrics
    if has_ground_truth:
        correct_count = sum(1 for r in results if r.get('correct', False))
        accuracy = correct_count / len(results)
        print(f"Accuracy: {accuracy:.4f} ({correct_count}/{len(results)})")
        
        # Display confusion matrix
        cm = confusion_matrix(true_labels, predicted_labels)
        print("\nConfusion Matrix:")
        print(cm)
        
        # Display classification report
        target_names = ['fake', 'real']
        print("\nClassification Report:")
        print(classification_report(true_labels, predicted_labels, target_names=target_names))
        
        # Plot confusion matrix
        plt.figure(figsize=(8, 6))
        plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
        plt.title('Confusion Matrix')
        plt.colorbar()
        tick_marks = np.arange(2)
        plt.xticks(tick_marks, target_names)
        plt.yticks(tick_marks, target_names)
        
        # Add text annotations to the confusion matrix
        thresh = cm.max() / 2
        for i in range(cm.shape[0]):
            for j in range(cm.shape[1]):
                plt.text(j, i, format(cm[i, j], 'd'),
                        ha="center", va="center",
                        color="white" if cm[i, j] > thresh else "black")
        
        plt.ylabel('True label')
        plt.xlabel('Predicted label')
        plt.tight_layout()
        plt.show()
    
    return results

def test_saved_model(model_path, audio_path, is_directory=False):
 
    model = load_saved_model(model_path)
    
    if model is None:
        print("Failed to load model. Exiting.")
        return None
    
    # Test the model
    if is_directory:
        return batch_test(model, audio_path)
    else:
        return test_single_audio(model, audio_path)


# Example usage:
if __name__ == "__main__":

    MODEL_PATH = "deepfakedetectioncyber.keras"
   
    print("\n=== Testing Single File ===")
    result = test_saved_model(MODEL_PATH, r"C:\Users\chawk\Downloads\file82.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav", is_directory=False)
    
    # Test a directory
    # print("\n=== Testing Directory ===")
    # results = test_saved_model(MODEL_PATH, "path/to/test_directory", is_directory=True)


=== Testing Single File ===
Model successfully loaded from deepfakedetectioncyber.keras
Audio: file82.mp3.wav_16k.wav_norm.wav_mono.wav_silence.wav
Prediction: fake
Confidence: 0.9427
