In [19]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ReduceLROnPlateau
import sklearn.metrics  


In [None]:

def load_and_preprocess_data(train_file, dev_file, test_file):
    """Read the three CSV splits and return CNN-ready features with integer labels.

    For every split the feature matrix is built from all columns except the
    first and the last one, and the target is read from the ``class`` column.
    One ``LabelEncoder`` is fitted on the training labels and then applied,
    unchanged, to the dev and test labels. Each feature matrix receives a
    trailing axis of length 1.

    Args:
        train_file: Training CSV, passed to ``pd.read_csv``.
        dev_file: Development CSV, passed to ``pd.read_csv``.
        test_file: Test CSV, passed to ``pd.read_csv``.

    Returns:
        Tuple ``(X_train, y_train, X_dev, y_dev, X_test, y_test)``.
    """
    frames = [pd.read_csv(source) for source in (train_file, dev_file, test_file)]

    # Everything between the first and the last column is the feature vector.
    feature_mats = [frame.iloc[:, 1:-1].values for frame in frames]
    raw_labels = [frame['class'].values for frame in frames]

    # Fit on train only so dev/test reuse exactly the same label -> int mapping.
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(raw_labels[0])
    y_dev, y_test = (label_encoder.transform(labels) for labels in raw_labels[1:])

    # (samples, features) -> (samples, features, 1)
    X_train, X_dev, X_test = (np.expand_dims(mat, axis=2) for mat in feature_mats)

    return X_train, y_train, X_dev, y_dev, X_test, y_test

def compute_eer(label, pred, positive_label=1):
    """Return the equal error rate (EER) of a score-based binary detector.

    The ROC curve is obtained from ``sklearn.metrics.roc_curve`` and the
    operating point where the false-negative rate (``1 - tpr``) is closest to
    the false-positive rate is located. The EER is reported as the average of
    the two rates at that point, since they need not be exactly equal on the
    discrete curve.

    Args:
        label: Ground-truth labels, one per sample.
        pred: Scores for the positive class, one per sample.
        positive_label: Label value treated as the positive class.

    Returns:
        The EER as a fraction (multiply by 100 for a percentage).
    """
    fpr, tpr, _ = sklearn.metrics.roc_curve(label, pred, pos_label=positive_label)
    fnr = 1 - tpr
    # Find the crossing point once; both rates are read at the same index.
    eer_index = np.nanargmin(np.absolute(fnr - fpr))
    return (fpr[eer_index] + fnr[eer_index]) / 2

def build_cnn_model(input_shape, initial_learning_rate=0.001):
    """Assemble and compile the 1-D CNN used as the two-class classifier.

    The network stacks two Conv1D -> BatchNormalization -> MaxPooling1D ->
    Dropout stages (64 and then 128 filters), flattens the result, and feeds
    it through a 128-unit dense layer into a 2-way softmax output.

    Args:
        input_shape: Shape of one sample, forwarded to the first Conv1D layer.
        initial_learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    first_conv_stage = [
        Conv1D(64, kernel_size=3, activation='relu', input_shape=input_shape),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),
    ]
    second_conv_stage = [
        Conv1D(128, kernel_size=3, activation='relu'),
        BatchNormalization(),
        MaxPooling1D(pool_size=2),
        Dropout(0.2),
    ]
    classifier_head = [
        Flatten(),
        Dense(128, activation='relu'),
        BatchNormalization(),
        Dropout(0.5),
        Dense(2, activation='softmax'),
    ]
    model = Sequential(first_conv_stage + second_conv_stage + classifier_head)

    # Integer class ids are used as targets, hence the sparse loss.
    adam = tf.keras.optimizers.Adam(learning_rate=initial_learning_rate)
    model.compile(
        optimizer=adam,
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )

    return model

def train_and_evaluate_cnn(train_file, dev_file, test_file, epochs=10, batch_size=32, initial_learning_rate=0.001, model_save_path="cnn_MMS.h5"):
    """Train the CNN on CSV features, report test metrics, and save the model.

    The splits are loaded with ``load_and_preprocess_data``, the network is
    built with ``build_cnn_model`` and fitted with the dev split as validation
    data. The test split is then scored once; a classification report and the
    equal error rate are printed, and the model is written to disk twice (to
    the ``"#"`` path and to ``cnn_MMS.keras``).

    Args:
        train_file: Training CSV.
        dev_file: Development CSV, used as validation data during fitting.
        test_file: Test CSV, used only for the final report.
        epochs: Number of training epochs.
        batch_size: Mini-batch size for fitting.
        initial_learning_rate: Starting learning rate for the optimizer.
        model_save_path: NOTE(review): currently unused; the save locations
            below are hard-coded. Kept so existing callers keep working.

    Returns:
        The trained model.
    """
    X_train, y_train, X_dev, y_dev, X_test, y_test = load_and_preprocess_data(train_file, dev_file, test_file)

    model = build_cnn_model(input_shape=(X_train.shape[1], 1), initial_learning_rate=initial_learning_rate)

    # Halve the learning rate after 3 epochs without val_loss improvement,
    # never going below 1e-6.
    reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)

    model.fit(
        X_train, y_train,
        validation_data=(X_dev, y_dev),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[reduce_lr]
    )

    # Score the test set once and derive both the class-1 score and the hard
    # label from the same probabilities (previously inference ran twice).
    test_probs = model.predict(X_test)
    y_pred_prob = test_probs[:, 1]
    y_pred = np.argmax(test_probs, axis=1)
    # Assumes the label encoder maps bonafide -> 0 and spoof -> 1 — TODO confirm.
    print(classification_report(y_test, y_pred, target_names=['bonafide', 'spoof'], digits=4))

    eer = compute_eer(y_test, y_pred_prob, positive_label=1)
    eer_percentage = eer * 100
    print(f'Equal Error Rate (EER): {eer_percentage:.20f}%')

    model.save("#")  # SavedModel format
    print("Model saved in TensorFlow's SavedModel format.")

    keras_model_path = "cnn_MMS.keras"
    model.save(keras_model_path)

    return model


In [None]:
# Locations of the train / dev / test feature CSVs.
# NOTE(review): '#' looks like a placeholder — point these at the real files.
train_file, dev_file, test_file = '#', '#', '#'

# Train, evaluate and save the classifier; the returned model stays available
# to later cells as `cnn_model`.
cnn_model = train_and_evaluate_cnn(
    train_file=train_file,
    dev_file=dev_file,
    test_file=test_file,
    epochs=10,
    batch_size=32,
    initial_learning_rate=0.001,
)


In [1]:
# emofake

In [None]:
import os
import numpy as np
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
import torchaudio
from torchaudio.transforms import Resample
# Audio clip whose embedding is extracted further down in this cell.
file_path = "0019_Angry_000351.wav"
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# NOTE(review): "#" looks like a placeholder for the pretrained checkpoint
# name/path — confirm the real value before running.
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("#")
# The encoder is moved to `device`; inputs are moved there too before the forward pass.
model = Wav2Vec2Model.from_pretrained("#").to(device)

def preprocess_audio(audio_path):
    """Load an audio file as a single-channel waveform at 16 kHz.

    The file is read with ``torchaudio.load``, resampled when its native rate
    differs from 16 kHz, and averaged over the channel axis (``keepdim=True``)
    when it has more than one channel.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        ``(waveform, 16000)`` on success, or ``(None, None)`` if anything
        raises while loading or converting. The error is printed rather than
        propagated.
    """
    target_rate = 16000
    try:
        audio, native_rate = torchaudio.load(audio_path)

        if native_rate != target_rate:
            audio = Resample(native_rate, target_rate)(audio)

        # More than one channel: down-mix by averaging, keeping the channel axis.
        if audio.shape[0] > 1:
            audio = audio.mean(dim=0, keepdim=True)
    except Exception as e:
        print(f"Error loading audio file {audio_path}: {e}")
        return None, None

    return audio, target_rate

def extract_features(audio_path, feature_extractor, model, device):
    """Embed one audio file as the time-average of the model's last hidden states.

    Args:
        audio_path: Path of the audio file to embed.
        feature_extractor: Callable that turns the raw waveform into model
            inputs; its result must support ``.to(device)`` and ``**`` unpacking.
        model: Encoder whose output exposes ``last_hidden_state``.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        A 1-D NumPy array with one value per hidden dimension, or ``None``
        when the audio could not be loaded.
    """
    waveform, fs = preprocess_audio(audio_path)
    if waveform is None:
        return None
    inputs = feature_extractor(waveform.squeeze().numpy(), sampling_rate=fs, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = model(**inputs)
        embeddings = outputs.last_hidden_state.cpu().numpy()
    # Only one waveform is passed in, so select that single batch entry
    # explicitly. A bare squeeze() would also drop the time axis whenever the
    # clip yields a single frame, and the mean below would then collapse the
    # embedding to a scalar.
    frame_embeddings = embeddings[0]
    avg_embeddings = np.mean(frame_embeddings, axis=0)
    print(avg_embeddings.shape)
    return avg_embeddings

# Smoke test: embed the sample clip and report the size of the result.
features = extract_features(file_path, feature_extractor, model, device)
if features is None:
    print("Failed to extract features.")
else:
    print(f"Shape of the extracted features: {features.shape}")


  from .autonotebook import tqdm as notebook_tqdm
2024-11-18 20:14:30.628289: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-18 20:14:30.632294: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2024-11-18 20:14:30.632314: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Using device: cuda
(1280,)
Shape of the extracted features: (1280,)


In [None]:
from tensorflow.keras.models import load_model

# NOTE(review): "#" looks like a placeholder for the saved-model location — confirm.
saved_model_path = "#"

try:
    cnn_model = load_model(saved_model_path)
except Exception as e:
    # Best effort: the failure is only reported, so `cnn_model` is not
    # (re)assigned by this cell when loading fails.
    print(f"Error loading model: {e}")
else:
    print("Model loaded successfully from SavedModel format.")


Model loaded successfully from SavedModel format.


In [None]:
import os
import numpy as np
import torch
from transformers import Wav2Vec2FeatureExtractor, Wav2Vec2Model
import torchaudio
from torchaudio.transforms import Resample
from tensorflow.keras.models import load_model

# Audio clip to classify.
file_path = "0019_Angry_000351.wav"
# NOTE(review): "#" looks like a placeholder for the saved CNN location — confirm.
saved_model_path = "#"
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")
# NOTE(review): "#" looks like a placeholder for the pretrained checkpoint
# name/path — confirm the real value before running.
feature_extractor = Wav2Vec2FeatureExtractor.from_pretrained("#")
# The encoder is moved to `device`; inputs are moved there too before the forward pass.
wav2vec_model = Wav2Vec2Model.from_pretrained("#").to(device)

def preprocess_audio(audio_path):
    """Read an audio file and normalise it to one channel at 16 kHz.

    Steps, in order: load with ``torchaudio.load``; resample if the file's
    rate is not 16 kHz; if there is more than one channel, replace them by
    their mean while keeping the channel axis.

    Args:
        audio_path: Path of the audio file to read.

    Returns:
        ``(waveform, 16000)`` when everything succeeds. If any step raises,
        the error is printed and ``(None, None)`` is returned instead.
    """
    wanted_rate = 16000
    try:
        signal, file_rate = torchaudio.load(audio_path)

        needs_resampling = file_rate != wanted_rate
        if needs_resampling:
            to_wanted_rate = Resample(file_rate, wanted_rate)
            signal = to_wanted_rate(signal)

        channel_count = signal.shape[0]
        if channel_count > 1:  # Convert to mono if stereo
            signal = signal.mean(dim=0, keepdim=True)
    except Exception as e:
        print(f"Error loading audio file {audio_path}: {e}")
        return None, None
    else:
        return signal, wanted_rate

def extract_features(audio_path, feature_extractor, wav2vec_model, device):
    """Embed one audio file as the time-average of the encoder's last hidden states.

    Args:
        audio_path: Path of the audio file to embed.
        feature_extractor: Callable that turns the raw waveform into model
            inputs; its result must support ``.to(device)`` and ``**`` unpacking.
        wav2vec_model: Encoder whose output exposes ``last_hidden_state``.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        A 1-D NumPy array with one value per hidden dimension, or ``None``
        when the audio could not be loaded.
    """
    waveform, fs = preprocess_audio(audio_path)
    if waveform is None:
        return None
    inputs = feature_extractor(waveform.squeeze().numpy(), sampling_rate=fs, return_tensors="pt").to(device)

    with torch.no_grad():
        outputs = wav2vec_model(**inputs)
        embeddings = outputs.last_hidden_state.cpu().numpy()
    # Only one waveform is passed in, so select that single batch entry
    # explicitly. A bare squeeze() would also drop the time axis whenever the
    # clip yields a single frame, and the mean below would then collapse the
    # embedding to a scalar.
    frame_embeddings = embeddings[0]
    avg_embeddings = np.mean(frame_embeddings, axis=0)
    print(f"Extracted Embeddings Shape: {avg_embeddings.shape}")
    return avg_embeddings

# Load the trained CNN; inference below cannot proceed without it.
try:
    cnn_model = load_model(saved_model_path)
    print("TensorFlow model loaded successfully.")
except Exception as e:
    print(f"Error loading TensorFlow model: {e}")
    # Re-raise instead of calling exit(): exit() is the interactive-shell
    # helper and terminates the interpreter without surfacing the failure,
    # whereas re-raising stops this cell with the original traceback.
    raise

def predict_with_cnn(audio_path, cnn_model, feature_extractor, wav2vec_model, device):
    """Classify one audio file with the CNN and print the outcome.

    The embedding returned by ``extract_features`` gets a leading and a
    trailing axis of length 1 before it is passed to ``cnn_model.predict``.
    The arg-max class is printed as ``bonafide`` (index 0) or ``spoof``
    (index 1), followed by the raw prediction array.

    Args:
        audio_path: Path of the audio file to classify.
        cnn_model: Model exposing ``predict``.
        feature_extractor: Forwarded to ``extract_features``.
        wav2vec_model: Forwarded to ``extract_features``.
        device: Forwarded to ``extract_features``.

    Returns:
        None. Results are only printed; a message is printed and the function
        returns early when no features could be extracted.
    """
    embedding = extract_features(audio_path, feature_extractor, wav2vec_model, device)
    if embedding is None:
        print("Failed to extract features.")
        return

    # Add the batch axis in front, then the channel axis at the end.
    cnn_input = np.expand_dims(np.expand_dims(embedding, axis=0), axis=2)
    print(f"Reshaped Features for CNN: {cnn_input.shape}")

    class_probs = cnn_model.predict(cnn_input)
    top_class = np.argmax(class_probs, axis=1)

    label_by_index = ("bonafide", "spoof")
    winner = label_by_index[top_class[0]]
    print(f"Predicted Class: {winner}")
    print(f"Class Probabilities: {class_probs}")

# Classify the sample clip with the loaded CNN and print the verdict.
predict_with_cnn(
    audio_path=file_path,
    cnn_model=cnn_model,
    feature_extractor=feature_extractor,
    wav2vec_model=wav2vec_model,
    device=device,
)


Using device: cuda
TensorFlow model loaded successfully.
Extracted Embeddings Shape: (1280,)
Reshaped Features for CNN: (1, 1280, 1)
Predicted Class: bonafide
Class Probabilities: [[9.9999833e-01 1.6508124e-06]]
