In [1]:
notebookName = 'original-audio-deepfake-detection'
runJobId = 'ASVspoof-2019_training'
random_state = 186

In [2]:
import joblib
import numpy as np
import librosa
import tensorflow as tf
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

import configuration.configuration as configuration
from configuration.configuration import Job
from notebook_utils import notebookToPython
from readers.label_reader import readTrainingLabelsWithJob

In [3]:
config = configuration.ConfigLoader('config.yml')

notebookToPython(notebookName)
job: Job = config.getJobConfig(runJobId)

Write python file
Generating new model name: output/ASVspoof-2019_training_2025-03-27T20-07-45.848482.libjob
Assigned model name: output/ASVspoof-2019_training_2025-03-27T20-07-45.848482.libjob


In [4]:
trainingLabels = readTrainingLabelsWithJob(job)

Loading C:/Users/tubas/workspace/Deepfake/data/ASVspoof-2019/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt...


In [5]:
X = []
y = []


fullDataPath = job.fullJoinFilePath(job.dataPathRoot, job.dataPathSuffix)

for filename, label in trainingLabels.items():
    audioSourceFilename = job.fullJoinFilePath(fullDataPath, filename + job.dataExtension)
    
    audio, _ = librosa.load(audioSourceFilename, sr = job.sampleRate, duration = job.duration)

    mel_spectrogram = librosa.feature.melspectrogram(y = audio, sr = job.sampleRate, n_mels = job.numMels)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    if (mel_spectrogram.shape[1] < job.maxTimeSteps):
        padWidth = ((0, 0), (0, job.maxTimeSteps - mel_spectrogram.shape[1]))
        mel_spectrogram = np.pad(array=mel_spectrogram, pad_width=padWidth, mode='constant')
    else:
        mel_spectrogram = mel_spectrogram[:, :job.maxTimeSteps]

    X.append(mel_spectrogram)
    y.append(label)

In [6]:
X = np.array(X)
y = np.array(y)
y_encoded = to_categorical(y, job.numClasses)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=random_state)    # test data is 20% of all data

In [8]:
# Define CNN model architecture
input_shape = (job.numMels, X_train.shape[2], 1)  # Input shape for CNN (height, width, channels)
model_input = Input(shape=input_shape)


In [9]:
# TODO - why were these parameters selected? What purpose do they serve? Should they be configurable?
x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(model_input)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(units=128, activation='relu')(x)
x = Dropout(0.5)(x)

model_output = Dense(job.numClasses, activation='softmax')(x)

In [10]:
model = Model(inputs=model_input, outputs=model_output)


In [11]:
model.compile(optimizer=job.optimizer, loss=job.loss, metrics=job.metrics)

In [12]:
# Train the Model
model.fit(X_train, y_train, batch_size=job.batchSize, epochs=job.numEpochs, validation_data=(X_test, y_test))

Epoch 1/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 61ms/step - accuracy: 0.8859 - loss: 4.7077 - val_accuracy: 0.8983 - val_loss: 0.2594
Epoch 2/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 62ms/step - accuracy: 0.9203 - loss: 0.1870 - val_accuracy: 0.9060 - val_loss: 0.3390
Epoch 3/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 63ms/step - accuracy: 0.9424 - loss: 0.1407 - val_accuracy: 0.9817 - val_loss: 0.0483
Epoch 4/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 64ms/step - accuracy: 0.9748 - loss: 0.0657 - val_accuracy: 0.9819 - val_loss: 0.0497
Epoch 5/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 64ms/step - accuracy: 0.9723 - loss: 0.0735 - val_accuracy: 0.9641 - val_loss: 0.0907
Epoch 6/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 64ms/step - accuracy: 0.9661 - loss: 0.0832 - val_accuracy: 0.9787 - val_loss: 0.0572
Epoch 7/10
[1m6

<keras.src.callbacks.history.History at 0x1a70766ed80>

In [13]:
joblib.dump(model, job.persistedModel)

['output/ASVspoof-2019_training_2025-03-27T20-07-45.848482.libjob']

In [14]:
y_pred = model.predict(X_test)
y_pred = np.argmax(y_pred, axis=1)
y_test = np.argmax(y_test, axis=1)
y_pred

[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step


array([0, 0, 0, ..., 0, 0, 1])

In [15]:
y_test

array([0, 0, 0, ..., 0, 0, 1])

In [16]:
score = accuracy_score(y_test, y_pred)
score

0.9909377462568952