In [1]:
import joblib
import numpy as np
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.utils import to_categorical

import configuration
from notebook_utils import notebookToPython
from label_reader import readLabels

In [2]:
config = configuration.ConfigLoader('config.yml')

notebookToPython(config.projectName)
job = config.getJobConfig(config.activeJobId)

Write python file


In [3]:
trainingLabels = readLabels(job)

Loading C:/Users/tubas/workspace/Deepfake/data/ASVspoof-2019/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt...


In [4]:
X = []
y = []


fullDataPath = job.fullJoinFilePath(job.dataPath, job.trainingDataPath)

for filename, label in trainingLabels.items():
    audioSourceFilename = job.fullJoinFilePath(fullDataPath, filename + job.trainingDataExtension)
    
    audio, _ = librosa.load(audioSourceFilename, sr = job.sampleRate, duration = job.duration)

    mel_spectrogram = librosa.feature.melspectrogram(y = audio, sr = job.sampleRate, n_mels = job.numMels)
    mel_spectrogram = librosa.power_to_db(mel_spectrogram, ref=np.max)

    if (mel_spectrogram.shape[1] < job.maxTimeSteps):
        padWidth = ((0, 0), (0, job.maxTimeSteps - mel_spectrogram.shape[1]))
        mel_spectrogram = np.pad(array=mel_spectrogram, pad_width=padWidth, mode='constant')
    else:
        mel_spectrogram = mel_spectrogram[:, :job.maxTimeSteps]

    X.append(mel_spectrogram)
    y.append(label)

In [5]:
X = np.array(X)
y = np.array(y)
y_encoded = to_categorical(y, job.numClasses)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2)    # test data is 20% of all data

In [7]:
# Define CNN model architecture
input_shape = (job.numMels, X_train.shape[2], 1)  # Input shape for CNN (height, width, channels)
model_input = Input(shape=input_shape)


In [8]:
# TODO - why were these parameters selected? What purpose do they serve? Should they be configurable?
x = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(model_input)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(x)
x = MaxPooling2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(units=128, activation='relu')(x)
x = Dropout(0.5)(x)

model_output = Dense(job.numClasses, activation='softmax')(x)

In [9]:
model = Model(inputs=model_input, outputs=model_output)


In [10]:
model.compile(optimizer=job.optimizer, loss=job.loss, metrics=job.metrics)

In [11]:
# Train the Model
model.fit(X_train, y_train, batch_size=job.batchSize, epochs=job.numEpochs, validation_data=(X_test, y_test))

Epoch 1/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m43s[0m 66ms/step - accuracy: 0.8861 - loss: 5.4227 - val_accuracy: 0.9102 - val_loss: 0.2121
Epoch 2/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m42s[0m 66ms/step - accuracy: 0.9082 - loss: 0.2225 - val_accuracy: 0.9257 - val_loss: 0.2010
Epoch 3/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 65ms/step - accuracy: 0.9235 - loss: 0.1899 - val_accuracy: 0.9358 - val_loss: 0.1743
Epoch 4/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 62ms/step - accuracy: 0.9356 - loss: 0.1644 - val_accuracy: 0.9468 - val_loss: 0.1527
Epoch 5/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m39s[0m 62ms/step - accuracy: 0.9456 - loss: 0.1325 - val_accuracy: 0.9600 - val_loss: 0.1179
Epoch 6/10
[1m635/635[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 64ms/step - accuracy: 0.9590 - loss: 0.1072 - val_accuracy: 0.9651 - val_loss: 0.1060
Epoch 7/10
[1m6

<keras.src.callbacks.history.History at 0x2228cf4ac30>

In [12]:
joblib.dump(model, job.persistedModel)

['ASVspoof-2019-1.libjob']