In [1]:
import json
from datetime import datetime

import joblib
import numpy as np
import pytz
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

import configuration.configuration as configuration
import model_definitions.model_cnn_definition as model_cnn_definition
from mel_spectrogram.mel_spectrogram import MelSpectrogramGenerator
from notebook_utils import notebookToPython

In [15]:
# Load the project configuration from config.yml (path is relative to the
# notebook's working directory).
config = configuration.ConfigLoader('config.yml')

# NOTE(review): presumably exports this notebook as a Python script named after
# the project (the cell output prints "Write python file") — confirm in notebook_utils.
notebookToPython(config.projectName)
# Select the job marked active in the configuration; every later cell reads its
# settings from `job`.
job = config.getJobConfig(config.activeJobId)

Write python file
Using configured model name: ASVspoof-2019-1_2025-03-16T21-46-18.789612.libjob
Assigned model name: ASVspoof-2019-1_2025-03-16T21-46-18.789612.libjob


In [3]:
# Build the features (X) and encoded labels (y_encoded) for the active job.
# NOTE(review): assumes y_encoded is one-hot encoded (later cells take argmax over
# axis 1) and that X has at least 3 dimensions (X_train.shape[2] is read) — confirm
# against MelSpectrogramGenerator.
generator = MelSpectrogramGenerator()
X, y_encoded = generator.generateMelSpectrograms(job, job.dataPathSuffix)

Loading C:/Users/tubas/workspace/Deepfake/data/ASVspoof-2019/LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt...
fullDataPath: C:/Users/tubas/workspace/Deepfake/data/ASVspoof-2019/LA/ASVspoof2019_LA_train/flac
Loading audio files: 1269
Loading audio files: 2538
Loading audio files: 3807
Loading audio files: 5076
Loading audio files: 6345
Loading audio files: 7614
Loading audio files: 8883
Loading audio files: 10152
Loading audio files: 11421
Loading audio files: 12690
Loading audio files: 13959
Loading audio files: 15228
Loading audio files: 16497
Loading audio files: 17766
Loading audio files: 19035
Loading audio files: 20304
Loading audio files: 21573
Loading audio files: 22842
Loading audio files: 24111
Loading audio files: 25380
Number of audio files load: 25380


In [None]:
# Fixed seed so the train/test partition is identical on every run. Without it
# train_test_split draws a new random partition each time, so accuracy figures
# from different runs cannot be reproduced or compared.
SPLIT_SEED = 42

if job.newModelGenerated:
    # A new model will be trained: hold out 20% of all samples as test data.
    print("Selecting training and test data")
    X_train, X_test, y_train, y_test = train_test_split(
        X, y_encoded, test_size=0.2, random_state=SPLIT_SEED
    )
else:
    # An already persisted model is evaluated: nothing is trained, so the whole
    # dataset becomes test data and the training sets are explicitly unset.
    print("Assigning all data for evaluation")
    y_train = None
    X_train = None
    X_test = X
    y_test = y_encoded
    # Replace the results file name for this evaluation run.
    # NOTE(review): the meaning of the `True` flag is defined by
    # job.newPersistedModelResultsName — confirm in the configuration module.
    job.persistedModelResults = job.newPersistedModelResultsName(job.persistedModel, True)

Assigning all data for evaluation


In [5]:
# Only build a network when a new model is requested; otherwise a persisted
# model is loaded in a later cell.
if job.newModelGenerated:
    # The size of the third axis of the training data and the constant 1 are
    # handed to the model definition.
    # NOTE(review): presumably the input width and channel count — confirm in ModelCnnDefinition.
    third_axis_size = X_train.shape[2]
    modelDef = model_cnn_definition.ModelCnnDefinition(job, third_axis_size, 1)
    model = modelDef.buildModel()

In [6]:
# Compile the freshly built model with the optimizer, loss and metrics taken
# from the job configuration. Skipped when a persisted model is used.
if job.newModelGenerated:
    model.compile(
        optimizer=job.optimizer,
        loss=job.loss,
        metrics=job.metrics,
    )

In [7]:
# Train only when a new model was requested for this job.
if job.newModelGenerated:
    print("Training the Model...")
    # The held-out test split doubles as the validation set during training.
    validation_pair = (X_test, y_test)
    model.fit(
        X_train,
        y_train,
        batch_size=job.batchSize,
        epochs=job.numEpochs,
        validation_data=validation_pair,
    )

In [8]:
# Persist a newly trained model, or restore the previously persisted one.
if not job.newModelGenerated:
    print(f"Loading model: {job.persistedModel}")
    # NOTE(review): joblib.load unpickles the file and can execute arbitrary
    # code — only load model files from a trusted source.
    model = joblib.load(job.persistedModel)
else:
    print(f"Saving model: {job.persistedModel}")
    joblib.dump(model, job.persistedModel)

Loading model: ASVspoof-2019-1_2025-03-16T21-46-18.789612.libjob


### Test Model

In [9]:
# Run the model on the test data, then collapse both the predictions and the
# labels to class indices by taking the arg-max along axis 1.
y_pred = model.predict(X_test)
y_pred_work, y_test_work = (
    np.argmax(per_class_values, axis=1) for per_class_values in (y_pred, y_test)
)
# Show the predicted class indices as the cell output.
y_pred_work

[1m794/794[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 20ms/step


array([1, 1, 1, ..., 0, 0, 0])

In [10]:
# Display the true class indices (arg-max of y_test) for visual comparison with
# the predicted indices shown by the previous cell.
y_test_work

array([1, 1, 1, ..., 0, 0, 0])

In [11]:
# accuracy_score is imported together with the other dependencies in the
# notebook's import cell, so this cell no longer carries its own import.

# Fraction of test samples whose predicted class index matches the true one.
score = accuracy_score(y_test_work, y_pred_work)

# Timezone-aware UTC timestamp marking job completion; written to the report.
timestamp_utc = datetime.now(pytz.utc)

In [12]:
# Serialize the job settings for the report. default=str keeps the dump from
# raising TypeError if the job ever carries a value that is not natively
# JSON-serializable; output for already-serializable values is unchanged.
prettyJson = json.dumps(job.__dict__, indent=4, default=str)

# Plain-text run report: completion time, model file, accuracy and job settings.
report = (
    f"job completed: {timestamp_utc.isoformat()}\n"
    f"model file: {job.persistedModel}\n"
    f"accuracy_score: {score}\n\n"
    f"job: {prettyJson}\n"
)

print(report)

# Explicit encoding so the results file is written identically on every
# platform instead of depending on the locale's default encoding.
with open(job.persistedModelResults, "w", encoding="utf-8") as file:
    file.write(report)

job completed: 2025-03-18T03:04:39.242912+00:00
model file: ASVspoof-2019-1_2025-03-16T21-46-18.789612.libjob
accuracy_score: 0.9908589440504334

job: {
    "jobId": "ASVspoof-2019_test-1",
    "dataPathRootRaw": "$HOMEDRIVE$HOMEPATH/workspace/Deepfake/data/ASVspoof-2019",
    "dataPathRoot": "C:/Users/tubas/workspace/Deepfake/data/ASVspoof-2019",
    "dataPathSuffix": "LA/ASVspoof2019_LA_train/flac",
    "dataExtension": ".flac",
    "labelFilename": "LA/ASVspoof2019_LA_cm_protocols/ASVspoof2019.LA.cm.train.trn.txt",
    "executeToCategoricalForLabels": true,
    "numClasses": 2,
    "sampleRate": 16000,
    "duration": 5,
    "numMels": 128,
    "maxTimeSteps": 109,
    "optimizer": "adam",
    "loss": "categorical_crossentropy",
    "metrics": [
        "accuracy"
    ],
    "batchSize": 32,
    "numEpochs": 10,
    "newModelGenerated": false,
    "persistedModel": "ASVspoof-2019-1_2025-03-16T21-46-18.789612.libjob",
    "persistedModelResults": "ASVspoof-2019-1_2025-03-16T21-46-18.