In [1]:
import sys
import os
import pandas as pd
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))
from utils.utils import save_experiment, train_and_evaluate_linear_svm, grid_search_model, evaluate_model_on_parquet
from configs.config_local import DATASET_PATH, ITW_DATASET_PATH, MODELS_PATH

In [2]:
# Resolve the train/test feature files inside DATASET_PATH. Both paths are
# reused by the training call later in this cell.
train_data_path, test_data_path = (
    os.path.join(DATASET_PATH, parquet_name)
    for parquet_name in (
        "training_features_40_512_256_128.parquet",
        "testing_features_40_512_256_128.parquet",
    )
)

# Load both splits into memory as DataFrames.
df = pd.read_parquet(train_data_path)
df_test = pd.read_parquet(test_data_path)

# Quick look at the test split: first rows, then its (rows, columns) shape.
print(df_test.head(), df_test.shape, sep="\n")

# Hyper-parameters handed to the linear SVM training helper.
svc_params = dict(
    C=1000,
    class_weight="balanced",
    max_iter=20000,
    random_state=42,
)

# Call the train-and-evaluate helper with both parquet paths and the
# parameters above. Note that `svc_params` is rebound to the value the helper
# returns in the third position.
(
    pipeline,
    metrics,
    svc_params,
    feature_names,
    metadata_extra,
) = train_and_evaluate_linear_svm(train_data_path, test_data_path, svc_params)

  label                                           filename  mfcc_mean_0  \
0  real    file1.wav_16k.wav_norm.wav_mono.wav_silence.wav  -335.984039   
1  real   file10.wav_16k.wav_norm.wav_mono.wav_silence.wav  -335.599396   
2  real  file100.wav_16k.wav_norm.wav_mono.wav_silence.wav  -309.998932   
3  real  file1000.wav_16k.wav_norm.wav_mono.wav_silence...  -329.094482   
4  real  file1001.wav_16k.wav_norm.wav_mono.wav_silence...  -369.833069   

   mfcc_std_0  mfcc_mean_1  mfcc_std_1  mfcc_mean_2  mfcc_std_2  mfcc_mean_3  \
0  106.660751   134.430344   62.437195     9.798609   39.744644    13.036348   
1  115.704865   106.857346   50.405762    12.835211   42.213852    37.456020   
2   72.639381   143.500931   32.701843    16.544865   31.033148    49.439816   
3  119.318130    90.687096   70.433701    12.409759   38.385540    40.940025   
4  129.029114    77.314659   67.398788     2.168967   48.882267    34.795979   

   mfcc_std_3  ...  mel_spectrogram_mean_123  mel_spectrogram_std_12

In [3]:
# Show the `metrics` value produced by the training cell.
print(metrics)

{'accuracy': 0.8088044885627967, 'precision': 0.868421052631579, 'recall': 0.7379746835443038, 'f1': 0.8082463892226666, 'roc_auc': 0.8104626067898197}


In [4]:
# Save the fitted pipeline together with its metrics, parameters and feature
# names. The project root is derived the same way as in the first cell
# (`Path.cwd().parent`) instead of being read back from `sys.path[0]`, which
# silently points elsewhere if anything prepends another entry to `sys.path`.
# The call stays the last expression so the returned path is still displayed.
save_experiment(
    model=pipeline,
    metrics=metrics,
    experiment_dir=os.path.join(
        str(Path.cwd().parent), "notebooks", "experiments", "linear_svm"
    ),
    model_params=svc_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)


Experiment saved to: c:\Users\konst\Documents\GitHub\audio-deepfake-detection\notebooks\experiments\linear_svm\exp_20260111_195243


'c:\\Users\\konst\\Documents\\GitHub\\audio-deepfake-detection\\notebooks\\experiments\\linear_svm\\exp_20260111_195243'

In [2]:
from sklearn.pipeline import Pipeline
from sklearn.svm import LinearSVC
from sklearn.preprocessing import StandardScaler
import os
# Two-step pipeline: feature standardisation followed by a linear SVM. The
# step names ("scaler", "svm") are what the "svm__" prefixes in the parameter
# grid below refer to.
model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("svm", LinearSVC(max_iter=20000, random_state=42)),
    ]
)

# Search space: five values of C crossed with two class-weight settings.
# NOTE(review): the earlier comment suggested stopping C at 100 "for RBF", but
# this pipeline uses LinearSVC; confirm whether that cap is still relevant.
param_grid = {
    "svm__C": [0.1, 1, 10, 100, 1000],
    "svm__class_weight": [None, "balanced"],
}

# Train/test feature files for the search, both located under DATASET_PATH.
train_data_path, test_data_path = (
    os.path.join(DATASET_PATH, parquet_name)
    for parquet_name in (
        "training_features_40_512_256_128.parquet",
        "testing_features_40_512_256_128.parquet",
    )
)

# Run the grid-search helper: macro-F1 scoring, cv=5, a single job, and
# per-fit progress logging (verbose=2).
best_model, metrics, best_params_, metadata, feature_names = grid_search_model(
    model,
    param_grid,
    train_data_path,
    test_data_path,
    verbose=2,
    n_jobs=1,
    cv=5,
    scoring="f1_macro",
)

# Report the selected parameters first, then the metrics.
print(best_params_, metrics, sep="\n")


# Save the grid-searched model with its metrics, selected parameters and
# feature names. The project root comes from `Path.cwd().parent` (the same
# expression the first cell inserts into `sys.path`) rather than from
# `sys.path[0]`, which breaks silently if another entry is prepended later.
# The call stays the last expression so the returned path is still displayed.
save_experiment(
    model=best_model,
    metrics=metrics,
    experiment_dir=os.path.join(
        str(Path.cwd().parent), "notebooks", "experiments", "linear_svm"
    ),
    model_params=best_params_,
    feature_names=feature_names,
    metadata_extra=metadata,
)

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END .................svm__C=0.1, svm__class_weight=None; total time=   8.9s
[CV] END .................svm__C=0.1, svm__class_weight=None; total time=   5.9s
[CV] END .................svm__C=0.1, svm__class_weight=None; total time=   4.5s
[CV] END .................svm__C=0.1, svm__class_weight=None; total time=   9.0s
[CV] END .................svm__C=0.1, svm__class_weight=None; total time=   8.9s
[CV] END .............svm__C=0.1, svm__class_weight=balanced; total time=   7.2s
[CV] END .............svm__C=0.1, svm__class_weight=balanced; total time=   7.5s
[CV] END .............svm__C=0.1, svm__class_weight=balanced; total time=   6.7s
[CV] END .............svm__C=0.1, svm__class_weight=balanced; total time=   7.1s
[CV] END .............svm__C=0.1, svm__class_weight=balanced; total time=   6.0s
[CV] END ...................svm__C=1, svm__class_weight=None; total time=   8.4s
[CV] END ...................svm__C=1, svm__class

'c:\\Users\\konst\\Documents\\GitHub\\audio-deepfake-detection\\notebooks\\experiments\\linear_svm\\exp_20260126_182448'

In [3]:
# Feature file for the additional evaluation set, stored under
# ITW_DATASET_PATH/normalized_features.
# NOTE(review): this file name carries "40_2048_512_128" while the train/test
# files used for fitting carry "40_512_256_128". If those numbers encode
# feature-extraction settings, confirm the model is scored on comparable
# features.
itw_test_data_path = os.path.join(
    ITW_DATASET_PATH,
    "normalized_features",
    "itw_features_40_2048_512_128_trimmed.parquet",
)

# Evaluate the grid-searched model on that file and show the metrics.
# `metrics` and `metadata_extra` are rebound to this evaluation's results.
metrics, metadata_extra = evaluate_model_on_parquet(best_model, itw_test_data_path)
print(metrics)

{'accuracy': 0.6677989777453253, 'precision': 0.575513954289652, 'recall': 0.42541811698785975, 'f1': 0.6215349395831098}
