In [1]:
import sys
import os
import pandas as pd
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))
from utils.utils import save_experiment, train_and_evaluate_logistic_regression, train_and_evaluate_linear_svm, train_and_evaluate_non_linear_svm, train_and_evaluate_decision_tree, train_and_evaluate_random_forest, train_and_evaluate_xgboost
from configs.config import DATASET_PATH, FEATURES_DIR, ITW_DATASET_PATH, MODELS_PATH


from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.feature_selection import SequentialFeatureSelector
import numpy as np

import matplotlib.pyplot as plt
import joblib


### Parquet paths

In [2]:
# Directory (under the in-the-wild dataset root) holding the ITW parquet files used below.
itw_features_dir = os.path.join(ITW_DATASET_PATH, "normalized_features")

# Feature files whose names carry no `_no_mel` suffix.
train_data_path = os.path.join(
    FEATURES_DIR, "training_features_mean_20_128_256_128.parquet"
)
test_data_path = os.path.join(
    FEATURES_DIR, "testing_features_mean_20_128_256_128.parquet"
)
itw_data_path = os.path.join(
    itw_features_dir,
    "itw_features_mean_20_128_256_128_trimmed_loudness_normalized.parquet",
)

# Same three splits, but the variants stored without mel features.
train_data_path_no_mel = os.path.join(
    FEATURES_DIR, "training_features_mean_20_128_256_128_no_mel.parquet"
)
test_data_path_no_mel = os.path.join(
    FEATURES_DIR, "testing_features_mean_20_128_256_128_no_mel.parquet"
)
itw_data_path_no_mel = os.path.join(
    itw_features_dir,
    "itw_features_mean_20_128_256_128_no_mel_trimmed_loudness_normalized.parquet",
)

### Logistic Regression

In [None]:
# Reference values (pipeline-prefixed keys, as recorded):
# {'clf__C': np.float64(2.1544346900318843), 'clf__max_iter': 1000, 'clf__penalty': 'l2', 'clf__solver': 'saga'}
lr_params = {
    "C": np.float64(2.1544346900318843),
    "class_weight": {0: 1, 1: 5},  # class 1 weighted 5x relative to class 0
    "max_iter": 1000,
    "random_state": 42,
    "solver": "saga",
    "penalty": "l2",
}

# Fit on the training features and evaluate on the in-the-wild features.
# Note: `lr_params` is rebound to the params object the helper returns.
lr_result = train_and_evaluate_logistic_regression(
    train_data_path, itw_data_path, lr_params
)
pipeline, metrics, lr_params, feature_names, metadata_extra = lr_result

print(metadata_extra, metrics, sep="\n")

# sys.path[0] is the working directory's parent, inserted by the imports cell.
lr_experiment_dir = os.path.join(
    sys.path[0], "notebooks", "experiments", "logistic_reg"
)
save_experiment(
    model=pipeline,
    metrics=metrics,
    experiment_dir=lr_experiment_dir,
    model_params=lr_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)


FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\konst\\Documents\\GitHub\\audio-deepfake-detection\\in-the-wild-audio-deepfake\\normalized_features\\itw_features_mean_20_128_256_128_trimmed_loudness_normalized.parquet'

### Linear SVM

In [None]:
# Reference values (pipeline-prefixed keys, as recorded): {'svm__C': 0.01}
svc_params = {
    "C": 0.01,
    "class_weight": {0: 1, 1: 5},  # class 1 weighted 5x relative to class 0
    "max_iter": 20000,
    "random_state": 42,
}

# Fit on the training features and evaluate on the in-the-wild features.
# Note: `svc_params` is rebound to the params object the helper returns.
linear_svm_result = train_and_evaluate_linear_svm(
    train_data_path, itw_data_path, svc_params
)
pipeline, metrics, svc_params, feature_names, metadata_extra = linear_svm_result

print(metadata_extra, metrics, sep="\n")

# sys.path[0] is the working directory's parent, inserted by the imports cell.
linear_svm_experiment_dir = os.path.join(
    sys.path[0], "notebooks", "experiments", "linear_svm"
)
save_experiment(
    model=pipeline,
    metrics=metrics,
    experiment_dir=linear_svm_experiment_dir,
    model_params=svc_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)

### RBF SVM

In [None]:
# Reference values (pipeline-prefixed keys, as recorded):
# {'svm__C': 0.1, 'svm__gamma': 0.01, 'svm__kernel': 'rbf'}
svm_params = {
    "kernel": "rbf",
    "C": 0.1,
    "gamma": 0.01,
    "class_weight": {0: 1, 1: 5},  # class 1 weighted 5x relative to class 0
    "random_state": 42,
}

# Fit on the training features and evaluate on the in-the-wild features.
# Note: `svm_params` is rebound to the params object the helper returns.
rbf_svm_result = train_and_evaluate_non_linear_svm(
    train_data_path, itw_data_path, svm_params
)
pipeline, metrics, svm_params, feature_names, metadata_extra = rbf_svm_result

print(metadata_extra, metrics, sep="\n")

# sys.path[0] is the working directory's parent, inserted by the imports cell.
rbf_svm_experiment_dir = os.path.join(
    sys.path[0], "notebooks", "experiments", "rbf_svm"
)
save_experiment(
    model=pipeline,
    metrics=metrics,
    experiment_dir=rbf_svm_experiment_dir,
    model_params=svm_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)

### Poly SVM

In [None]:
# Reference values (pipeline-prefixed keys, as recorded):
# {'svm__C': 1, 'svm__coef0': 0.0, 'svm__degree': 2, 'svm__gamma': 0.01, 'svm__kernel': 'poly'}
svm_params = {
    "kernel": "poly",
    "C": 1,
    "coef0": 0.0,
    "degree": 2,
    "gamma": 0.01,
    "class_weight": {0: 1, 1: 5},  # class 1 weighted 5x relative to class 0
    "random_state": 42,
}

# Fit on the training features and evaluate on the in-the-wild features.
# Note: `svm_params` is rebound to the params object the helper returns.
poly_svm_result = train_and_evaluate_non_linear_svm(
    train_data_path, itw_data_path, svm_params
)
pipeline, metrics, svm_params, feature_names, metadata_extra = poly_svm_result

print(metadata_extra, metrics, sep="\n")

# sys.path[0] is the working directory's parent, inserted by the imports cell.
poly_svm_experiment_dir = os.path.join(
    sys.path[0], "notebooks", "experiments", "poly_svm"
)
save_experiment(
    model=pipeline,
    metrics=metrics,
    experiment_dir=poly_svm_experiment_dir,
    model_params=svm_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)

### Sigmoid SVM

In [None]:
# Reference values (pipeline-prefixed keys, as recorded):
# {'svm__C': 0.1, 'svm__coef0': -1.0, 'svm__gamma': 'scale', 'svm__kernel': 'sigmoid'}
svm_params = {
    "kernel": "sigmoid",
    "C": 0.1,
    "coef0": -1.0,
    "gamma": "scale",
    "class_weight": {0: 1, 1: 5},  # class 1 weighted 5x relative to class 0
    "random_state": 42,
}

# Fit on the training features and evaluate on the in-the-wild features.
# Note: `svm_params` is rebound to the params object the helper returns.
pipeline, metrics, svm_params, feature_names, metadata_extra = train_and_evaluate_non_linear_svm(
    train_data_path,
    itw_data_path,
    svm_params,
)

print(metadata_extra)
print(metrics)

# Save under a sigmoid-specific folder. This cell previously wrote to
# "poly_svm", the same directory used by the poly-kernel experiment.
# sys.path[0] is the working directory's parent, inserted by the imports cell.
save_experiment(
    model=pipeline,
    metrics=metrics,
    experiment_dir=os.path.join(sys.path[0], "notebooks", "experiments", "sigmoid_svm"),
    model_params=svm_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)

### Decision Tree

In [None]:
# Decision-tree hyperparameters for this run.
# NOTE(review): no random_state is set here, unlike the LR/SVM/XGBoost cells;
# confirm whether train_and_evaluate_decision_tree seeds the model itself.
params = {
    "class_weight": {0: 1, 1: 5},  # class 1 weighted 5x relative to class 0
    "criterion": "gini",
    "max_depth": 10,
    "min_samples_leaf": 2,
    "min_samples_split": 2,
}

# Fit on the training features and evaluate on the in-the-wild features;
# no validation file is supplied for this model.
clf, metrics, dt_params, feature_names, metadata_extra = train_and_evaluate_decision_tree(
    train_path=train_data_path,
    val_path=None,
    test_path=itw_data_path,
    dt_params=params,
)

# Persist the model trained in THIS cell (`clf`). The cell previously passed
# `pipeline`, which is whatever an earlier cell left in the kernel (or a
# NameError on a fresh kernel). The returned `dt_params` is stored, matching
# how the other experiment cells save the params returned by their helper.
# sys.path[0] is the working directory's parent, inserted by the imports cell.
save_experiment(
    model=clf,
    metrics=metrics,
    experiment_dir=os.path.join(sys.path[0], "notebooks", "experiments", "Dtree"),
    model_params=dt_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)
print(metadata_extra)
print(metrics)

### Random Forest

In [None]:
# Random-forest hyperparameters for this run.
params = {
    "class_weight": {0: 1, 1: 5},  # class 1 weighted 5x relative to class 0
    "max_depth": None,
    "max_features": "sqrt",
    "min_samples_leaf": 4,
    "n_estimators": 300,
}

# Fit on the training features and evaluate on the in-the-wild features;
# no validation file is supplied for this model.
rf_result = train_and_evaluate_random_forest(
    train_path=train_data_path,
    val_path=None,
    test_path=itw_data_path,
    rf_params=params,
)
# The helper also returns an OOB score; it is unpacked but not used below.
pipeline, metrics, rf_params, feature_names, metadata_extra, oob_score = rf_result

# sys.path[0] is the working directory's parent, inserted by the imports cell.
rf_experiment_dir = os.path.join(sys.path[0], "notebooks", "experiments", "RF")
save_experiment(
    model=pipeline,
    metrics=metrics,
    experiment_dir=rf_experiment_dir,
    model_params=rf_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)

for label, value in (("Metadata:", metadata_extra), ("Metrics:", metrics)):
    print(label, value)

### XGBoost

#### XGBoost with mel features


In [None]:


# XGBoost hyperparameters for the run on the feature files WITHOUT the
# `_no_mel` suffix (i.e. the set that keeps the mel features).
params = {
    "max_depth": 4,
    "learning_rate": 0.25,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "scale_pos_weight": 2,
    "eval_metric": "aucpr",
    "random_state": 42,
    "objective": "binary:logistic",
}

# This section is the mel-features experiment and is saved under
# "experiments/XGB", so it must read the full-feature parquet files. The cell
# previously pointed at the `*_no_mel` files, i.e. the data belonging to the
# "without mel features" experiment.
pipeline, metrics, xgb_params, feature_names, metadata_extra = train_and_evaluate_xgboost(
    train_path=train_data_path,
    val_path=test_data_path,
    test_path=itw_data_path,
    xgb_params=params,
)

# Relative path: resolved against the notebook's working directory.
save_experiment(
    pipeline,
    metrics,
    experiment_dir=os.path.join("experiments", "XGB"),
    model_params=xgb_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)

print("Metadata:", metadata_extra)
print("Metrics:", metrics)

#### XGBoost without mel features

In [None]:
# XGBoost hyperparameters for the run on the `*_no_mel` feature files.
params = {
    "max_depth": 4,
    "learning_rate": 0.25,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "scale_pos_weight": 2,
    "eval_metric": "aucpr",
    "random_state": 42,
    # NOTE(review): the mel-features run sets the objective explicitly; it is
    # left disabled here as in the original. Confirm the helper's default
    # objective matches so the two runs are comparable.
    #"objective":"binary:logistic"
}

# This section is the no-mel experiment and is saved under
# "experiments/XGB_no_mel", so it must read the `*_no_mel` parquet files. The
# cell previously pointed at the full-feature files, i.e. the data belonging
# to the mel-features experiment.
pipeline, metrics, xgb_params, feature_names, metadata_extra = train_and_evaluate_xgboost(
    train_path=train_data_path_no_mel,
    val_path=test_data_path_no_mel,
    test_path=itw_data_path_no_mel,
    xgb_params=params,
)

# Relative path: resolved against the notebook's working directory.
save_experiment(
    pipeline,
    metrics,
    experiment_dir=os.path.join("experiments", "XGB_no_mel"),
    model_params=xgb_params,
    feature_names=feature_names,
    metadata_extra=metadata_extra,
)

print("Metadata:", metadata_extra)
print("Metrics:", metrics)