In [1]:
import sys 
import os
import pandas as pd
from pathlib import Path
sys.path.insert(0, str(Path.cwd().parent))
from utils.utils import save_experiment, evaluate_model_on_parquet, grid_search_model, train_and_evaluate_non_linear_svm
from configs.config_local import FEATURES_DIR, ITW_DATASET_PATH, MODELS_PATH
import joblib


In [None]:
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

# Hyper-parameter search for an RBF-kernel SVM.
# The scaler is a pipeline step, so it is fitted together with the classifier
# every time the pipeline itself is fitted.
model = Pipeline([
    ("scaler", StandardScaler()),
    ("svm", SVC()),
])

# 1 kernel x 4 C values x 3 gamma values x 3 class-weight settings = 36 candidates;
# with cv=5 below that is 180 fits.
param_grid = {
    "svm__kernel": ["rbf"],
    "svm__C": [0.1, 1, 10, 100],  # C is capped at 100 for the RBF search
    "svm__gamma": ["scale", 0.01, 0.001],
    # {0: 1, 1: 5} weights class 1 five times as heavily as class 0.
    # NOTE(review): assumes labels are encoded as 0/1 by the helper - confirm.
    "svm__class_weight": [None, "balanced", {0: 1, 1: 5}],
}

train_data_path = os.path.join(FEATURES_DIR, "training_features_mean_20_128_256_128.parquet")
test_data_path = os.path.join(FEATURES_DIR, "testing_features_mean_20_128_256_128.parquet")

# n_jobs / verbose / cv are presumably forwarded to scikit-learn's grid search
# by the project helper - TODO confirm in utils.utils.
best_model, metrics, best_params_, metadata, feature_names = grid_search_model(
    model,
    param_grid,
    train_data_path,
    test_data_path,
    scoring="f1_macro",
    cv=5,
    n_jobs=1,
    verbose=2,
)

print(best_params_)
print(metrics)

# Resolve the project root explicitly rather than reading sys.path[0]:
# sys.path is mutable global state, and any import that prepends an entry
# would silently redirect where the experiment is written. This is the same
# expression the import cell inserts into sys.path.
project_root = Path.cwd().parent
experiment_dir = os.path.join(str(project_root), "notebooks", "experiments", "rbf_svm")

save_experiment(
    model=best_model,
    metrics=metrics,
    experiment_dir=experiment_dir,
    model_params=best_params_,
    feature_names=feature_names,
    metadata_extra=metadata,
)

Fitting 5 folds for each of 36 candidates, totalling 180 fits


### RBF Kernel

In [2]:
# Feature tables for the RBF experiment (the "mean" variant, going by the file names).
train_data_path = os.path.join(FEATURES_DIR, "training_features_mean_20_128_256_128.parquet")
test_data_path = os.path.join(FEATURES_DIR, "testing_features_mean_20_128_256_128.parquet")

# Fixed (not searched) SVC settings for this run.
# class_weight gives class 1 five times the weight of class 0; max_iter caps
# the solver's iterations and random_state pins the seed.
svc_params = dict(
    kernel="rbf",
    C=1.0,
    class_weight={0: 1, 1: 5},
    max_iter=20000,
    random_state=42,
)

# Load both tables only to preview them; the helper below receives the paths.
df = pd.read_parquet(train_data_path)
df_test = pd.read_parquet(test_data_path)
print(df.head(), df_test.head(), df_test.shape, sep="\n")

# The helper returns the fitted pipeline plus its evaluation artefacts.
# Note that `svc_params` is rebound to whatever the helper hands back.
(
    pipeline,
    metrics,
    svc_params,
    feature_names,
    metadata_extra,
) = train_and_evaluate_non_linear_svm(train_data_path, test_data_path, svc_params)
print(metrics)

  label                                           filename  mfcc_mean_0  \
0  real    file1.wav_16k.wav_norm.wav_mono.wav_silence.wav  -622.954895   
1  real   file10.wav_16k.wav_norm.wav_mono.wav_silence.wav  -579.459290   
2  real  file100.wav_16k.wav_norm.wav_mono.wav_silence.wav  -666.195618   
3  real  file1000.wav_16k.wav_norm.wav_mono.wav_silence...  -658.100586   
4  real  file10000.wav_16k.wav_norm.wav_mono.wav_silenc...  -603.779663   

   mfcc_mean_1  mfcc_mean_2  mfcc_mean_3  mfcc_mean_4  mfcc_mean_5  \
0    -1.842725   -22.277306    27.215727   -12.594766   -10.986565   
1    -8.681670   -14.242689    46.426991   -28.671131    15.031695   
2    -4.181276    10.106292    28.408127    -8.293526    -0.839287   
3    -0.066092    -6.204688    22.124472   -18.052919    -6.783300   
4     0.579489   -15.271199    20.162930   -10.814433    -7.932480   

   mfcc_mean_6  mfcc_mean_7  ...  mel_spectrogram_mean_118  \
0   -12.324221   -12.312344  ...                  0.005954   
1   

Evaluate the trained RBF pipeline on the ITW dataset

In [3]:
# Location of the ITW feature table used for the out-of-distribution evaluation.
itw_test_data_path = os.path.join(
    ITW_DATASET_PATH,
    "normalized_features",
    "itw_features_mean_20_128_256_128_trimmed_loudness_normalized.parquet",
)

In [4]:

# Score whichever `pipeline` was fitted last on the ITW feature table.
# This rebinds `metrics` / `metadata_extra`, replacing the values produced by
# the training cell.
itw_results = evaluate_model_on_parquet(pipeline, itw_test_data_path)
metrics, metadata_extra = itw_results
print(metrics)

{'accuracy': 0.7092558523123771, 'precision': 0.664124046517444, 'recall': 0.4504282927656687, 'f1_macro': 0.6624639587542147, 'roc_auc': 0.7872281308888013}


### Polynomial Kernel

In [6]:
# Feature tables for the polynomial-kernel experiment ("no_std" variant).
# NOTE(review): the ITW evaluation further down reads an "itw_features_mean_..."
# file - confirm its columns match these "no_std" tables.
train_data_path = os.path.join(FEATURES_DIR, "training_features_no_std_20_128_256_128.parquet")
test_data_path = os.path.join(FEATURES_DIR, "testing_features_no_std_20_128_256_128.parquet")

# Degree-2 polynomial kernel with coef0 = 0; the remaining settings mirror
# the RBF run (class 1 weighted 5x, capped iterations, fixed seed).
svc_params = dict(
    kernel="poly",
    C=1.0,
    degree=2,
    coef0=0.0,
    class_weight={0: 1, 1: 5},
    max_iter=20000,
    random_state=42,
)

# Both tables are loaded, but only the test table is previewed.
df = pd.read_parquet(train_data_path)
df_test = pd.read_parquet(test_data_path)
print(df_test.head(), df_test.shape, sep="\n")

# Fit and evaluate; `pipeline` and `svc_params` are rebound to the helper's
# return values.
(
    pipeline,
    metrics,
    svc_params,
    feature_names,
    metadata_extra,
) = train_and_evaluate_non_linear_svm(train_data_path, test_data_path, svc_params)
print(metrics)

  label                                           filename  mfcc_mean_0  \
0  real    file1.wav_16k.wav_norm.wav_mono.wav_silence.wav  -611.930420   
1  real   file10.wav_16k.wav_norm.wav_mono.wav_silence.wav  -605.008606   
2  real  file100.wav_16k.wav_norm.wav_mono.wav_silence.wav  -597.774414   
3  real  file1000.wav_16k.wav_norm.wav_mono.wav_silence...  -601.991211   
4  real  file1001.wav_16k.wav_norm.wav_mono.wav_silence...  -624.717590   

   mfcc_mean_1  mfcc_mean_2  mfcc_mean_3  mfcc_mean_4  mfcc_mean_5  \
0    12.266665   -12.647933    22.397125   -18.097271     6.636819   
1    -0.973098   -12.821069    38.964752   -17.512388    -5.345759   
2     7.211399   -16.026072    50.700493   -41.957108    -4.920807   
3   -23.718582    -9.058393    42.911995   -23.795502    -0.905054   
4   -23.051550   -12.977422    38.695904   -18.944435    -0.490484   

   mfcc_mean_6  mfcc_mean_7  ...  mel_spectrogram_mean_118  \
0   -13.808917     1.950459  ...                  0.104165   
1   



{'accuracy': 0.7339231765213639, 'precision': 0.7535173386197171, 'recall': 0.7369792831477091, 'f1': 0.7302465503138975, 'roc_auc': 0.8031705207914002}


In [9]:
# Same ITW feature file as in the RBF section (re-assigned here, value unchanged).
itw_test_data_path = os.path.join(
    ITW_DATASET_PATH,
    "normalized_features",
    "itw_features_mean_20_128_256_128_trimmed_loudness_normalized.parquet",
)

In [10]:

# Score the currently bound `pipeline` on the ITW feature table.
# `metrics` / `metadata_extra` from the training cell are overwritten here.
itw_results = evaluate_model_on_parquet(pipeline, itw_test_data_path)
metrics, metadata_extra = itw_results
print(metrics)

{'accuracy': 0.6624690731459747, 'precision': 0.5684686830197666, 'recall': 0.40488508184208294, 'f1_macro': 0.6123373661164553, 'roc_auc': 0.6722254332633197}


### Sigmoid Kernel

In [11]:
# Feature tables for the sigmoid-kernel experiment ("no_std" variant).
# NOTE(review): the ITW evaluation further down reads an "itw_features_mean_..."
# file - confirm its columns match these "no_std" tables.
train_data_path = os.path.join(FEATURES_DIR, "training_features_no_std_20_128_256_128.parquet")
test_data_path = os.path.join(FEATURES_DIR, "testing_features_no_std_20_128_256_128.parquet")

# Sigmoid kernel with coef0 = 0; class 1 is weighted 5x, solver iterations are
# capped and the seed is fixed.
svc_params = dict(
    kernel="sigmoid",
    C=1.0,
    coef0=0.0,
    class_weight={0: 1, 1: 5},
    max_iter=20000,
    random_state=42,
)

# Both tables are loaded, but only the test table is previewed.
df = pd.read_parquet(train_data_path)
df_test = pd.read_parquet(test_data_path)
print(df_test.head(), df_test.shape, sep="\n")

# Fit and evaluate; `pipeline` and `svc_params` are rebound to the helper's
# return values.
(
    pipeline,
    metrics,
    svc_params,
    feature_names,
    metadata_extra,
) = train_and_evaluate_non_linear_svm(train_data_path, test_data_path, svc_params)
print(metrics)

  label                                           filename  mfcc_mean_0  \
0  real    file1.wav_16k.wav_norm.wav_mono.wav_silence.wav  -611.930420   
1  real   file10.wav_16k.wav_norm.wav_mono.wav_silence.wav  -605.008606   
2  real  file100.wav_16k.wav_norm.wav_mono.wav_silence.wav  -597.774414   
3  real  file1000.wav_16k.wav_norm.wav_mono.wav_silence...  -601.991211   
4  real  file1001.wav_16k.wav_norm.wav_mono.wav_silence...  -624.717590   

   mfcc_mean_1  mfcc_mean_2  mfcc_mean_3  mfcc_mean_4  mfcc_mean_5  \
0    12.266665   -12.647933    22.397125   -18.097271     6.636819   
1    -0.973098   -12.821069    38.964752   -17.512388    -5.345759   
2     7.211399   -16.026072    50.700493   -41.957108    -4.920807   
3   -23.718582    -9.058393    42.911995   -23.795502    -0.905054   
4   -23.051550   -12.977422    38.695904   -18.944435    -0.490484   

   mfcc_mean_6  mfcc_mean_7  ...  mel_spectrogram_mean_118  \
0   -13.808917     1.950459  ...                  0.104165   
1   

In [12]:
# Same ITW feature file as in the RBF section (re-assigned here, value unchanged).
itw_test_data_path = os.path.join(
    ITW_DATASET_PATH,
    "normalized_features",
    "itw_features_mean_20_128_256_128_trimmed_loudness_normalized.parquet",
)

In [13]:

# Score the currently bound `pipeline` on the ITW feature table.
# `metrics` / `metadata_extra` from the training cell are overwritten here.
itw_results = evaluate_model_on_parquet(pipeline, itw_test_data_path)
metrics, metadata_extra = itw_results
print(metrics)

{'accuracy': 0.530514495971579, 'precision': 0.39321697176103304, 'recall': 0.47001950640318885, 'f1_macro': 0.5149860244516344, 'roc_auc': 0.524308511748095}
