In [1]:
# %% Package imports
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, GridSearchCV, KFold
from deep_audio import Audio, Visualization, Directory, Model, JSON
import numpy as np
import joblib

In [2]:
# %% Load dataset

sampling_rate = 16000

method_algo = 'mfcc'

x, y, mapping = Directory.load_json_data(f'processed/{method_algo}/{method_algo}_{sampling_rate}.json',
                                         inputs_fieldname=method_algo)

In [3]:
# %% preprocessing
x_holder = []

for row in x:
    x_holder.append(row.flatten())

x = np.array(x_holder)


x, y = shuffle(x, y)

n = 3000

x = x[:n]
y = y[:n]


random_state = 42

# kf = KFold(n_splits=5, shuffle=True, random_state=42)
# kf.get_n_splits(x)

x_train, x_val, y_train, y_val = train_test_split(x, y, train_size=0.8, random_state=random_state, stratify=y)

In [4]:
data = {
    "mapping": mapping.tolist(),
    "labels": y_val.tolist(),
    "mfcc": x_val.tolist(),
}

JSON.create_json_file(f'datatest/svm/{method_algo}_{sampling_rate}/datatest_{random_state}_{x.shape[0]}.json', data)

In [5]:
# for train_index, test_index in kf.split(x):
#     x_train, x_val = x[train_index], x[test_index]
#     y_train, y_val = y[train_index], y[test_index]

# %% Split data

param_grid = {
    'C': [0.1, 1, 10, 100, 1000],
#     'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
    'kernel': ['linear', 'rbf'],
    'decision_function_shape': ['ovo', 'ovr']
}

# %% training

# model = RandomForestClassifier()
# model = svm.SVC(C=10, kernel='rbf')

model = GridSearchCV(svm.SVC(), param_grid, refit=True, verbose=3, n_jobs=8)

model.fit(x_train, y_train)

Fitting 5 folds for each of 20 candidates, totalling 100 fits


GridSearchCV(estimator=SVC(), n_jobs=8,
             param_grid={'C': [0.1, 1, 10, 100, 1000],
                         'decision_function_shape': ['ovo', 'ovr'],
                         'kernel': ['linear', 'rbf']},
             verbose=3)

In [6]:
# %%

# # print best parameter after tuning
print(model.best_params_)
#
# # print how our model looks after hyper-parameter tuning
print(model.best_estimator_)

grid_predictions = model.predict(x_val)
#
# # print classification report
print(classification_report(y_val, grid_predictions))

{'C': 0.1, 'decision_function_shape': 'ovo', 'kernel': 'linear'}
SVC(C=0.1, decision_function_shape='ovo', kernel='linear')
              precision    recall  f1-score   support

           0       0.67      0.67      0.67         6
           1       1.00      0.80      0.89         5
           2       1.00      1.00      1.00         5
           3       0.42      0.71      0.53         7
           4       1.00      0.80      0.89         5
           5       0.60      1.00      0.75         6
           6       0.00      0.00      0.00         4
           7       0.71      0.83      0.77         6
           8       1.00      0.50      0.67         4
           9       1.00      0.67      0.80         3
          10       0.38      0.60      0.46         5
          11       0.56      1.00      0.72         9
          12       0.70      1.00      0.82         7
          13       0.88      0.78      0.82         9
          14       0.70      1.00      0.82         7
          1

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [7]:
# %% SCORE

print(model.score(x_val, y_val))

print(model.predict(np.array([x_val[3]])), y_val[3])

0.755
[44] 94


In [10]:
dump_info = {
    score_test
    ...model.best_params_
}

0.7091666666666667

In [None]:
joblib.dump(gs, f'model/svm/{method_algo}_{sampling_rate}.pkl')