In [1]:
# %% Package imports
from sklearn import svm
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, f1_score, accuracy_score
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, cross_val_score
from deep_audio import Audio, Visualization, Directory, Model, JSON
import numpy as np
from joblib import Parallel, delayed
import multiprocessing
import joblib

In [2]:
# Run mode switch read by the execution cell below: True evaluates every KFold
# split in parallel, False evaluates only the first split.
kfold_split = False

In [3]:
# %% Load dataset
# Feature-extraction method: selects the dataset folder/file and is also passed
# as the name of the JSON field to read the inputs from.
method_algo = 'lpc'

# Sample rate encoded in the dataset file name (presumably Hz -- TODO confirm).
sampling_rate = 22050

x, y, mapping = Directory.load_json_data(
    f'processed/{method_algo}/{method_algo}_{sampling_rate}.json',
    inputs_fieldname=method_algo,
)

In [4]:
# Split into 5 folds
# Seed for the KFold shuffle; also recorded in the per-fold report.
random_state = 42

# In the visible cells these two are only referenced by the commented-out
# output paths; the grid search itself explores kernels and shapes on its own.
kernel = 'linear'
decision_function_shape = 'ovo'

# Flatten every entry of x to 1-D and stack the results into a single array.
x_holder = [row.flatten() for row in x]
x = np.array(x_holder)

# n currently equals the full dataset size, so the slices below keep every
# sample; lower n to experiment on a subset.
n = len(x)
x, y = x[:n], y[:n]

kf = KFold(n_splits=5, shuffle=True, random_state=random_state)

# Last expression of the cell: displays the number of splits.
kf.get_n_splits(x)

5

In [5]:
def process_fold(train_index, test_index, n_jobs=8):
    """Grid-search an SVC on one train/validation split and report its metrics.

    Reads the module-level ``x``, ``y``, ``method_algo``, ``sampling_rate`` and
    ``random_state`` defined in earlier cells.

    Parameters
    ----------
    train_index, test_index
        Index arrays (the caller passes the pairs yielded by ``kf.split``)
        selecting the training and validation rows of ``x`` and ``y``.
    n_jobs : int, default 8
        Number of parallel workers handed to ``GridSearchCV``. The default
        keeps the previously hard-coded value.

    Returns
    -------
    dict
        Run description (method, seed, feature method, sample rate, split
        sizes), train/validation accuracy, micro and macro F1 on the
        validation split, and the model file name.
    """
    x_train, x_val = x[train_index], x[test_index]
    y_train, y_val = y[train_index], y[test_index]

    param_grid = {
        'C': [0.1, 1, 10, 100, 1000],
        # 'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
        'kernel': ['linear', 'rbf', 'poly'],
        # NOTE(review): per the scikit-learn SVC docs this option only changes
        # the shape of decision_function (one-vs-one is always used for
        # training), so it likely doubles the grid without changing scores --
        # confirm before pruning.
        'decision_function_shape': ['ovo', 'ovr']
    }

    # %% training

    # model = RandomForestClassifier()
    # model = svm.SVC(C=10, kernel='rbf')

    # refit=True retrains the best candidate on the whole training split, so
    # `model` can be used directly for prediction and scoring below.
    model = GridSearchCV(svm.SVC(), param_grid, refit=True, verbose=3, n_jobs=n_jobs)

    model.fit(x_train, y_train)

    # Best parameters found by the search
    print(model.best_params_)

    # Refitted estimator after hyper-parameter tuning
    print(model.best_estimator_)
    print(model.get_params())

    # Evaluate accuracies

    # Predict the validation split once and derive the accuracy from it; with
    # no custom `scoring`, model.score() is the classifier's accuracy, so this
    # yields the same number without a second prediction pass.
    y_hat = model.predict(x_val)

    score_test = accuracy_score(y_val, y_hat)

    score_train = model.score(x_train, y_train)

    # Save model
    # NOTE: persistence is currently disabled, so 'model_file' in the report
    # below is only the name the file would have.

#     if kfold_split:
#         filename = f'models/gridsvm/5fold/{method_algo}_{sampling_rate}_{kernel}_{decision_function_shape}/acc{score_test}_seed{random_state}.sav'
#     else:
#         filename = f'models/gridsvm/{method_algo}_{sampling_rate}_{kernel}_{decision_function_shape}/acc{score_test}_seed{random_state}.sav'

#     Directory.create_directory(filename, file=True)

#     joblib.dump(model, filename)

    # Save accuracies and parameters

    return {
        'method': 'Grid Search Support Vector Machines',
        'seed': random_state,
        'feature_method': method_algo,
        'sample_rate': sampling_rate,
        'train_test': [len(x_train), len(x_val)],
        'score_train': score_train,
        'score_test': score_test,
        # sklearn metrics take (y_true, y_pred) in that order.
        'f1_micro': f1_score(y_val, y_hat, average='micro'),
        'f1_macro': f1_score(y_val, y_hat, average='macro'),
        'model_file': f'acc{score_test}_seed{random_state}.sav',
#         'params': model.get_params()
    }

In [6]:
# One worker per CPU core for the per-fold fan-out.
num_cores = multiprocessing.cpu_count()

if not kfold_split:
    # Single run: evaluate only the first split produced by KFold.
    train_index, test_index = next(iter(kf.split(x)))
    dump_info = process_fold(train_index, test_index)
#     JSON.create_json_file(f'models/gridsvm/{method_algo}_{sampling_rate}_{kernel}_{decision_function_shape}/info.json', dump_info)
else:
    # Full cross-validation: every split is processed as its own joblib task.
    dump_info = Parallel(n_jobs=num_cores, verbose=3)(
        delayed(process_fold)(train_index, test_index)
        for train_index, test_index in kf.split(x)
    )
#     JSON.create_json_file(f'models/gridsvm/5fold/{method_algo}_{sampling_rate}_{kernel}_{decision_function_shape}/info.json', dump_info)

Fitting 5 folds for each of 30 candidates, totalling 150 fits


KeyboardInterrupt: 

In [None]:
    # Display the metrics gathered by the run cell: one report dict for a
    # single split, or the collection of per-fold reports for the 5-fold run.
    dump_info