In [17]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC, LinearSVC
from sklearn.decomposition import TruncatedSVD
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.externals import joblib



In [18]:
def load_x_train(features, sparse=False):
    """Assemble the training feature matrix from the requested feature groups.

    Each requested group is read from ``../data/<group>_speech_mean_max_min.npz``
    (array key ``arr_0``) and appended column-wise. Groups are always stacked in
    the fixed order mfcc, delta, power, regardless of their order in
    ``features``. The running shape is printed after each group is appended.

    Parameters
    ----------
    features : container of str
        Group names to load. Membership is tested with ``in`` for "mfcc",
        "delta" and "power"; any other entries are ignored.
    sparse : bool, optional
        Unused. Kept so that existing callers passing it keep working.

    Returns
    -------
    numpy.ndarray
        2-D array with one row per sample. The row count is taken from the
        first archive that is loaded. When none of the known groups is
        requested, an empty ``(1440, 0)`` array is returned.
    """
    x = None
    for name in ("mfcc", "delta", "power"):
        if name not in features:
            continue
        # Alternative input that was used previously: f'../data/{name}_speech_origin.npz'
        # The archive is closed as soon as the array has been read into memory.
        with np.load(f'../data/{name}_speech_mean_max_min.npz') as archive:
            matrix = archive['arr_0']
        if x is None:
            # Zero-width float block sized from the data itself, so archives
            # with any number of samples can be stacked.
            x = np.empty((matrix.shape[0], 0))
        x = np.hstack((x, matrix))
        print(x.shape)
    if x is None:
        # Nothing requested: keep the historical empty result.
        x = np.empty((1440, 0))
    return x

def load_y_train():
    """Load the training labels from ``../data/label_speech_origin.npz``.

    The ``arr_0`` array is cast to int and shifted down by one
    (presumably the stored labels are 1-based class ids -- confirm against
    the data preparation step).

    Returns
    -------
    numpy.ndarray
        Integer label array, each stored value minus one.
    """
    # Read inside a context manager so the archive's file handle is released.
    with np.load('../data/label_speech_origin.npz') as archive:
        labels = archive['arr_0']
    return labels.astype('int') - 1

In [80]:
# Targets and features. Only the MFCC group is loaded here; pass
# ["mfcc", "delta", "power"] to load_x_train to stack all three groups.
y = load_y_train()
x = load_x_train([ "mfcc"])

# Materialise the stratified 6-fold split once as a list of
# (train_idx, test_idx) pairs. A zero vector stands in for the features
# when generating the split.
dummy_x = np.zeros(len(y))
splitter = StratifiedKFold(n_splits=6, shuffle=True, random_state=71)
skf = list(splitter.split(dummy_x, y))

# Keyword arguments forwarded to SVC.
params_SVC = dict(
    kernel='linear',
    random_state=71,
    C=100,
    max_iter=-1,
    tol=0.001,
    class_weight=None,
    verbose=False,
)

# For each feature group: a switch that enables SVD reduction and the
# keyword arguments forwarded to TruncatedSVD for that group.
SVD1 = True
params_svd_mfcc = dict(random_state=71, n_iter=8, n_components=15)

SVD2 = True
params_svd_delta = dict(random_state=71, n_iter=8, n_components=30)

SVD3 = True
params_svd_power = dict(random_state=71, n_iter=8, n_components=50)

# Seed NumPy's global random generator.
np.random.seed(71)
def svd(x_tr, x_te, svd_params):
    """Standardise both splits, then reduce them with a TruncatedSVD.

    The scaler and the SVD are both fitted on the training split only and
    then applied to the training and test splits.

    Parameters
    ----------
    x_tr, x_te : array-like
        Training and test feature matrices.
    svd_params : dict
        Keyword arguments forwarded to ``TruncatedSVD``.

    Returns
    -------
    tuple
        ``(reduced_train, reduced_test)``.
    """
    standardiser = StandardScaler().fit(x_tr)
    scaled_tr = standardiser.transform(x_tr)
    scaled_te = standardiser.transform(x_te)

    # fit() and transform() are kept as separate calls, as in the original.
    reducer = TruncatedSVD(**svd_params).fit(scaled_tr)
    return reducer.transform(scaled_tr), reducer.transform(scaled_te)

(1440, 39)


In [81]:
# Column range of each feature group inside x, the flag that enables SVD
# reduction for that group, and the TruncatedSVD parameters to use.
# The layout assumes the groups are stacked in mfcc, delta, power order and
# that the first two groups are 39 columns wide each.
feature_groups = [
    (slice(None, 39), SVD1, params_svd_mfcc),
    (slice(39, 78), SVD2, params_svd_delta),
    (slice(78, None), SVD3, params_svd_power),
]

result = []
for i, (tr_idx, te_idx) in enumerate(skf):
    tr_x, tr_y = x[tr_idx], y[tr_idx]
    te_x, te_y = x[te_idx], y[te_idx]
    model = SVC(**params_SVC)

    # Start from zero-width float blocks so the stacked result has the same
    # dtype promotion as repeatedly hstack-ing onto an empty array.
    tr_parts = [np.empty((len(tr_idx), 0))]
    te_parts = [np.empty((len(te_idx), 0))]
    for cols, use_svd, svd_params in feature_groups:
        tmp_tr_x, tmp_te_x = tr_x[:, cols], te_x[:, cols]
        if tmp_tr_x.shape[1] == 0:
            # The group was not loaded into x (e.g. only "mfcc" was requested).
            # A zero-column slice cannot be scaled or decomposed, so skip it
            # instead of passing it to svd().
            continue
        if use_svd:
            tmp_tr_x, tmp_te_x = svd(tmp_tr_x, tmp_te_x, svd_params)
        tr_parts.append(tmp_tr_x)
        te_parts.append(tmp_te_x)
    X_tr = np.hstack(tr_parts)
    X_te = np.hstack(te_parts)

    # Standardise the assembled features using training-fold statistics only.
    scaler = StandardScaler()
    scaler_sc = scaler.fit(X_tr)
    X_tr = scaler_sc.transform(X_tr)
    X_te = scaler_sc.transform(X_te)

    model.fit(X_tr, tr_y)
    result.append(accuracy_score(te_y, model.predict(X_te)))
    # NOTE(review): only the classifier is persisted; the fitted scalers and
    # SVD transforms for this fold are not saved alongside it.
    joblib.dump(model, f'../model/{i}.model', compress=True)
print(np.mean(result))

0.4305555555555555
