# 用SVM对特征进行分类
2019年02月27日

In [15]:
import sklearn
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import os
import os.path as path
import pickle
from tqdm import tqdm as progress

## 对每个样本抽取时间序列的中间长度为800帧的子序列

In [48]:
def select_mid(sample, length=800):
    '''
    Return the centered window of `length` time frames from a feature matrix.

    sample - 2-D array shaped like (n_features, Tx), e.g. (40, Tx);
             Tx must be at least `length`
    length - number of consecutive time frames to keep (default 800)

    return - the middle `length` columns of `sample`,
             shape like (n_features, length)

    raises ValueError - if `sample` has fewer than `length` time frames
    '''
    total_frames = sample.shape[1]
    if total_frames < length:
        # Without this check the start index would go negative and wrap
        # around, silently returning an off-centre or truncated slice.
        raise ValueError(
            'sample has %d time frames, need at least %d'
            % (total_frames, length)
        )
    # Compute the window from its start offset so that exactly `length`
    # frames are returned even when `length` is odd (mid +/- length // 2
    # would drop one frame in that case). For even `length` this selects
    # the same columns as before.
    start = (total_frames - length) // 2
    return sample[:, start:start + length]

In [66]:
# Make the audio data directory the working directory (the path is relative
# to the current working directory) and display its entries.
os.chdir('./Data/Sounds/MP3/')
os.listdir()

['Positive', '.DS_Store', 'Negative']

In [70]:
# Number of time frames kept from every sample.
time_len = 800
# Parallel accumulators: samples[i] is a feature matrix, labels[i] its class.
samples, labels = [], []

In [72]:
# Load every pickled feature file (*.ftr) of the positive class, keep the
# middle `time_len` frames of each, and record it with the label '+'.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
os.chdir('../Positive/')
for filename in progress(os.listdir()):
    if not filename.endswith('.ftr'):
        continue  # not a feature file
    with open(filename, 'rb') as f:
        sample = select_mid(pickle.load(f), time_len)
    samples.append(sample)
    labels.append('+')

100%|██████████| 72/72 [00:00<00:00, 1485.49it/s]


In [73]:
# Load every pickled feature file (*.ftr) of the negative class, keep the
# middle `time_len` frames of each, and record it with the label '-'.
# NOTE: pickle.load can execute arbitrary code -- only load trusted files.
os.chdir('../Negative/')
for filename in progress(os.listdir()):
    if not filename.endswith('.ftr'):
        continue  # not a feature file
    with open(filename, 'rb') as f:
        sample = select_mid(pickle.load(f), time_len)
    samples.append(sample)
    labels.append('-')

100%|██████████| 70/70 [00:00<00:00, 1300.36it/s]


## 扁平化特征分类法

In [78]:
# Flatten each 2-D feature matrix into a single 1-D feature vector so that a
# whole sample becomes one row of the classifier input.
# NOTE(review): the name is misspelled ("samlpes") but the train/test split
# cell refers to it by this exact name, so it is kept as-is here.
flatten_samlpes = [sample.flatten() for sample in samples]

In [133]:
# Hold out 20% of the flattened samples for testing. The split is seeded so
# that the reported score can be reproduced, and stratified on the labels so
# that both classes keep the same proportion in the train and test sets
# (an unlucky unstratified draw skews a test set this small).
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(
    flatten_samlpes, labels,
    test_size=0.2, shuffle=True, random_state=42, stratify=labels,
)

In [141]:
# RBF-kernel SVM trained on the flattened features. fit() returns the
# estimator itself, so construction and training are chained.
clf1 = SVC(kernel='rbf', gamma=1e-8).fit(X_train1, Y_train1)
# Mean accuracy on the held-out test split (displayed as the cell output).
clf1.score(X_test1, Y_test1)

1.0

In [142]:
# Mean accuracy of clf1 on the held-out test split.
clf1.score(X_test1, Y_test1)

1.0

In [155]:
# Number of samples in the training split.
len(X_train1)

56

In [157]:
# Number of samples in the test split.
len(X_test1)

14

In [160]:
# Change the working directory (relative to the current one); files opened
# with relative paths after this point, such as the saved models, land here.
os.chdir('../../../../voice/')

### 保存模型

In [161]:
# Serialize the trained flattened-feature classifier into the current working
# directory. NOTE: a pickle should only ever be loaded from a trusted source.
with open('flatten-svc.obj', 'wb') as f:
    pickle.dump(clf1, f)

## 对每一帧打分，加权投票法（注：下方代码实际是先对时间轴取平均，再用 SVM 分类，尚未实现逐帧打分与投票）

In [93]:
# Average every sample over axis 1 (the time-frame axis according to
# select_mid's docstring), leaving one mean value per feature row.
avg_samples = [sample.mean(axis=1) for sample in samples]

In [130]:
# Hold out 20% of the time-averaged samples for testing. The split is seeded
# so that the reported score can be reproduced, and stratified on the labels
# so that both classes keep the same proportion in the train and test sets
# (an unlucky unstratified draw skews a test set this small).
X_train2, X_test2, Y_train2, Y_test2 = train_test_split(
    avg_samples, labels,
    test_size=0.2, shuffle=True, random_state=42, stratify=labels,
)

In [153]:
# RBF-kernel SVM trained on the time-averaged features. fit() returns the
# estimator itself, so construction and training are chained.
clf2 = SVC(kernel='rbf', gamma=1e-3).fit(X_train2, Y_train2)
# Mean accuracy on the training split (displayed as the cell output).
clf2.score(X_train2, Y_train2)

1.0

In [154]:
# Mean accuracy of clf2 on the held-out test split.
clf2.score(X_test2, Y_test2)

0.9285714285714286

### 保存模型

In [162]:
# Serialize the trained averaged-feature classifier into the current working
# directory. NOTE: a pickle should only ever be loaded from a trusted source.
with open('average-svc.obj', 'wb') as f:
    pickle.dump(clf2, f)