# 用SVM对特征进行分类
2019年02月27日

In [71]:
import sklearn
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
import os
import os.path as path
import pickle
from tqdm import tqdm as progress

## Some Constants

In [52]:
# Conversion factor from milliseconds to feature frames; the window
# parameters below are all expressed in frames.
FRAME_MS_RATIO = 0.09380235476687636 # frames per millisecond
offset = int(2000 * FRAME_MS_RATIO) # offset of subsampling, in frames (2000 ms = 2 s in this example)
duration = int(6000 * FRAME_MS_RATIO) # maximum length of the subsampling range, in frames (6000 ms)
unit = int(80 * FRAME_MS_RATIO) # length of a single subsample, in frames (80 ms)
stride = unit // 2 # step between consecutive subsamples, in frames (half a unit)

## Subsampling from `.ftr` Files

In [86]:
# IO support
def load_from_file(filename):
    '''
    Unpickle and return the object stored in `filename`.

    NOTE: pickle can run arbitrary code while loading, so only use this on
    files that come from a trusted source.
    '''
    with open(filename, 'rb') as source:
        return pickle.load(source)

def save_to_file(obj, filename):
    '''
    Pickle `obj` into `filename`, replacing the file if it already exists.
    '''
    with open(filename, 'wb') as sink:
        pickle.dump(obj, sink)

In [89]:
def subsampling(mfcc: object, offset, duration, unit, stride=None):
    '''
    Cut a feature matrix into fixed-width, possibly overlapping windows.

    mfcc: 2-D array sliced as mfcc[:, left:right]; shape like (40, xxx)
    offset: index of the first frame to use
    duration: length, in frames, of the range the windows are taken from
    unit: width of every window, in frames; must be positive
    stride: step between window starts, in frames; defaults to half of
        `unit`, but never less than 1
    return: list of subsamples, one mfcc[:, left:left + unit] slice for each
        window that fits entirely inside [offset, offset + duration)
    raises: ValueError if `unit` or `stride` is not positive

    NOTE: the range is not checked against the real length of `mfcc`. If
    `mfcc` is a NumPy array with fewer than offset + duration frames, the
    trailing slices come back narrower than `unit` (or empty).
    '''
    if unit <= 0:
        raise ValueError('unit must be a positive number of frames')
    if stride is None:
        # unit // 2 is 0 when unit == 1; a zero step would never advance the
        # window, so fall back to a step of one frame.
        stride = max(1, unit // 2)
    elif stride <= 0:
        raise ValueError('stride must be a positive number of frames')

    # Last start position whose window still ends at or before offset + duration.
    last_start = offset + duration - unit
    return [mfcc[:, left:left + unit]
            for left in range(offset, last_start + 1, stride)]

In [87]:
def suffix_filter(files, suffix):
    '''
    return list of files with given suffix, in their original order

    A real list is returned (not a one-shot filter iterator) so the result
    has a length and can be iterated more than once.
    '''
    return [name for name in files if name.endswith(suffix)]

def scan_dir(file_dir, label):
    '''
    scan .ftr files in file_dir and return (samples, labels)

    file_dir: directory that holds the pickled .ftr feature files
    label: label attached to every subsample taken from this directory
    return: (samples, labels); samples holds the windows that subsampling()
        cuts from each file using the module-level offset / duration / unit /
        stride, and labels holds one copy of `label` per window
    '''
    samples = []
    labels = []

    # Build full paths instead of os.chdir()-ing into file_dir: if loading a
    # file raised, the old code left the process in the wrong directory.
    # Materialising the names also gives the progress bar a total.
    files = list(suffix_filter(os.listdir(file_dir), '.ftr'))
    for filename in progress(files):
        mfcc = load_from_file(os.path.join(file_dir, filename))
        subs = subsampling(mfcc, offset, duration, unit, stride)
        samples.extend(subs)
        labels.extend([label] * len(subs))

    return samples, labels

In [None]:
# Move into the yzc recordings directory; the relative 'Positive/' and
# 'Negative/' paths used in the following cells are resolved from here.
os.chdir('../Data/Sounds/yzc/')
os.listdir()

In [53]:
# Build the per-window dataset: windows from Positive/ are labelled '+',
# windows from Negative/ are labelled '-'.
p_samples, p_labels = scan_dir('Positive/', '+')
n_samples, n_labels = scan_dir('Negative/', '-')

36it [00:00, 1000.89it/s]
36it [00:00, 924.36it/s]


In [56]:
# Sanity check: samples and labels should have the same leading dimension.
print(np.array(p_samples).shape)
print(np.array(p_labels).shape)

(6660, 40, 7)
(6660,)


In [59]:
samples = p_samples + n_samples # dataset: positive windows first, then negative ones
labels = p_labels + n_labels # labels, in the same order as `samples`

## Flatten Features

In [63]:
# Flatten every window into a 1-D feature vector so it can be fed to the SVM.
flattened_samples = [sample.flatten() for sample in samples]
np.array(flattened_samples).shape

(13320, 280)

In [64]:
# Hold out 10% of the shuffled windows for testing.
# NOTE(review): no random_state is passed, so the split (and the scores
# below) change from run to run.
# NOTE(review): consecutive windows overlap (stride is half a unit) and
# windows from one recording can land in both splits, so the test score is
# probably optimistic -- consider splitting by file instead.
X_train1, X_test1, Y_train1, Y_test1 = train_test_split(flattened_samples, labels, shuffle=True, test_size=0.1)

In [70]:
# Sizes of the training and test splits.
print(len(X_train1))
print(len(X_test1))

11988
1332


In [78]:
# Train an RBF-kernel SVM on the flattened windows and report per-window
# accuracy on both splits.
# NOTE(review): gamma looks hand-tuned; C is not passed, so it keeps the
# library default.
clf = SVC(kernel='rbf', gamma=3e-5)
clf.fit(X_train1, Y_train1)
print('train score:', clf.score(X_train1, Y_train1))
print('test  score:', clf.score(X_test1, Y_test1))

train score: 0.9764764764764765
test  score: 0.93993993993994


### Save Model

In [81]:
# Persist the trained classifier (train 98%, test 94% acc) via the pickle
# helper defined earlier in this file.
save_to_file(clf, '../../../voice/svm98-94.clf')

## Test Batch Generator

Generate test batches from a `.ftr` file

In [91]:
def get_batches(subsamples, batch_size):
    '''
    Group consecutive subsamples into test batches of exactly batch_size.

    subsamples: iterable of subsamples, consumed in order
    batch_size: number of subsamples per batch (an odd number is suggested)
    return: list of batches, each a list of batch_size subsamples; leftover
        subsamples that do not fill a whole batch are dropped
    '''
    batches = []
    current = []
    for sample in subsamples:
        current.append(sample)
        if len(current) != batch_size:
            continue
        # The batch is full: store it and start collecting the next one.
        batches.append(current)
        current = []
    return batches

In [120]:
def scan_dir_batchly(file_dir, batch_size, label):
    '''
    scan .ftr files in file_dir and return (batches, labels)

    file_dir: directory that holds the pickled .ftr feature files
    batch_size: number of subsamples per batch, passed to get_batches()
    label: label attached to every batch taken from this directory
    return: (batches, labels); batches are built per file, so a batch never
        mixes windows from two files, and labels holds one copy of `label`
        per batch
    '''
    batches = []
    labels = []

    # Build full paths instead of os.chdir()-ing into file_dir: if loading a
    # file raised, the old code left the process in the wrong directory.
    # Materialising the names also gives the progress bar a total.
    files = list(suffix_filter(os.listdir(file_dir), '.ftr'))
    for filename in progress(files):
        mfcc = load_from_file(os.path.join(file_dir, filename))
        subs = subsampling(mfcc, offset, duration, unit, stride)
        new_batches = get_batches(subs, batch_size)
        batches.extend(new_batches)
        labels.extend([label] * len(new_batches))

    return batches, labels

In [121]:
def predict_batchly(clf, batch):
    '''
    Predict one class label for a batch by majority vote.

    clf: svm classifier; its predict() is called once on the whole batch
    batch: one batch, shape like (xxx, 40, 7)
    return: '+' when the '+' votes strictly outnumber the '-' votes,
        otherwise '-' (so ties go to '-'; an odd batch size avoids ties)
    '''
    # Each sample is flattened first, giving rows of shape (280,) for the
    # (40, 7) windows used in this notebook.
    votes = clf.predict([sample.flatten() for sample in batch])
    positive_votes = votes[votes == '+']
    negative_votes = votes[votes == '-']
    if len(positive_votes) > len(negative_votes):
        return '+'
    return '-'

### Let's run a quick test

In [118]:
# Spot-check one positive recording: cut it into windows (default stride)
# and group the windows into batches of 5.
mfcc = load_from_file('Positive/横屏 小声 3.ftr')
subsamples = subsampling(mfcc, offset, duration, unit)
batches = get_batches(subsamples, 5)
np.array(batches).shape

(37, 5, 40, 7)

In [123]:
# Fraction of the batches above whose majority vote is '+', which is the
# expected label because the recording comes from Positive/.
cnt = 0
correct = 0
for batch in batches:
    cnt += 1
    correct += (predict_batchly(clf, batch) == '+')
correct / cnt

1.0

### Define a test function

In [124]:
def score_batches(clf, batches, labels):
    '''
    score a classifier's performance on batches

    clf: classifier handed to predict_batchly()
    batches: sequence of batches
    labels: sequence holding the expected label of each batch
    return: acc (float number), the fraction of batches whose predicted
        label equals the expected one
    raises: ValueError if `batches` is empty or its length differs from
        `labels`
    '''
    total = len(batches)
    # zip() would silently stop at the shorter input while the denominator
    # stayed len(batches), producing a wrong accuracy; fail loudly instead.
    if total != len(labels):
        raise ValueError('batches and labels must have the same length')
    if total == 0:
        raise ValueError('cannot score an empty list of batches')

    correct = sum(1 for batch, label in zip(batches, labels)
                  if predict_batchly(clf, batch) == label)
    return correct / total

## Scoring Batch-wise Performance

In [162]:
# load in batches
# NOTE(review): these are the same Positive/ and Negative/ recordings whose
# windows were used to fit the classifier (90% of them landed in the training
# split), so the score computed from these batches is mostly a training
# score.

batch_size = 11 # adjustable; odd, so a '+'/'-' vote cannot tie
p_batches, p_labels = scan_dir_batchly('Positive/', batch_size, '+')
n_batches, n_labels = scan_dir_batchly('Negative/', batch_size, '-')
batches, labels = p_batches + n_batches, p_labels + n_labels

36it [00:00, 1631.99it/s]
36it [00:00, 747.60it/s]


In [163]:
# Shape check: (number of batches, batch_size, then the shape of one window).
np.array(batches).shape

(1152, 11, 40, 7)

In [164]:
# Batch-level (majority-vote) accuracy on the batches loaded above.
score_batches(clf, batches, labels)

0.9939236111111112

## Transfer Learning Performance

Evaluated on Fengshi Zheng's dataset, using the classifier trained above without any retraining.

In [131]:
# Switch to the sibling MP3/ directory; 'Positive/' and 'Negative/' in the
# next cell are resolved from here.
os.chdir('../MP3/')
os.getcwd()

'/Users/james/MobileProximateSpeech/Analysis/Data/Sounds/MP3'

In [139]:
# load in Fengshi's batches
# With batch_size = 1 every batch is a single window, so the "vote" in
# predict_batchly is just that window's own prediction.

batch_size = 1 # adjustable
p_batches, p_labels = scan_dir_batchly('Positive/', batch_size, '+')
n_batches, n_labels = scan_dir_batchly('Negative/', batch_size, '-')
batches, labels = p_batches + n_batches, p_labels + n_labels

35it [00:00, 799.73it/s]
35it [00:00, 966.95it/s]


In [141]:
# Shape check: (number of batches, batch_size, then the shape of one window).
np.array(batches).shape

(13020, 1, 40, 7)

In [140]:
# Accuracy of the classifier trained on the yzc recordings when applied,
# without retraining, to the batches loaded from this second dataset.
score_batches(clf, batches, labels)

0.6459293394777266

In [150]:
# Return to the yzc directory so that relative paths point at the original
# dataset again.
os.chdir('../yzc/')
os.getcwd()

'/Users/james/MobileProximateSpeech/Analysis/Data/Sounds/yzc'

## Todo
- 哪些容易被错分？
- 一个源音频提供多个.ftr
- 50% overlap