### Helper Classes

First we get all of our helper modules. The prepare_EMG module will prepare the EMG data for phoneme recognition. The prepare_outputs module will prepare our target labels and align them with our EMG data. The module 'prepare_data' will help us read data from CSV into a dataframe. Finally, 'vis' will help visualize EMG data in both time and frequency domains. 

In [13]:
%load_ext autoreload
%autoreload 2

import prepare_EMG, prepare_outputs, prepare_data, vis
# autodetector = Output_Prep.detector
EMG_Prep = prepare_EMG.EMG_preparer()
# Output_Prep = prepare_outputs.output_preparer(subvocal_detector = autodetector, window_size=30.0)
Output_Prep = prepare_outputs.output_preparer()

Data_Prep = prepare_data.data_preparer()



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
singles = Data_Prep.load_singletons(1)
print(singles.keys())

File b'simple-svr-data/raspy-1' does not exist
dict_keys(['dusty', 'march', 'direful', 'complete', 'superb', 'poised', 'wait', 'quaint', 'save', 'copy', 'interest', 'separate', 'bright', 'utter', 'bored', 'nondescript', 'license', 'vest', 'dance', 'money', 'languid', 'swim', 'enthusiastic', 'quartz', 'planes', 'spiritual', 'imperfect', 'coal', 'hobbies', 'sound', 'bow', 'squirrel', 'push', 'treatment', 'mine', 'precede', 'weather', 'amazing', 'round', 'stingy', 'signal', 'marry', 'country', 'uncle', 'dust', 'certain', 'loose', 'knock', 'advice', 'confuse', 'animated', 'loving', 'feeling', 'absorbing', 'trick', 'spare', 'rod', 'caption', 'throne', 'clumsy', 'vague', 'tow', 'hang', 'rely', 'tired', 'barbarous', 'pan', 'innocent', 'combative', 'low', 'rub', 'mixed', 'actually', 'faulty', 'thirsty', 'dam', 'doubtful', 'flowers', 'defective', 'frogs', 'outstanding', 'ducks', 'icicle', 'fry', 'load', 'cracker', 'efficient', 'hop', 'fax', 'fancy', 'reading', 'real', 'addicted', 'motion', 'cle

### Labeling the Data

First, we generate the phoneme and articulatory feature labels from each word. We'll use these to process the data in each file based on the length of the file and how many phonemes it should contain. We scale the FFT windows relative to the length of time we expect an even distribution of phonemes across the file to have. 

In [37]:
# data_1 = Data_Prep.load('Sat Mar  4 00:44:23 2017')
# data_2 = Data_Prep.load('Sat Mar  4 00:45:02 2017')
# data_3 = Data_Prep.load('Sat Mar  4 00:45:47 2017')
# data_4 = Data_Prep.load('Sat Mar  4 00:47:01 2017')
# data_5 = Data_Prep.load('Sat Mar  4 00:47:36 2017')
# data_6 = Data_Prep.load('Sat Mar  4 00:48:09 2017')
# data_7 = Data_Prep.load('Sat Mar  4 00:49:05 2017')
# data_8 = Data_Prep.load('Sat Mar  4 00:49:41 2017')
# data_9 = Data_Prep.load('Sat Mar  4 00:50:22 2017')
# data_10 = Data_Prep.load('Sat Mar  4 00:51:17 2017')
# data_11 = Data_Prep.load('Sat Mar  4 00:52:02 2017')
# data_12 = Data_Prep.load('Sat Mar  4 00:52:38 2017')
# data_13 = Data_Prep.load('Sat Mar  4 00:53:24 2017')
# data_14 = Data_Prep.load('Sat Mar  4 00:53:51 2017')
# data_15 = Data_Prep.load('Sat Mar  4 00:54:25 2017')
# data_16 = Data_Prep.load('Sat Mar  4 00:54:57 2017')
# data_17 = Data_Prep.load('Sat Mar  4 00:56:01 2017')
# data_18 = Data_Prep.load('Sat Mar  4 00:56:35 2017')
# data_19 = Data_Prep.load('Sat Mar  4 00:57:21 2017')
# data_20 = Data_Prep.load('Sat Mar  4 00:57:49 2017')
# data_21 = Data_Prep.load('Sat Mar  4 00:58:59 2017')
# data_22 = Data_Prep.load('Sat Mar  4 00:59:53 2017')

# data_list = [data_1, data_2, data_3, data_4, data_5, data_6, data_7, data_8, data_9, data_10, data_11, data_12, data_13, data_14, data_15, data_16, data_17, data_18, data_19, data_20, data_21, data_22]
labels = {}
windows = {}
for word in singles:
    label = Output_Prep.transform(word)
    num_phonemes = label.shape[0]
    labels[word] = label
    windows[word] = EMG_Prep.process(singles[word],num_phonemes)




In [36]:
test_word = 'license'
test_word2 = 'money'
test_EMG = EMG_Prep.process(singles[test_word], labels[test_word].shape[0])
test_EMG2 = EMG_Prep.process(singles[test_word2], labels[test_word2].shape[0])

print(test_EMG.head(6), labels[test_word], test_EMG2.head(), labels[test_word2])

       0.0       10.0      10.0      20.0      20.0      30.0      30.0   \
0  43.492969  0.020570  2.259229  1.954676  1.681519  1.445389  2.413685   
1  47.746875  2.017764  0.095421  0.633403  0.322932  2.490014  1.047570   
2  43.325391  1.125540  0.838279  0.147355  0.309692  0.321898  0.573655   
3  42.680859  1.142250  0.402145  1.505252  0.509298  0.329306  0.888195   
4  44.421094  0.062747  1.107197  0.216720  0.385406  1.835386  1.378310   
5  44.098828  0.190692  0.318395  1.432442  0.888541  0.892307  0.651676   

      40.0      40.0      50.0     ...        450.0     460.0     460.0  \
0  0.423931  1.066141  1.716362    ...     0.394087  0.043894  0.195630   
1  0.822770  4.314441  0.909039    ...     0.924509  0.736296  1.002601   
2  4.172037  2.061649  0.181364    ...     1.295144  0.545687  1.338983   
3  1.115764  2.908927  1.167312    ...     0.145217  0.530457  0.348105   
4  1.094253  2.286328  0.503626    ...     0.123156  2.375713  1.121218   
5  2.017346  2.06

In [17]:
import pandas
%autoreload 2

# num_files = len(data_list)
# labels_frame = pandas.read_csv('austen_subvocal.csv')
# trans_labels = Output_Prep.transform(labels_frame.iloc[0][0])
# data_1_proc = EMG_Prep.process(data_1)
# aligned_data, trans_labels= Output_Prep.zip(data_1_proc, trans_labels, repeat=3)

# for file in range(1, num_files):
#     trans_labels_iter = Output_Prep.transform(labels_frame.iloc[file][0])
#     data_proc_iter = EMG_Prep.process(data_list[file])
#     aligned_data_iter, trans_labels_iter = Output_Prep.zip(data_proc_iter, trans_labels_iter, repeat=3)

#     aligned_data = aligned_data.append(aligned_data_iter)
#     trans_labels = trans_labels.append(trans_labels_iter)
    
# print('Aligned Data shape:',aligned_data.shape,'Trans labels shape:',trans_labels.shape)
for word in labels:
#     append labels to the master label dataframe
#     Use phonemes to name each series in 'windows' for that word
#     Append windows to a master window dataframe


        NaN        inf       inf       inf       inf       inf       inf  \
0  47.914453  1.158442  1.401185  3.283131  0.277644  0.267410  0.169687   
1  46.264453  0.050700  0.546690  0.662641  1.239360  0.448413  1.328471   
2  50.569922  2.983494  1.049043  0.930923  0.472148  2.044457  0.101294   
3  49.770703  0.043322  2.336511  1.358702  0.614687  0.364910  0.341432   
4  47.630859  0.727348  0.231554  2.098967  1.644969  1.083569  1.455825   

        inf       inf       inf    ...          inf       inf       inf  \
0  1.323168  0.466061  0.956487    ...     1.822187  1.601457  5.380026   
1  1.004214  1.435452  3.199529    ...     2.771921  1.710445  3.321455   
2  2.701824  2.107786  5.631576    ...     0.459451  2.579507  0.463937   
3  0.641346  2.528079  4.349871    ...     0.121954  0.506894  0.841946   
4  1.038744  0.158296  4.088807    ...     4.347557  0.907961  1.048890   

        inf       inf       inf       inf       inf       inf       inf  
0  0.386002  4.816

### AF Extractor Models

These models will be optimized for extracting AF's from the data, before passing those AF's onto an MLPC for identifying the most likely phoneme. 

In [77]:
# Prepare lists of parameters for our GridSearch
# First, our layer sizes
layer_sizes = []
for i in range(2,3):
    for j in range(0,180,30):
        if j:
            tup = []
            for k in range(i):
                tup.append(j)
            layer_sizes.append(tuple(tup))
print('number layer sizes:',len(layer_sizes),'here be layer sizes',layer_sizes)

# Next, our alpha values
alphas = [0.0000001,1,1000]

number layer sizes: 5 here be layer sizes [(30, 30), (60, 60), (90, 90), (120, 120), (150, 150)]


### Preparing GridSearch and Assesing Stock MLPC as AF extractor models

We setup the objects for performing gridsearch on each one of the Articulatory Feature Extractor models. We also train untuned, stock MLPC models to serve as a performance baseline. We will compare the performance of these baseline, untuned models to our gridsearched models to determine whether gridsearch has in fact improved the model parameters for each AF extractor. 

In [78]:
from sklearn.neural_network import MLPClassifier as MLPC
# Import other models to try for feature extraction
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest

import copy

X_train, X_test, y_train, y_test = train_test_split(aligned_data, trans_labels, test_size=0.15, random_state=12)

combined_features = FeatureUnion([
    ('pca',PCA(random_state=18)),
    ('kbest',SelectKBest(k=1))
])

pipeline = Pipeline([
    ('features', combined_features),
    ('model', MLPC(random_state=12))
])


param_grid = {
    'features__pca__n_components':[0.5,0.66,0.9],
    'model__solver':['adam'],
    'model__hidden_layer_sizes':layer_sizes,
    'model__activation':['relu'],
    'model__alpha': alphas,
    'model__max_iter':[200]
}


grid_search = GridSearchCV(pipeline, param_grid, n_jobs=-1)

manner_classifier = MLPC(solver='adam',random_state=3)
manner_classifier.fit(X_train, y_train['manner'])
m_score = manner_classifier.score(X_test, y_test['manner'])

place_classifier = MLPC(solver='adam',random_state=6)
place_classifier.fit(X_train, y_train['place'])
p_score = place_classifier.score(X_test, y_test['place'])

height_classifier = MLPC(solver='adam',random_state=9)
height_classifier.fit(X_train, y_train['height'])
h_score = height_classifier.score(X_test, y_test['height'])

vowel_classifier = MLPC(solver='adam',random_state=12)
vowel_classifier.fit(X_train, y_train['vowel'])
v_score = vowel_classifier.score(X_test, y_test['vowel'])

print('manner score:',m_score,'place score:',p_score,'height score:',h_score,'vowel score:',v_score)
# print(data_1_proc.head(50), trans_labels['manner'].head(50))

manner score: 0.403773584906 place score: 0.283018867925 height score: 0.491823899371 vowel score: 0.592452830189


In [35]:
manner_classifier2 = copy.deepcopy(grid_search)
manner_classifier2.fit(aligned_data, trans_labels['manner'])
m_score2 = manner_classifier2.score(aligned_data, trans_labels['manner'])

print('manner score:',m_score2)



manner score: 0.389433962264


In [36]:
place_classifier2 = copy.deepcopy(grid_search)
place_classifier2.fit(aligned_data, trans_labels['place'])
p_score2 = place_classifier2.score(aligned_data, trans_labels['place'])

print('place score:',p_score2)



place score: 0.280754716981


In [37]:
height_classifier2 = copy.deepcopy(grid_search)
height_classifier2.fit(aligned_data, trans_labels['height'])
h_score2 = height_classifier2.score(aligned_data, trans_labels['height'])

print('vowel score:',h_score2)



vowel score: 0.504905660377


In [38]:
vowel_classifier2 = copy.deepcopy(grid_search)
vowel_classifier2.fit(aligned_data, trans_labels['vowel'])
v_score2 = vowel_classifier2.score(aligned_data, trans_labels['vowel'])

print('vowel score:',v_score2)



vowel score: 0.610566037736


In [40]:
# Experiment with PCA here

from sklearn.decomposition import PCA


manner_union = FeatureUnion([('pca',PCA(n_components=0.03)),('kbest',SelectKBest(k=1))])
manner_reduced_data = manner_union.fit_transform(aligned_data, trans_labels['manner'])
manner_X_train, manner_X_test, manner_y_train, manner_y_test = train_test_split(manner_reduced_data, trans_labels, test_size=0.15, random_state=12)


manner_classifier3 = MLPC(solver='adam',alpha=1000,hidden_layer_sizes=(1),random_state=3,max_iter=300)
manner_classifier3.fit(manner_X_train, manner_y_train['manner'])
m_score3 = manner_classifier3.score(manner_X_test, manner_y_test['manner'])
print('manner score:',m_score3)

manner score: 0.405031446541


In [41]:
place_union = FeatureUnion([('pca',PCA(n_components=0.9)),('kbest',SelectKBest(k=1))])
place_reduced_data = place_union.fit_transform(aligned_data, trans_labels['place'])
place_X_train, place_X_test, place_y_train, place_y_test = train_test_split(place_reduced_data, trans_labels, test_size=0.15, random_state=12)


place_classifier3 = MLPC(solver='adam',alpha=0.00001,hidden_layer_sizes=(120,120),random_state=6,max_iter=300)
place_classifier3.fit(place_X_train, place_y_train['place'])
p_score3 = place_classifier3.score(place_X_test, place_y_test['place'])
print('place classifier score:',p_score3)

place classifier score: 0.161006289308


In [7]:
height_union = FeatureUnion([('pca',PCA(n_components=0.9)),('kbest',SelectKBest(k=1))])
height_reduced_data = height_union.fit_transform(aligned_data, trans_labels['height'])
height_X_train, height_X_test, height_y_train, height_y_test = train_test_split(height_reduced_data, trans_labels, test_size=0.15, random_state=12)


height_classifier3 = MLPC(solver='adam',alpha=0.00001,hidden_layer_sizes=(180,180),random_state=12,max_iter=300)
height_classifier3.fit(height_X_train, height_y_train['height'])
h_score3 = height_classifier3.score(height_X_test, height_y_test['height'])
print('height score:',h_score3)

height score: 0.353459119497


In [8]:
vowel_union = FeatureUnion([('pca',PCA(n_components=0.9)),('kbest',SelectKBest(k=1))])
vowel_reduced_data = vowel_union.fit_transform(aligned_data, trans_labels['vowel'])
vowel_X_train, vowel_X_test, vowel_y_train, vowel_y_test = train_test_split(vowel_reduced_data, trans_labels, test_size=0.15, random_state=12)


vowel_classifier3 = MLPC(solver='adam',alpha=0.00001,hidden_layer_sizes=(180,180),random_state=12,max_iter=300)
vowel_classifier3.fit(vowel_X_train, vowel_y_train['vowel'])
v_score3 = vowel_classifier3.score(vowel_X_test, vowel_y_test['vowel'])
print('vowel score:',v_score3)

vowel score: 0.522012578616


In [80]:
pho_score = phoneme_classifier.score(aligned_data, phoneme_labels)
print(pho_score)

0.0922641509434


In [79]:
from sklearn.preprocessing import LabelEncoder as LE
from sklearn.feature_extraction import DictVectorizer as DV
from sklearn.preprocessing import MultiLabelBinarizer as MLB
from sklearn.preprocessing import OneHotEncoder as OHE
from collections import Counter

m_count = Counter()
p_count = Counter()
h_count = Counter()
v_count = Counter()

for row in range(trans_labels.shape[0]):
    m_count.update([trans_labels.iloc[row]['manner']])
    p_count.update([trans_labels.iloc[row]['place']])
    h_count.update([trans_labels.iloc[row]['height']])
    v_count.update([trans_labels.iloc[row]['vowel']])
    
counters = [m_count,p_count,h_count,v_count]

feature_dict = {}
for count in counters:
    current = 0
    for feature in count.keys():
        feature_dict[feature] = current
        current += 1
        
print(feature_dict)

{'voiced-fricative': 0, 'vowel': 1, 'fricative': 2, 'nasal': 3, 'approximant': 4, 'voiced-stop': 5, 'aspirated': 6, 'stop': 7, 'dental': 0, 'mid': 1, 'labial': 2, 'mid-front': 3, 'lateral': 4, 'front': 5, 'alveolar': 6, 'back': 7, 'mid-back': 8, 'uknown': 9, 'dorsal': 10, 'retroflex': 11, 'max': 0, 'low': 2, 'very high': 3, 'very-high': 4, 'high': 5, 'mid-low': 6, 'mid-high': 7, 'no': 0, 'yes': 1}


In [14]:
num_labels = copy.copy(trans_labels)
for row in range(trans_labels.shape[0]):
    m_feat = trans_labels.iloc[row]['manner']
    p_feat = trans_labels.iloc[row]['place']
    h_feat = trans_labels.iloc[row]['height']
    v_feat = trans_labels.iloc[row]['vowel']
    num_labels.iloc[row]['manner'] = feature_dict[m_feat]
    num_labels.iloc[row]['place'] = feature_dict[p_feat]
    num_labels.iloc[row]['height'] = feature_dict[h_feat]
    num_labels.iloc[row]['vowel'] = feature_dict[v_feat]
num_labels.head()

In [80]:
encoder = OHE()
new_labels = encoder.fit_transform(num_labels)
print(new_labels.toarray())
enc_labels = pandas.DataFrame(new_labels.toarray())
print(enc_labels.head())

[[ 1.  0.  0. ...,  0.  1.  0.]
 [ 0.  1.  0. ...,  0.  0.  1.]
 [ 0.  0.  1. ...,  0.  1.  0.]
 ..., 
 [ 0.  0.  0. ...,  0.  1.  0.]
 [ 0.  1.  0. ...,  1.  0.  1.]
 [ 0.  0.  0. ...,  0.  1.  0.]]
    0    1    2    3    4    5    6    7    8    9  ...    20   21   22   23  \
0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0  0.0 ...   1.0  0.0  0.0  0.0   
1  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  1.0 ...   0.0  1.0  0.0  0.0   
2  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   1.0  0.0  0.0  0.0   
3  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   0.0  0.0  1.0  0.0   
4  0.0  0.0  0.0  1.0  0.0  0.0  0.0  0.0  0.0  0.0 ...   1.0  0.0  0.0  0.0   

    24   25   26   27   28   29  
0  0.0  0.0  0.0  0.0  1.0  0.0  
1  0.0  0.0  0.0  0.0  0.0  1.0  
2  0.0  0.0  0.0  0.0  1.0  0.0  
3  0.0  0.0  0.0  0.0  0.0  1.0  
4  0.0  0.0  0.0  0.0  1.0  0.0  

[5 rows x 30 columns]


In [81]:
cols = list(aligned_data.axes[1]) + list(enc_labels.axes[1])
# print(cols)

phoneme_inputs = pandas.DataFrame(columns=cols)
phoneme_labels = trans_labels.axes[0]

for row in range(aligned_data.shape[0]):
    new_row = aligned_data.iloc[row].append(enc_labels.iloc[row])
    new_row = new_row.rename(trans_labels.iloc[row].name)
    phoneme_inputs = phoneme_inputs.append(new_row)

pho_X_train, pho_X_test, pho_y_train, pho_y_test = train_test_split(phoneme_inputs,phoneme_labels, test_size=0.15, random_state=12)

phoneme_classifier = MLPC(solver='adam',hidden_layer_sizes=(90,90),random_state=6, max_iter=300)
phoneme_classifier.fit(pho_X_train, pho_y_train)

MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(90, 90), learning_rate='constant',
       learning_rate_init=0.001, max_iter=300, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=6, shuffle=True,
       solver='adam', tol=0.0001, validation_fraction=0.1, verbose=False,
       warm_start=False)

In [82]:
pho_score = phoneme_classifier.score(pho_X_test, pho_y_test)
print(pho_score)


0.923270440252


In [89]:
# feature_extractors = FeatureUnion([
#     ('manner_extractor',manner_classifier2.best_estimator_),
#     ('place_extractor',place_classifier2.best_estimator_),
#     ('height_extractor',height_classifier2.best_estimator_),
#     ('vowel_extractor',vowel_classifier2.best_estimator_),
#     ('pho_pca',PCA())
# ])

# pho_pipe = Pipeline([
#     ('AF_extractors',feature_extractors),
#     ('pho_classifier',MLPC())
# ])
manner_inputs = manner_classifier.predict(pho_X_test.iloc[:,0:60])
place_inputs = place_classifier.predict(pho_X_test.iloc[:,0:60])
height_inputs = height_classifier.predict(pho_X_test.iloc[:,0:60])
vowel_inputs = vowel_classifier.predict(pho_X_test.iloc[:,0:60])
raw_inputs = copy.copy(trans_labels)
for row in range(len(manner_inputs)):

    raw_inputs.iloc[row]['manner'] = manner_inputs[row]
    raw_inputs.iloc[row]['place'] = place_inputs[row]
    raw_inputs.iloc[row]['height'] = height_inputs[row]
    raw_inputs.iloc[row]['vowel'] = vowel_inputs[row]
    
# print(raw_inputs.head(300))

test_num_labels = copy.copy(raw_inputs)
for row in range(pho_X_test.shape[0]):
    m_feat = raw_inputs.iloc[row]['manner']
#     print(m_feat)
    p_feat = raw_inputs.iloc[row]['place']
    h_feat = raw_inputs.iloc[row]['height']
    v_feat = raw_inputs.iloc[row]['vowel']
    test_num_labels.iloc[row]['manner'] = feature_dict[m_feat]
    test_num_labels.iloc[row]['place'] = feature_dict[p_feat]
    test_num_labels.iloc[row]['height'] = feature_dict[h_feat]
    test_num_labels.iloc[row]['vowel'] = feature_dict[v_feat]
# num_labels.head()

encoder = OHE()
test_new_labels = encoder.fit_transform(test_num_labels)
# print(new_labels.toarray())
test_enc_labels = pandas.DataFrame(test_new_labels.toarray())
# print(enc_labels.head())

cols = list(aligned_data.axes[1]) + list(test_enc_labels.axes[1])
# print(cols)

test_pho_inputs = pandas.DataFrame(columns=cols)
phoneme_labels = trans_labels.axes[0]

for row in range(pho_X_test.shape[0]):
    new_row = aligned_data.iloc[row].append(test_enc_labels.iloc[row])
    new_row = new_row.rename(trans_labels.iloc[row].name)
    test_pho_inputs = test_pho_inputs.append(new_row)

# pho_pipe.fit(pho_X_train, pho_y_train)
pho_test_score = phoneme_classifier.score(test_pho_inputs, pho_y_test)
print('phoneme model test score:',pho_test_score)

phoneme model test score: 0.0603773584906
