# Project MentalWords

The basic idea is to take signals from OpenBCI electrodes placed around the relevant vocal EMG sources,
pre-process them to remove "NOISE" and known artifacts,
cut the recorded data into segments, each tagged with the correct "LABEL",
train a convolutional neural network ("CNN"),
and run new data through the trained model to send queries to Google search.

Simple, right? :)

## First stage - get data from LSL and record mental words
LSL - Lab Streaming Layer


In [None]:
import numpy as np
import pandas as pd
import random
from time import time, strftime, gmtime
from optparse import OptionParser
from pylsl import StreamInlet, resolve_byprop
from sklearn.linear_model import LinearRegression

# Default output path, stamped with the current UTC time so successive
# recordings get distinct file names. Raw string: the Windows backslashes are
# literal characters, not (invalid) escape sequences.
default_fname = (r"D:\Recordings\MentalWords\data_%s.csv" % strftime("%Y-%m-%d-%H.%M.%S", gmtime()))

# Command-line options: recording length in seconds and the CSV output path.
parser = OptionParser()
parser.add_option("-d", "--duration",
                  dest="duration", type='int', default=200,
                  help="duration of the recording in seconds.")
parser.add_option("-f", "--filename",
                  dest="filename", type='str', default=default_fname,
                  help="Name of the recording file.")

# dejitter timestamps
# When True, the recorded timestamps are replaced by a linear fit against the
# sample index before the data is saved.
dejitter = False
(options, args) = parser.parse_args()

# find LSL channel
# Look for a stream of type 'EEG' for up to two seconds and abort when none
# is advertised.
print("looking for an EEG stream...")
streams = resolve_byprop('type', 'EEG', timeout=2)
if not streams:
    # Raise an exception instance. `raise(RuntimeError, "...")` raises a
    # tuple, which Python 3 rejects with a TypeError instead of showing the
    # intended message.
    raise RuntimeError("Cant find EEG stream")

# start data input
# Open an inlet on the first EEG stream that was found.
print("Start aquiring data")
inlet = StreamInlet(streams[0], max_chunklen=12)
eeg_time_correction = inlet.time_correction()

# The marker stream is optional: without one, inlet_marker stays False and
# the recording loop skips marker polling.
print("looking for a Markers stream...")
marker_streams = resolve_byprop('type', 'Markers', timeout=2)

if not marker_streams:
    inlet_marker = False
    print("Cant find Markers stream")
else:
    inlet_marker = StreamInlet(marker_streams[0])
    marker_time_correction = inlet_marker.time_correction()

# grab meta-data
info = inlet.info()
description = info.desc()
freq = info.nominal_srate()
Nchan = info.channel_count()

# Walk the <channels> element of the stream description and collect one
# label per channel; the first child is read unconditionally.
ch = description.child('channels').first_child()
ch_names = [ch.child_value('label')]
while len(ch_names) < Nchan:
    ch = ch.next_sibling()
    ch_names.append(ch.child_value('label'))

## Word Capturing ##
# Prompt bookkeeping. A new term is drawn from termBank roughly every two
# seconds, and every received sample is tagged with the current counter and
# term. The counter is incremented right after a prompt is printed, so the
# samples recorded while prompt N is on screen carry the value N + 1.
currentWord, currentTerm = 1, ""
t_word = time() + 1 * 2
words, terms = [], []
termBank = ["KAPIOT", "KARNAF", "MEXICANI"]

# Per-chunk sample data, per-sample timestamps, and [marker, timestamp] pairs.
res, timestamps, markers = [], [], []
t_init = time()
print('Start recording at time t=%.3f' % t_init)
print("\n")
print('Will work for appx %.1f' % options.duration)
while (time() - t_init) < options.duration:
    # Check for new word
    if not time() < t_word:
        currentTerm = random.choice(termBank)
        # The leading run of newlines pushes the previous prompt off screen.
        print("\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n" + str(currentWord) + ": " + currentTerm)
        currentWord += 1
        t_word = time() + 1 * 2
    try:
        chunk, chunk_times = inlet.pull_chunk(timeout=1.0, max_samples=12)
        if chunk_times:
            n_new = len(chunk_times)
            res.append(chunk)
            timestamps.extend(chunk_times)
            # One counter/term entry per received sample.
            words.extend([currentWord] * n_new)
            terms.extend([currentTerm] * n_new)
        if inlet_marker:
            # Non-blocking poll of the optional marker stream.
            marker, marker_time = inlet_marker.pull_sample(timeout=0.0)
            if marker_time:
                markers.append([marker, marker_time])
    except KeyboardInterrupt:
        # Ctrl-C ends the recording early; whatever was collected is kept.
        break

# Assemble everything that was recorded into one table and write it to CSV.
if not res:
    # np.concatenate rejects an empty list with an unhelpful message.
    raise RuntimeError("No EEG samples were received; nothing to save")

res = np.concatenate(res, axis=0)
timestamps = np.array(timestamps)

if dejitter:
    # Replace the raw timestamps with a straight-line fit against the
    # sample index.
    y = timestamps
    X = np.atleast_2d(np.arange(0, len(y))).T
    lr = LinearRegression()
    lr.fit(X, y)
    timestamps = lr.predict(X)

# Build the table from typed columns. Stacking numbers and term strings into
# a single NumPy array (np.c_) turns every value into a string, after which
# the float_format passed to to_csv below has no effect.
data = pd.concat(
    [
        pd.DataFrame({'timestamps': timestamps, 'words': words, 'terms': terms}),
        pd.DataFrame(res, columns=ch_names),
    ],
    axis=1,
)

data['Marker'] = 0
# process markers:
for marker, marker_time in markers:
    # find index of markers
    # Attach each marker to the sample whose timestamp is closest to it.
    ix = np.argmin(np.abs(marker_time - timestamps))
    data.loc[ix, 'Marker'] = marker[0]


data.to_csv(options.filename, float_format='%.3f', index=False)

print('Done !')

## Second stage - segment the data with labels

In [None]:
# Clean up the recorded data for the necessary data
# Cut the recording into fixed-length "images" (imageLength samples x 4
# channels), one per prompted word, and save them next to their labels.

my_data1 = pd.read_csv(options.filename, sep=',', header=0)

del my_data1['timestamps']
del my_data1['Marker']
# Column positions after the two deletions: 0 = word counter, 1 = prompted
# term, 2-5 = the four signal channels used for the images.
# `.iloc` replaces `.ix`, which was removed in pandas 1.0.
words = my_data1.iloc[:, 0]
terms = my_data1.iloc[:, 1]
class_names = ['MEXICANI','KAPIOT','KARNAF']
# NOTE(review): column 6 is read as the class label of each row. The
# recording stage writes no dedicated label column, so confirm that the CSV
# really carries numeric labels at this position.
classes = my_data1.iloc[:, 6]

# One row per sample: [word counter, ch A, ch B, ch C, ch D]. Sized from the
# file instead of a hard-coded row count.
my_data = my_data1.iloc[:, [0, 2, 3, 4, 5]].to_numpy(dtype=float)

currentWord = 2     # counter value of the first word to collect
imageLength = 110   # samples kept per image
images = []         # finished (imageLength, 4) arrays
answers = []        # one label per finished image
rows = []           # samples of the word currently being collected

for lineIndex, currentLine in enumerate(my_data):
    if int(currentLine[0]) == currentWord:
        rows.append(currentLine[1:])
        continue

    # The word counter changed: close the image collected so far.
    if len(rows) < imageLength:
        print("ERROR: Invalid Image at currentWord = " + str(currentWord))
        exit(1)
    images.append(np.array(rows[:imageLength]))
    # NOTE(review): the label is taken from the row that triggered the
    # change, i.e. the first row of the *next* word - confirm this matches
    # how the label column is aligned.
    answers.append(classes.iloc[lineIndex])
    print(str((len(images), imageLength, 4)) + "\n")

    # Start the next word. The row that triggered the change is not added to
    # the new image, and the last word in the file is never emitted because
    # no counter change follows it.
    rows = []
    currentWord = currentLine[0]

# Final layouts: images (n_images, imageLength, 4), labels (n_images, 1).
imageDirectory = np.array(images, dtype=float).reshape(-1, imageLength, 4)
answerDirectory = np.array(answers, dtype=float).reshape(-1, 1)
np.save('imageDirectory.npy', imageDirectory)
# Labels are saved unchanged: the training stage already subtracts 1 when it
# builds the one-hot targets, so shifting them here as well would apply the
# offset twice.
np.save('answerDirectory.npy', answerDirectory)

## Third stage - Train a convolutional neural network

In [None]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, GlobalAveragePooling1D, MaxPooling1D, Conv1D
from sklearn.model_selection import train_test_split

# Load previous files
imageDirectory = np.load('imageDirectory.npy')
answerDirectory = np.load('answerDirectory.npy')
imageLength = 110 # arbitrary, but needs to be exactly like reConstructData.py

# Turn answerDirectory into one-hot array
# Flatten the labels first. The segmentation stage stacks them with
# np.vstack, so they load as an (n, 1) column; indexing with an (n,) row
# index against an (n, 1) column index broadcasts to an (n, n) grid and would
# set every label that occurs anywhere in every row.
answerDirectory = answerDirectory.astype(np.int64).ravel() # turn into int (from float)
answerDirectory = np.subtract(answerDirectory, 1) # subtract 1 from each answer so it will start from 0
oneHotAnswers = np.zeros((answerDirectory.size, 3)) # initialize one-hot array
oneHotAnswers[np.arange(answerDirectory.size), answerDirectory] = 1 # set the one-hot array

# Split entire dataset to Training (70%) and Testing Set (30%)
X_train, X_test, y_train, y_test = train_test_split(imageDirectory, oneHotAnswers, test_size=0.3)

# Build Model
# One strided 1-D convolution over the (imageLength, 4) input, pooled down to
# a single feature vector, followed by a small dense classifier with three
# softmax outputs.
model = Sequential()
model.add(Conv1D(40, 10, strides=2, padding='same', activation='relu', input_shape=(imageLength, 4)))
model.add(Dropout(0.2))
model.add(MaxPooling1D(3))
model.add(GlobalAveragePooling1D())
model.add(Dense(50, activation='relu'))
model.add(Dropout(0.2))
model.add(Dense(3, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train Model
# The held-out split is also passed as validation data during training.
model.fit(X_train, y_train, validation_data=(X_test, y_test), batch_size=100, epochs=300)

# Test Model
y_predicted = model.predict(X_test)
print(y_predicted)

## Last stage - send to Google :)

In [None]:
import webbrowser

# choose which classification to send to Google:
qID = 9

# Predicted class index for every test sample: the position of the largest
# of the three softmax outputs produced by the training stage.
classPrediction = y_predicted.argmax(axis=1)

# "www" (the original host had four w's); `q` is the parameter Google search
# reads the query from.
baseString = "https://www.google.com/search?q="
queriesByClass = {
    0: "directions+to+Mexicani+near+me",
    1: "directions+to+Kapiot+near+me",
    2: "directions+to+karnaf+near+me",
}
queryString = queriesByClass.get(int(classPrediction[qID]), "")
urlString = baseString + queryString
if queryString:
    webbrowser.open(urlString, new=2)
else:
    # Unknown class: open the project page only, instead of also opening an
    # empty search.
    webbrowser.open("https://www.brainstormil.com/projects")