# Analyze Distances Between Various Groups of Word Pairs
By: Adam Li


In [1]:
# Import Necessary Libraries
import numpy as np
import os, csv, json
import math
import random

import itertools
import matplotlib
from matplotlib import *
from matplotlib import pyplot as plt

import scipy.io

# pretty charting
import seaborn as sns
sns.set_palette('muted')
sns.set_style('darkgrid')

%matplotlib inline

In [6]:
######## Get list of files (.mat) we want to work with ########
filedir = '../condensed_data/groups/'
files = []
groups = []

# get all unique word match pairs and store in 'groups' list
for file in os.listdir(filedir):
    groups.append(file)
    if file.endswith('.mat'):
        files.append(file)

######## Load in EVENTS struct to find correct events
eventsDir = '../NIH034/behavioral/paRemap/' + 'events.mat'

events = scipy.io.loadmat(eventsDir)
events = events['events']

# print number of incorrect events and which words they belonged to
incorrectIndices = events['isCorrect'] == 0
incorrectEvents = events[incorrectIndices]
incorrectWords = []
wordList = {}
for i in range(0, len(incorrectEvents)):
    incorrectWords.append(incorrectEvents['probeWord'][i][0])

for word in np.unique(incorrectEvents['probeWord']):
    wordList[str(word)] = sum(incorrectWords == word)
    
print "There were ",len(incorrectEvents), " number of incorrect events."
print "The list of incorrect probe words: \n", wordList
# 
# get only correct events
correctIndices = events['isCorrect'] == 1
events = events[correctIndices]

print "\nThis is the length of the events struct with only correct responses: ", len(events)
print "The group of word pairings are: ", groups

There were  49  number of incorrect events.
The list of incorrect probe words: 
{"[u'PANTS']": 7, "[u'JUICE']": 8, "[u'BRICK']": 12, "[u'CLOCK']": 13, "[u'GLASS']": 9}

This is the length of the events struct with only correct responses:  1431
The group of word pairings are:  ['BRICK_CLOCK', 'BRICK_JUICE', 'BRICK_PANTS', 'CLOCK_BRICK', 'CLOCK_GLASS', 'GLASS_CLOCK', 'GLASS_JUICE', 'GLASS_PANTS', 'JUICE_BRICK', 'JUICE_GLASS', 'PANTS_BRICK', 'PANTS_GLASS']


In [88]:
diff_words_groups = []
reverse_words_groups = []
probe_words_groups = ()
target_words_groups = []

def inGroup(group, names):
    for i in range(0, len(group)):
        if cmpT(group[i],names):
            return True
    return False

def cmpT(t1, t2): 
    return sorted(t1) == sorted(t2)

# Create different groups
for pair_first in groups:
    # split words by delimiter '_' to determine groups
    firstpair = pair_first.split('_')
    
    for pair_second in groups:
        secondpair = pair_second.split('_')

        # make directory names for each word pair
        firstname = '_'.join(firstpair)
        secondname = '_'.join(secondpair)
        names = (firstname, secondname)
        
        ## 01: Different words group
        if not any(x in secondpair for x in firstpair) and not inGroup(diff_words_groups,names):
            diff_words_groups += (names,)
                
        ## 02: Probe Word Overlap Group
        if firstpair[0] == secondpair[0] and firstpair[1] != secondpair[1] and not inGroup(probe_words_groups,names):
            probe_words_groups += (names,)
        
        ## 03: Target Word Overlap Group
        if firstpair[1] == secondpair[1] and firstpair[0] != secondpair[0] and not inGroup(target_words_groups,names):
            target_words_groups += (names,)
            
        ## 04: Reverse words Group
        reverse_firstpair = firstpair
        reverse_firstpair.reverse()
        if '_'.join(reverse_firstpair) == secondname and not inGroup(reverse_words_groups,names):
            reverse_words_groups += (names,)

## printing 
print "For different words:"
for i in range(0, len(diff_words_groups)):
    print diff_words_groups[i]
    
print "For reverse words:"
for i in range(0, len(reverse_words_groups)):
    print reverse_words_groups[i]
    
print "For probe words:"
for i in range(0, len(probe_words_groups)):
    print probe_words_groups[i]
    
print "For target words:"
for i in range(0, len(diff_words_groups)):
    print diff_words_groups[i]

For different words:
('BRICK_CLOCK', 'GLASS_JUICE')
('CLOCK_BRICK', 'GLASS_PANTS')
('CLOCK_BRICK', 'JUICE_GLASS')
('CLOCK_BRICK', 'PANTS_GLASS')
('BRICK_JUICE', 'CLOCK_GLASS')
('JUICE_BRICK', 'GLASS_CLOCK')
('JUICE_BRICK', 'GLASS_PANTS')
('JUICE_BRICK', 'PANTS_GLASS')
('BRICK_PANTS', 'CLOCK_GLASS')
('PANTS_BRICK', 'GLASS_CLOCK')
('BRICK_PANTS', 'GLASS_JUICE')
('PANTS_BRICK', 'JUICE_GLASS')
('CLOCK_BRICK', 'GLASS_JUICE')
('BRICK_CLOCK', 'GLASS_PANTS')
('BRICK_CLOCK', 'JUICE_GLASS')
('BRICK_CLOCK', 'PANTS_GLASS')
('GLASS_CLOCK', 'BRICK_JUICE')
('CLOCK_GLASS', 'JUICE_BRICK')
('CLOCK_GLASS', 'PANTS_BRICK')
('GLASS_CLOCK', 'BRICK_PANTS')
('GLASS_JUICE', 'PANTS_BRICK')
('PANTS_GLASS', 'BRICK_JUICE')
('BRICK_JUICE', 'GLASS_PANTS')
('JUICE_GLASS', 'BRICK_PANTS')
For reverse words:
('JUICE_BRICK', 'BRICK_JUICE')
('BRICK_PANTS', 'PANTS_BRICK')
('CLOCK_BRICK', 'BRICK_CLOCK')
('GLASS_CLOCK', 'CLOCK_GLASS')
('PANTS_GLASS', 'GLASS_PANTS')
('JUICE_GLASS', 'GLASS_JUICE')
For probe words:
('BRICK_CLOCK

## Next:
Now we have lists of combinations of word pairs that we want to compare for each group. Hypothetically, we would assume that the "different word pairs" have distances farthest away and the reversed words, have distances closer, and probe and target are also closer. 

Next, using ANOVA to perform a cutoff on channels and only taking the delta, theta and high gamma frequencies, we want to uncover this result.

1. Load Data
2. Extract Features
3. Plot Distance Metrics

### Different Word Groups

In [94]:
diff_words_groups

for group in diff_words_groups:
    for directory in group:
        ######## Get list of files (.mat) we want to work with ########
        filedir = '../condensed_data/groups/'
        filedir = filedir + directory
        files = []

        for file in os.listdir(filedir):
            if file.endswith('.mat'):
                files.append(file)
        print filedir
        print len(files)
        print files
        break
    break
    

../condensed_data/groups/BRICK_CLOCK
96
['10_G10-global_groupData.mat', '11_G11-global_groupData.mat', '12_G12-global_groupData.mat', '13_G13-global_groupData.mat', '14_G14-global_groupData.mat', '15_G15-global_groupData.mat', '16_G16-global_groupData.mat', '17_G17-global_groupData.mat', '18_G18-global_groupData.mat', '19_G19-global_groupData.mat', '1_G1-global_groupData.mat', '20_G20-global_groupData.mat', '21_G21-global_groupData.mat', '22_G22-global_groupData.mat', '23_G23-global_groupData.mat', '24_G24-global_groupData.mat', '25_G25-global_groupData.mat', '26_G26-global_groupData.mat', '27_G27-global_groupData.mat', '28_G28-global_groupData.mat', '29_G29-global_groupData.mat', '2_G2-global_groupData.mat', '30_G30-global_groupData.mat', '31_G31-global_groupData.mat', '32_G32-global_groupData.mat', '33_TT1-global_groupData.mat', '34_TT2-global_groupData.mat', '35_TT3-global_groupData.mat', '36_TT4-global_groupData.mat', '37_TT5-global_groupData.mat', '38_TT6-global_groupData.mat', '3