# ESA with Verbal Fluency 

## ESA FINN

In [179]:
from dasem.semantic import Semantic
import numpy as np
from io import open
import collections
from itertools import compress, groupby
import math
import time
from os.path import join

In [180]:
# --- Notebook configuration -------------------------------------------------
# Participant identifier; also used as the prefix of the output file name.
ID = "0001"

# Input word list (one word per line) and the project folder it lives in.
# NOTE(review): home_path is an absolute, machine-specific path; adjust it
# when running on another computer.
textfile = "test2.txt"
home_path = "/Users/au183362/Documents/postdoc/Parkinson-DBS/Eira_Aksnes/"
save_path = join(home_path, 'output')
file_path = join(home_path, 'raw', textfile)

# Argument handed to Semantic(...) when the ESA model is built.
iterations = 30000

# Switch threshold, following Yund et al.:
# an ESA switch is flagged below "PercentRESA * MeanAllESA", or below
# (minimum sequence ESA) + a small epsilon, so that at least one switch exists.
# NOTE(review): the original comment quoted the epsilon as 0.00001, while the
# threshold cell of this notebook adds 0.000001 -- confirm which is intended.
PercentRESA = 0.75


### Example from DASEM, semantic relatedness

#Example:

    >>> semantic = Semantic(30000)  # and wait
    >>> semantic.relatedness(['hund', 'kat', 'mus', 'fisk']).round(3)
    array([[ 1.   ,  0.022,  0.005,  0.001],
           [ 0.022,  1.   ,  0.002,  0.   ],
           [ 0.005,  0.002,  1.   ,  0.01 ],
           [ 0.001,  0.   ,  0.01 ,  1.   ]])

## NB! Run the following cell only once! It downloads the entire Danish Wikipedia dump...

In [3]:
#!python -m dasem.wikipedia download --verbose

2019-09-22 22:04:23,957 - __main__ - INFO - Downloading https://dumps.wikimedia.org/dawiki/latest/dawiki-latest-pages-articles.xml.bz2 to /Users/au183362/dasem_data/wikipedia/dawiki-latest-pages-articles.xml.bz2


### NB! The following cell may take between 6 and 12 minutes to run, so be patient

In [5]:
# Build the ESA model and report how long the construction took (seconds).
#
# Timings recorded so far:
#   Eira's macbook: Semantic(300) and Semantic(30) both took 600-700 sec (10-12 min)
#   AH's macbook:   Semantic(30000) took 500-535 sec (8-9 min)
t = time.time()
semantic = Semantic(iterations)
elapsed = time.time() - t

print(elapsed)



603.078813791


In [192]:
# Read the participant's word list: one word per line, in the order produced.
# The file is opened in a `with` block so the handle is closed again, and
# lines that are empty after stripping trailing whitespace are skipped so a
# stray blank line is not counted (and looked up) as a word.
# NOTE(review): no encoding is passed, so io.open falls back to the platform's
# preferred encoding -- confirm the raw files match it (they contain ae/oe/aa).
with open(file_path) as word_file:
    inputs = [line.rstrip() for line in word_file if line.rstrip()]

# Echo the words with their 0-based position for a visual check.
for i, inp in enumerate(inputs):
    print(u"{0:d}: {1:s}".format(i, inp))

0: Mus
1: Ko
2: Hest
3: Gris
4: Elefant
5: Kat
6: Pindsvin
7: Krage
8: Måge
9: Fugl
10: Mosegris
11: Ørentvist
12: Flue
13: Elefant
14: Næsehorn
15: Løve
16: Tiger
17: Bjørn
18: Æsel
19: Hest
20: Krokodille
21: Næsehorn
22: Løve
23: Kat
24: Rotte


In [193]:
# Pairwise relatedness between all words, rounded to three decimals.
# Row/column k corresponds to inputs[k].
ESA_table = semantic.relatedness(inputs).round(3)

# Show the table in column blocks of 12 words, each block preceded by the
# words that label its columns.
for first_col in range(0, len(inputs), 12):
    last_col = first_col + 12
    header_words = inputs[first_col:last_col]
    print("  ".join(map(unicode, header_words)))
    print(ESA_table[:, first_col:last_col])
    print("\n")

Mus  Ko  Hest  Gris  Elefant  Kat  Pindsvin  Krage  Måge  Fugl  Mosegris  Ørentvist
[[1.    0.005 0.002 0.003 0.003 0.01  0.007 0.004 0.001 0.01  0.004 0.   ]
 [0.005 1.    0.003 0.006 0.005 0.004 0.    0.001 0.    0.001 0.    0.   ]
 [0.002 0.003 1.    0.003 0.011 0.003 0.    0.    0.    0.004 0.    0.   ]
 [0.003 0.006 0.003 1.    0.002 0.003 0.003 0.009 0.    0.005 0.    0.   ]
 [0.003 0.005 0.011 0.002 1.    0.003 0.001 0.001 0.    0.002 0.    0.   ]
 [0.01  0.004 0.003 0.003 0.003 1.    0.001 0.001 0.    0.004 0.001 0.   ]
 [0.007 0.    0.    0.003 0.001 0.001 1.    0.007 0.001 0.002 0.001 0.   ]
 [0.004 0.001 0.    0.009 0.001 0.001 0.007 1.    0.002 0.015 0.001 0.   ]
 [0.001 0.    0.    0.    0.    0.    0.001 0.002 1.    0.033 0.    0.   ]
 [0.01  0.001 0.004 0.005 0.002 0.004 0.002 0.015 0.033 1.    0.001 0.   ]
 [0.004 0.    0.    0.    0.    0.001 0.001 0.001 0.    0.001 1.    0.   ]
 [0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    0.    1.   ]
 [0.01  0.002 0.

In [194]:
# Summary counts for the word list.
#   N_Totl: every word that was produced
#   N_Uniq: distinct words (reported as "corrects")
#   N_Dups: distinct words that occur more than once
#   N_UNKN: rows of ESA_table that consist entirely of NaN
word_tally = collections.Counter(inputs)

N_Totl = len(inputs)
N_Uniq = len(set(inputs))
N_Dups = sum(1 for occurrences in word_tally.values() if occurrences > 1)
N_UNKN = sum(np.isnan(table_row).all() for table_row in ESA_table)

summary_template = ("\nTotal number of \nwords: {0:d}\ncorrects: {1:d}"
                    "\nduplicates: {2:d}\nunknowns: {3:d}")

print("Participant ID: {0:s}".format(ID))
print(summary_template.format(N_Totl, N_Uniq, N_Dups, N_UNKN))

Participant ID: 0001

Total number of 
words: 25
corrects: 20
duplicates: 5
unknowns: 0


In [195]:
#Extract ESA values for continuous (i.e., chronological) pairs
# ESA[k] is the relatedness between inputs[k] and inputs[k+1], so ESA holds
# len(inputs) - 1 values (none at all when fewer than two words were given).
ESA = []
DictESA = {}

for n in range(1, len(inputs)):
    OneESA = ESA_table[n, n-1]
    pair_label = u"{0:s}-{1:s}".format(inputs[n-1], inputs[n])
    print(u"{0:s} = {1:2.3f}".format(pair_label, OneESA))
    # NOTE: a pair that occurs twice in the sequence shares one key here, so
    # DictESA can hold fewer entries than ESA.
    DictESA[pair_label] = OneESA
    ESA.append(OneESA)

# Start from 0.0 so the sum is a float even when ESA is empty.
SumESA = sum(ESA, 0.0)

# With fewer than two words there are no pairs; fall back to 0.0 (the same
# convention the all-pairs mean uses) instead of dividing by zero.
MeanESA = SumESA / len(ESA) if ESA else 0.0

# np.round works for plain Python floats as well as numpy scalars.
print("\nMean (chronological pairwise) ESA for Participant ID {0:s}: {1:2.3f}".format(ID, np.round(MeanESA, 3)))

Mus-Ko = 0.005
Ko-Hest = 0.003
Hest-Gris = 0.003
Gris-Elefant = 0.002
Elefant-Kat = 0.003
Kat-Pindsvin = 0.001
Pindsvin-Krage = 0.007
Krage-Måge = 0.002
Måge-Fugl = 0.033
Fugl-Mosegris = 0.001
Mosegris-Ørentvist = 0.000
Ørentvist-Flue = 0.003
Flue-Elefant = 0.001
Elefant-Næsehorn = 0.019
Næsehorn-Løve = 0.004
Løve-Tiger = 0.014
Tiger-Bjørn = 0.001
Bjørn-Æsel = 0.002
Æsel-Hest = 0.038
Hest-Krokodille = 0.000
Krokodille-Næsehorn = 0.002
Næsehorn-Løve = 0.004
Løve-Kat = 0.002
Kat-Rotte = 0.004

Mean (chronological pairwise) ESA for Participant ID 0001: 0.006


In [196]:
#Extract ESA values for all possible pairs (logic follows the Yund code)
# Every unordered pair (i < j) of positions in the word list is visited once.
# NOTE(review): repeated words are paired with themselves here; such pairs
# presumably score 1.0 and raise MeanAllESA -- confirm this is intended.
DictAllESA = {}
N_AllESA = 0
SumAllESA = 0.0

for i in range(len(inputs) - 1):
    for j in range(i + 1, len(inputs)):
        OneESA = ESA_table[i, j]
        # Identical word pairs share a key, so the dict may hold fewer
        # entries than N_AllESA.
        DictAllESA[u"{0:s}-{1:s}".format(inputs[i], inputs[j])] = OneESA
        N_AllESA += 1
        SumAllESA += OneESA

# Mean over all pairs; 0.0 when there are no pairs at all.
MeanAllESA = SumAllESA / N_AllESA if N_AllESA else 0.0

# Semantic Organisation Index: sequence mean relative to the all-pairs mean;
# defined as 0.0 when the all-pairs mean is zero.
SOI = MeanESA / MeanAllESA if MeanAllESA != 0.0 else 0.0

# np.round also accepts the plain-float 0.0 fallbacks, which have no .round().
print("\nMean All ESA for Participant ID {0:s}: {1:2.3f}".format(ID, np.round(MeanAllESA, 3)))
print("\nSOI Participant ID {0:s}: {1:2.3f}".format(ID, np.round(SOI, 3)))



Mean All ESA for Participant ID 0001: 0.020

SOI Participant ID 0001: 0.327


## Clusters and switches

In [197]:
# Setting the ESA_Threshold
# A transition counts as a switch when its ESA is strictly below the threshold.
ESAsort = sorted(ESA) # creating a sorted copy of ESA
ESA_Threshold = PercentRESA * MeanAllESA  # ESA switch if ESA < ESA_Threshold

# Guarantee at least one switch: if the threshold does not exceed the smallest
# sequence ESA (including the case where it is exactly equal, which the strict
# "<" test would not flag), lift it just above that minimum.
# Skipped when ESA is empty, since there is no minimum to compare against.
if ESAsort and ESA_Threshold <= ESAsort[0]:
    ESA_Threshold = ESAsort[0] + 0.000001
print(ESA_Threshold)

0.014719999999999974


In [None]:
#ESA switches
# One flag per chronological pair: 1 = switch (ESA below threshold), 0 = no switch.
ESAsw = [int(esa < ESA_Threshold) for esa in ESA]

# Tally the ones and zeros of the switch vector. Looking the values up in the
# Counter returns 0 for a value that never occurs, so this also works when
# every transition is a switch, none is, or the vector is empty.
ESAswTally = collections.Counter(ESAsw)
ESAswCount = ESAswTally.most_common() # (value, count) pairs, most frequent first

sumESAsw = ESAswTally[1] # number of switches (count of ones)
# NOTE(review): NumCS is the count of zeros, i.e. of non-switch transitions.
# That equals the number of clusters only while every cluster has exactly two
# words -- confirm whether the number of zero-runs was intended instead.
NumCS = ESAswTally[0]

print("Number of switches: {0:d}\nNumber of clusters: {1:d}\n".format(sumESAsw, NumCS))
print("Switch index:")

# ESAsw is one element shorter than inputs, so zip pairs each flag with the
# first word of its pair and the final word is not printed.
for sw, i in zip(ESAsw, inputs):
    print(u"{0:d}: {1:s}".format(sw, i))

In [199]:
# Cluster sizes: each run of consecutive 0 flags in ESAsw is one cluster, and
# a run of k non-switch transitions links k + 1 words.
SizeCS = [sum(1 for _ in run) + 1 for is_switch, run in groupby(ESAsw) if is_switch == 0]
SumCS = sum(SizeCS)
print("Cluster sizes: {}".format(SizeCS))
print("Accumulated cluster size: {}".format(SumCS))

# ESAcs: copy of the switch vector in which the first position of every
# cluster carries the cluster size and the cluster's remaining words carry 0;
# positions outside clusters keep their 1.
# The vector is walked run by run, so a cluster of three or more words is
# handled once (visiting every 0 flag separately would advance `count` too
# often and pick up the wrong size or run past the end of SizeCS).
ESAcs = ESAsw[:]
count = 0  # index of the current cluster in SizeCS
pos = 0    # position in ESAsw where the current run starts
for is_switch, run in groupby(ESAsw):
    run_length = sum(1 for _ in run)
    if is_switch == 0:
        size = SizeCS[count]
        ESAcs[pos] = size
        # Covers the rest of the run plus the position of the cluster's last
        # word; if that word is the final one, the list grows by one element.
        ESAcs[pos + 1:pos + size] = [0] * (size - 1)
        count = count + 1
    pos = pos + run_length
print("All 'clusters': {}".format(ESAcs))

Cluster sizes: [2, 2, 2]
Accumulated cluster size: 6
All 'clusters': [1, 1, 1, 1, 1, 1, 1, 1, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 2, 0, 1, 1, 1, 1]


## Create/save output file

In [211]:
#Create output file for each subject (.txt)
#NB! not all entries in the below code are relevant for our current analysis

# Ratios are computed with float division (plain "/" truncates two ints under
# Python 2) and become NaN instead of raising when the denominator is zero.
words_per_switch = float(N_Totl) / sumESAsw if sumESAsw else float('nan')
words_per_cluster = float(SumCS) / NumCS if NumCS else float('nan')

report_parts = [
    #summary values
    u'Number of animals:   ', format(N_Totl, u'>3d'),
    u'\nNumber correct:      ', format(N_Uniq, u'>3d'),
    u'\nRepetitions:        ', format(N_Dups, u'>3d'),
    u'\nUnknown Animals:    ', format(N_UNKN, u'>3d'),
    #ESA analysis
    u'\n\nSequence Mean ESA:  ', format(MeanESA, u'10.4f'),
    u'\nTotal Mean ESA:     ', format(MeanAllESA, u'10.4f'),
    u'\nSem Org Index (SOI):', format(SOI, u'10.4f'),
    u'\n\nESA switches:       ', format(sum(ESAsw), u'>4d'),
    u'  (', format(words_per_switch, u'5.2f'), u' words/switch)',
    # Label spelling corrected ("clustes" -> "clusters"); one padding space
    # removed so the value stays in the same column.
    u'\n\nESA clusters:       ', format(NumCS, u'>4d'),
    u'  (', format(words_per_cluster, u'5.2f'), u' words/cluster)',
]

#create/open file for writing output; the with-block closes (and thereby
#flushes) the file even if writing fails
with open(join(save_path, ID +'_out.txt'), 'w') as outfile:
    outfile.write(u''.join(report_parts))

15L