<a href="https://colab.research.google.com/github/abhilasha-kumar/fluency-cogsci2022/blob/main/fluency_cogsci2022.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Phonological Intrusions in Semantic Memory Retrieval

# Importing drive, GPU, and packages

In [1]:
from google.colab import drive
drive.mount('/content/drive',force_remount=True)

Mounted at /content/drive


In [2]:
%tensorflow_version 2.x
import tensorflow as tf
device_name = tf.test.gpu_device_name()
if device_name != '/device:GPU:0':
  raise SystemError('GPU device not found')
print('Found GPU at: {}'.format(device_name))
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Select the Runtime > "Change runtime type" menu to enable a GPU accelerator, ')
  print('and then re-execute this cell.')
else:
  print(gpu_info)

Found GPU at: /device:GPU:0
Wed Jan 19 16:18:01 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 495.46       Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   38C    P0    31W / 250W |    375MiB / 16280MiB |      1%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------

In [3]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import heapq
import itertools
import scipy.spatial.distance

import pandas as pd
import numpy as np

from numpy.random import randint
from scipy.special import softmax
from sklearn.preprocessing import MinMaxScaler, normalize
from numpy.linalg import matrix_power
from functools import lru_cache
import glob
from scipy.special import expit

import matplotlib.pyplot as plt
import nltk
from functools import lru_cache
from itertools import product as iterprod
import itertools
from nltk.metrics import *



# Phoneme Function

In [None]:
# algo to obtain phonemes for any given strng
# obtained from: https://stackoverflow.com/questions/33666557/get-phonemes-from-any-word-in-python-nltk-or-other-modules
try:
    arpabet = nltk.corpus.cmudict.dict()
except LookupError:
    nltk.download('cmudict')
    arpabet = nltk.corpus.cmudict.dict()

@lru_cache()
def wordbreak(s):
    s = s.lower()
    if s in arpabet:
        return arpabet[s]
    middle = len(s)/2
    partition = sorted(list(range(len(s))), key=lambda x: (x-middle)**2-x)
    for i in partition:
        pre, suf = (s[:i], s[i:])
        if pre in arpabet and wordbreak(suf) is not None:
            return [x+y for x,y in iterprod(arpabet[pre], wordbreak(suf))]
    return None

def normalized_sim(w1, w2):
  return 1-edit_distance(w1,w2)/(max(len(w1), len(w2)))


[nltk_data] Downloading package cmudict to /root/nltk_data...
[nltk_data]   Unzipping corpora/cmudict.zip.


## example

In [None]:
w1 = "birds"
w2 = "pigs"
print("wordbreak(w1)[0]:",wordbreak(w1)[0])
print("wordbreak(w2)[0]:",wordbreak(w2)[0])

print("orig phon:", edit_distance(wordbreak(w1)[0],wordbreak(w2)[0]))
print("orig orth:", edit_distance(w1, w2))

print("norm orth:", normalized_sim(w1, w2))
print("norm phon:", normalized_sim(wordbreak(w1)[0],wordbreak(w2)[0]))

wordbreak(w1)[0]: ['B', 'ER1', 'D', 'Z']
wordbreak(w2)[0]: ['P', 'IH1', 'G', 'Z']
orig phon: 3
orig orth: 3
norm orth: 0.4
norm phon: 0.25


# reading data

In [None]:
parentfolder = "/content/drive/My Drive/IU-Abhilasha-Mike/Fluency/sem-phon/fluency_cogsci2022"
with tf.device('/device:GPU:0'):
  semantic_files = glob.glob(parentfolder + '/*.xlsx')
print(f"This folder has {len(semantic_files)} files")

This folder has 1 files


# reading embeddings

embeddings obtained from: http://vectors.nlpl.eu/explore/embeddings/en/models/

In [None]:
# import embeddings
with tf.device('/device:GPU:0'):
  word2vec_model = pd.read_csv(parentfolder +"vocab_vectors.csv", encoding="unicode-escape")
  word2vec = word2vec_model.transpose().values
  # new_header = word2vec.iloc[0] #grab the first row for the header
  # word2vec = word2vec[1:] #take the data less the header row
  # word2vec.columns = new_header
  # word2vec = word2vec.values.transpose()
  vocab = pd.DataFrame(word2vec_model.columns, columns=["vocab_word"])
  print(f"embeddings are shaped:", word2vec.shape)
  print(f"vocab is {len(vocab)} words")

embeddings are shaped: (4924, 300)
vocab is 4924 words


# obtaining phonemic & semantic similarity

Here we compute the phonemic & semantics similarity for the data, as well the similarity drop-based switch designations based on the semantic similarities.

In [None]:
## now we loop through each txt file
import warnings
warnings.filterwarnings("ignore")

phon_list = []

# read in the data as a pandas dataframe
data_file = pd.read_excel(semantic_files[0])
import re
for index, row in data_file.iterrows():  
  word = str(row["response"])
  ## remove all non-alphas for phonemic similarity
  mod_word = re.sub('[^a-zA-Z]+', '', word)
  if(len(mod_word)>0):
    phonemes = wordbreak(mod_word)[0]
    phon_list.append(phonemes)
  else:
    phon_list.append("wordnotfound")

data_file["phonemes"] = phon_list
data_file["response_number"] = data_file.groupby(['subject', 'domain']).cumcount()+1

# exclude rows that do not have a valid phoneme
data_file = data_file[data_file.phonemes != "wordnotfound"]
data_file = data_file.reset_index(drop= True)

#now we compute the levenshtein edit distance as a measure of orthographic/phonemic similarity 

phon_similarity = []
orth_similarity = []
semantic_similarity = []

norm_phon = []
norm_orth = []

for index, row in data_file.iterrows():
  current_word = re.sub('[^a-zA-Z]+', '', str(row["response"]))
  current_phoneme = row["phonemes"]
  if row["response_number"] == 1:
      sem_val = -999
      phon_val = -999
      orth_val = -999
      norm_phon_val = -999
      norm_orth_val = -999
  else:
    previous_word = re.sub('[^a-zA-Z]+', '', str(data_file.response[index-1]))
    previous_phoneme = data_file.phonemes[index-1]

    #calculate orthographic similarity as Levenshtein (edit) distance
    orth_val = edit_distance(previous_word, current_word)
    norm_orth_val = normalized_sim(previous_word, current_word)
    
    # can also get edit distance for the phonemes themselves (as in Siew et al. Hoosier network)

    phon_val = edit_distance(previous_phoneme, current_phoneme)
    norm_phon_val = normalized_sim(previous_phoneme, current_phoneme)
    
    # extract word embedding for current word
    ## no replacements/removals here
    current_word = str(row["response"])
    previous_word = str(data_file.response[index-1])
    if current_word in list(vocab["vocab_word"]):
      current_word_index = list(vocab["vocab_word"]).index(current_word)
      current_word_vec = word2vec[current_word_index].reshape((1,word2vec.shape[1]))
      # extract word embedding for current word
      if previous_word in list(vocab["vocab_word"]):
        previous_word_index = list(vocab["vocab_word"]).index(previous_word)
        previous_word_vec = word2vec[previous_word_index].reshape((1,word2vec.shape[1]))
        sem_val = float((1 - scipy.spatial.distance.cdist(previous_word_vec, current_word_vec, 'cosine'))[0])      
        #print(f"for {current_word} and {previous_word} similarity is {sem_val}")
      else:
        sem_val = "NA"
    else:
      sem_val = "NA"

  phon_similarity.append(phon_val)
  orth_similarity.append(orth_val)
  norm_phon.append(norm_phon_val)
  norm_orth.append(norm_orth_val)
  semantic_similarity.append(sem_val)
  
data_file["phon_similarity"] = phon_similarity
data_file["orth_similarity"] = orth_similarity
data_file["norm_phon"] = norm_phon
data_file["norm_orth"] = norm_orth
data_file["word2vec_similarity"] = semantic_similarity 

data_file.to_csv(parentfolder + f'precomputed_data.csv')

# similarity drop & troyer

In [4]:
parentfolder = "/content/drive/My Drive/IU-Abhilasha-Mike/Fluency/sem-phon/fluency_cogsci2022/"
with tf.device('/device:GPU:0'):
  precomputed_data = pd.read_csv(parentfolder +"precomputed_data.csv", encoding="unicode-escape")
  norms = pd.read_csv(parentfolder +"troyernorms.csv", encoding="unicode-escape")

['Fish']

In [None]:
## obtain similarity drop and troyer switch values

simdrop = []
troyer = []
for k, row in precomputed_data.iterrows():
  # simdrop
  if (k > 0 and k < (len(precomputed_data)-2)):
    if (precomputed_data["word2vec_similarity"][k+1] > precomputed_data["word2vec_similarity"][k]) and (precomputed_data["word2vec_similarity"][k-1] > precomputed_data["word2vec_similarity"][k]):
      simdrop.append(1)
    else:
      simdrop.append(0)
  else:
      simdrop.append(0)
  
  ## troyer
  if(k >0  and k < (len(precomputed_data)-2)):
    item1 = precomputed_data["response"][k]
    item2 = precomputed_data["response"][k-1]
    category1 = norms[norms['Animal'] == item1]['Category'].values.tolist()
    category2 = norms[norms['Animal'] == item2]['Category'].values.tolist()
    if len(list(set(category1) & set(category2)))== 0:
        troyer.append(1)
    else:
        troyer.append(0)
  else:
     troyer.append(0) 

precomputed_data["simdrop"] = simdrop
precomputed_data["troyer"] = troyer

precomputed_data.to_csv(parentfolder + f'precomputed_data.csv', index=False)
precomputed_data

Unnamed: 0.1,Unnamed: 0,dataset,subject,domain,response_number,response,response_onset_time,IRT,participant_designated_switch,phonemes,phon_similarity,orth_similarity,norm_phon,norm_orth,word2vec_similarity,simdrop,troyer
0,0,LEA,50001,animals,1,horse,2.594,2.594000,2.0,"['HH', 'AO1', 'R', 'S']",-999,-999,-999.000,-999.000000,-999.000000,0,0
1,1,LEA,50001,animals,2,pig,3.594,1.000000,0.0,"['P', 'IH1', 'G']",4,5,0.000,0.000000,0.469811,0,0
2,2,LEA,50001,animals,3,bear,4.894,1.300000,1.0,"['B', 'EH1', 'R']",3,4,0.000,0.000000,0.297824,1,1
3,3,LEA,50001,animals,4,cat,6.194,1.300000,1.0,"['K', 'AE1', 'T']",3,3,0.000,0.250000,0.383209,0,1
4,4,LEA,50001,animals,5,dog,7.394,1.200000,0.0,"['D', 'AO1', 'G']",3,3,0.000,0.000000,0.760946,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
32338,32338,HJT,1099,sports,24,jump_rope,130.000,126.911765,,"['JH', 'AH1', 'M', 'P', 'R', 'OW1', 'P']",7,7,0.125,0.222222,0.186143,0,1
32339,32339,HJT,1099,sports,25,hockey,163.000,160.838235,,"['HH', 'AA1', 'K', 'IY0']",7,8,0.000,0.000000,0.172121,1,1
32340,32340,HJT,1099,sports,26,field_hockey,169.000,164.367647,,"['F', 'IY1', 'L', 'D', 'HH', 'AA1', 'K', 'IY0']",4,5,0.500,0.545455,0.345995,0,1
32341,32341,HJT,1099,sports,27,lacrosse,171.000,168.220588,,"['L', 'AH0', 'K', 'R', 'AO1', 'S']",7,9,0.125,0.181818,0.430308,0,0


# computational search models

we only consider the "animals" domain for the computational models. we start with a predefined list of 675 animals for which we create a semantic and phonological similarity matrix and obtain word frequency estimates.

# download animals list

In [None]:
simlabels = pd.read_csv(parentfolder+ f'new_animals.csv', header=None).values.reshape(-1,).tolist()
len(simlabels)

771

## create semantic similarity matrix

In [None]:
## create similarity matrix and similarity labels file from whichever corpus you're using

def create_sim_matrix(simlabels):
  ## the vocab consists of ALL possible words in corpus, but we need only the "animals" subset here
  ## we use the similarity_labels file from psyrev to constrain the vocab
  animals_index = [list(vocab.vocab_word).index(lab) if lab in list(vocab.vocab_word) else -999 for lab in simlabels ]
  animals_index = list(filter((-999).__ne__, animals_index))

  ## now we restrict our vocab and embeddings to ONLY these animals
  embeddings_small = word2vec[animals_index, :]
  vocab_small = vocab.iloc[animals_index]
  N = len(vocab_small)
  print(f"animals vocab is {N} words")

  # create semantic similarity matrix
  matrix = 1-scipy.spatial.distance.cdist(embeddings_small, embeddings_small, 'cosine').reshape(-1)
  matrix = matrix.reshape((N,N))
  print("sim matrix has been created:", matrix.shape)

  w1_index = list(vocab_small.vocab_word).index("dolphin")
  w2_index = list(vocab_small.vocab_word).index("kitten")
  w3_index = list(vocab_small.vocab_word).index("whale")

  print("dolphin-kitten:", matrix[w1_index, w2_index])
  print("dolphin-whale:", matrix[w1_index, w3_index])
  print("dolphin-dolphin:", matrix[w1_index, w1_index])

  pd.DataFrame(matrix).to_csv(parentfolder + 'word2vec_sim_matrix.csv', index=False, header=False)
  vocab_small.to_csv(parentfolder + 'word2vec_sim_labels.csv', index=False, header=False)

  print("sim matrix has been saved to drive!")

create_sim_matrix(simlabels)


animals vocab is 771 words
sim matrix has been created: (771, 771)
dolphin-kitten: 0.37598666621788657
dolphin-whale: 0.7711714974302292
dolphin-dolphin: 1.0
sim matrix has been saved to drive!


## create phonological similarity matrix

In [None]:
def create_phon_matrix(vocab):
  # takes in a list of labels and computes the phonological similarity matrix
  vocabulary = vocab.copy()
  N = len(vocabulary)
  print(f"vocab is {N} words")
  # replace all underscores (_) with space (" ") to match with glove vectors/vocab
  vocabulary = [re.sub('[^a-zA-Z]+', '', str(v)) for v in vocabulary]
  print(f"vocab now looks like:", vocabulary[:5])
  # create phonemic similarity matrix for the small vocab
  pmatrix = np.array([normalized_sim(wordbreak(w1)[0], wordbreak(w2)[0]) for w1 in vocabulary for w2 in vocabulary]).reshape((N,N))
  print("pmatrix has been created:", pmatrix.shape)
  print(pmatrix)
  pd.DataFrame(pmatrix).to_csv(parentfolder + 'simlabels_phon_matrix.csv', index=False, header=False)  
  print("phon matrix has been saved to drive!")

simlabels = pd.read_csv(parentfolder+'word2vec_sim_labels.csv', header=None).values.reshape(-1,).tolist()
print(f"simlabels is {len(simlabels)} items:", simlabels[:5])
create_phon_matrix(simlabels)


simlabels is 771 items: ['horse', 'pig', 'bear', 'cat', 'dog']
vocab is 771 words
vocab now looks like: ['horse', 'pig', 'bear', 'cat', 'dog']
pmatrix has been created: (771, 771)
[[1.         0.         0.25       ... 0.         0.33333333 0.125     ]
 [0.         1.         0.         ... 0.         0.         0.125     ]
 [0.25       0.         1.         ... 0.14285714 0.16666667 0.125     ]
 ...
 [0.         0.         0.14285714 ... 1.         0.14285714 0.        ]
 [0.33333333 0.         0.16666667 ... 0.14285714 1.         0.125     ]
 [0.125      0.125      0.125      ... 0.         0.125      1.        ]]
phon matrix has been saved to drive!


In [None]:
np.log(float(404))

6.0014148779611505

## get frequencies

In [None]:
frequencies = pd.read_csv(parentfolder+ f'animals_frequencies.csv', header=None)
print(len(frequencies))

771


## define foraging models

For these models, the behavioral data is read in as .txt files separated by a space

In [12]:
import os

def modelFits(path, delimiter):

    ### LOAD REQUIRED PACKAGES ###
    import numpy as np
    import pandas as pd
    import re

    ### LOAD BEHAVIORAL DATA ###
    df = pd.read_csv(path, header=None, names=['SID', 'entry'], delimiter=delimiter)

    #correct behavioral fits
    df = forage.prepareData(df)

    ### LOAD SEMANTIC SIMILARITY MATRIX ###

    # (aka 'local cues', here we use cosines from word2vec)

    # Similarity labels
    simlab = []
    ofile = open(parentfolder + 'word2vec_sim_labels.csv','r')#TODO:
    for line in ofile:
        labs = line.split()
        for lab in labs:
            simlab.append(lab)
    ofile.close()

    # Similarity values
    simval = np.zeros((len(simlab), len(simlab)))
    ofile = open(parentfolder + 'word2vec_sim_matrix.csv', 'r')#TODO:
    j=0
    for line in ofile:
        line = re.sub(',\n', '', line)
        sims = line.split(',')
        i=0
        for sim in sims:
            simval[i,j] = sim
            i+=1
        j+=1
    ofile.close()

    # Make sure similarity values are non-zero
    for i in range(0,len(simval)):
        for j in range(0,len(simval)):
            if simval[i,j] <= 0:
                simval[i,j] = 0.0001

    ## PHONEMIC SIMILARTY VALUES ##
    phonval = np.zeros((len(simlab), len(simlab)))
    ofile = open(parentfolder + 'simlabels_phon_matrix.csv', 'r')#TODO:
    j=0
    for line in ofile:
        line = re.sub(',\n', '', line)
        sims = line.split(',')
        i=0
        for sim in sims:
            phonval[i,j] = sim
            i+=1
        j+=1
    ofile.close()

    # Make sure phonemic values are non-zero
    for i in range(0,len(phonval)):
        for j in range(0,len(phonval)):
            if phonval[i,j] <= 0:
                phonval[i,j] = 0.0001

    ### LOAD FREQUENCY LIST ###
    # (aka 'global cue', using NOW corpus from http://corpus.byu.edu/now/, 4.2 billion words and growing daily)

    freqlab = []
    freqval = []
    ofile = open(parentfolder + 'animals_frequencies.csv', 'r') #TODO:
    for line in ofile:
        line = re.sub('\n', '', line)
        freqs=line.split(',')
        freqlab.append(freqs[0])
        ## append log of frequency if using psyrev
        #freqval.append(np.log(float(freqs[0])))
        freqval.append(float(freqs[1]))
    ofile.close()
    freqval=np.array(freqval)

    sidlist = list(set(df['SID']))
    full_entdf = pd.DataFrame()
    full_fitlist = []
    ct = 0

    ## COMPUTE CONSECUTIVE SIMILARITY AND FREQUENCY AT SUBJECT LEVEL ##

    for sid in sidlist:
        ct+=1
        print( "SUBJECT " + str(ct) + '/' + str(len(sidlist)) + " " + str(sid))

        # My general initializations
        myfitlist = []
        myentries = np.array(df[df['SID']==sid]['entry'])
        #print("myentries:", myentries)
        myenttimes = np.array(df[df['SID']==sid].index)
        ##print("myenttimes:", myenttimes)
        myused = []
        mytime = []

        # For both frequency and similarity metrics:
            # LIST: Metrics corresponding with my observed entries
            # CURRENT: Full metric values, with observed entries becoming 0
            # HISTORY: State of full metric values (ie, "current" during each entry)

        # My frequency initializations
        # freq current contains frequencies of ALL the words in corpus
        freq_current = np.array(freqval)
        #print("freq_current.shape:",freq_current.shape)
        freq_list = []
        freq_history = []

        # My similarity initializations
        sim_current = simval.copy()
        # sim_current contains the full NxN similarity matrix
        #print("sim_current shape:",sim_current.shape)
        sim_list = []
        sim_history = []

        phon_current = phonval.copy()
        phon_list = []
        phon_history = []
        troyer_switch = []

        for i in range(0,len(myentries)):
            word = myentries[i]
            #if word not in myused: # use this to calculate number of correct responses w/out repeats
            if True:   # use this line instead of former to include repeated words along w/line 110,119 comment out

                # Frequency: Get frequency and update relevant lists
                freq_list.append( float(freq_current[freqlab.index(word)]) )
                freq_history.append( np.array(freq_current) )
                #freq_current[freqlab.index(word)] = 0.00000001

                # Get similarity between this word and preceding word
                if i > 0:         
                    sim_list.append( float(sim_current[simlab.index(myentries[i-1]), simlab.index(word)]) )
                    sim_history.append( np.array(sim_current[simlab.index(myentries[i-1]),:]) )

                    phon_list.append( float(phon_current[simlab.index(myentries[i-1]), simlab.index(word)]) )
                    phon_history.append( np.array(phon_current[simlab.index(myentries[i-1]),:]) )

                    category1 = norms[norms['Animal'] == myentries[i-1]]['Category'].values.tolist()
                    category2 = norms[norms['Animal'] == myentries[i]]['Category'].values.tolist()
                    if len(list(set(category1) & set(category2)))== 0:
                        troyer_switch.append(1)
                    else:
                        troyer_switch.append(0)

                else:
                    sim_list.append(0)
                    sim_history.append( np.array(sim_current[simlab.index(word),:]) )
                #sim_current[:,simlab.index(word)] = 0.00000001
                    phon_list.append(0)
                    phon_history.append( np.array(phon_current[simlab.index(word),:]) )

                    troyer_switch.append(0)

                # Update lists
                myused.append(word)
                mytime.append(myenttimes[i])

        # Calculate category switches, based on similarity-drop
        myswitch = np.zeros(len(myused)).astype(int)
        for i in range(1,len(myused)-1):
            if (sim_list[i+1] > sim_list[i]) and (sim_list[i-1] > sim_list[i]):
                myswitch[i] = 1

        # Save my entries with corresponding metrics
        mydf = pd.DataFrame({'sid':[sid]*len(myused) , 'ent':myused, 'freq':freq_list, 'sim':sim_list, 'phon': phon_list,
                             'simdropswitch':myswitch, 'troyer': troyer_switch, 'time':mytime},
                            columns=['sid','time','ent','freq','sim', 'phon', 'simdropswitch', 'troyerswitch'])
        full_entdf = full_entdf.append(mydf)
        # Get parameter fits for the different models
        myfitlist.append(sid)
        myfitlist.append(len(myused))
        ## obtaining the optimal/random fits for the static and dynamic model by calling the getFits function
        myfitlist.extend( forage.getfits(freq_list, freq_history, sim_list, sim_history, phon_list, phon_history, troyer_switch) )
        full_fitlist.append(myfitlist)

    print("Fits Complete.")

    # create results directory if it doesn't exist yet
    if not os.path.exists(parentfolder):
        os.makedirs(parentfolder)

    # # Output data entries with corresponding metrics for visualization in R
    print(full_entdf)
    full_entdf = full_entdf.reset_index(drop=True)
    full_entdf.to_csv(parentfolder  + 'newdata-fullmetrics.csv', index=False, header=True)

    # # Output parameter & model fits
    full_fitlist = pd.DataFrame(full_fitlist)
    full_fitlist.columns = ['subject', 'number_of_items',
                             
                            'beta_static_frequency', 'beta_static_semantic', 'errors_static_optimal', 'errors_static_random',
                            'beta_plocalstatic_frequency', 'beta_plocalstatic_semantic', 'beta_plocalstatic_phonemic','errors_plocalstatic_optimal', 'errors_plocalstatic_random',
                            
                            'beta_simdrop_dynamic_frequency', 'beta_simdrop_dynamic_semantic', 'errors_simdrop_dynamic_optimal', 'errors_simdrop_dynamic_random',
                            'beta_simdrop_pswitchonlydynamic_frequency', 'beta_simdrop_pswitchonlydynamic_semantic', 'beta_simdrop_pswitchonlydynamic_phonemic','errors_simdrop_pswitchonlydynamic_optimal', 'errors_simdrop_pswitchonlydynamic_random',

                            'beta_simdrop_plocaldynamic_frequency', 'beta_simdrop_plocaldynamic_semantic', 'beta_simdrop_plocaldynamic_phonemic','errors_simdrop_plocaldynamic_optimal', 'errors_simdrop_plocaldynamic_random',
                            'beta_simdrop_pglobaldynamic_frequency', 'beta_simdrop_pglobaldynamic_semantic', 'beta_simdrop_pglobaldynamic_phonemic','errors_simdrop_pglobaldynamic_optimal', 'errors_simdrop_pglobaldynamic_random',

                            'beta_troyer_dynamic_frequency', 'beta_troyer_dynamic_semantic', 'errors_troyer_dynamic_optimal', 'errors_troyer_dynamic_random',
                            'beta_troyer_pswitchonlydynamic_frequency', 'beta_troyer_pswitchonlydynamic_semantic', 'beta_troyer_pswitchonlydynamic_phonemic','errors_troyer_pswitchonlydynamic_optimal', 'errors_troyer_pswitchonlydynamic_random',

                            'beta_troyer_plocaldynamic_frequency', 'beta_troyer_plocaldynamic_semantic', 'beta_troyer_plocaldynamic_phonemic','errors_troyer_plocaldynamic_optimal', 'errors_troyer_plocaldynamic_random',
                            'beta_troyer_pglobaldynamic_frequency', 'beta_troyer_pglobaldynamic_semantic', 'beta_troyer_pglobaldynamic_phonemic','errors_troyer_pglobaldynamic_optimal', 'errors_troyer_pglobaldynamic_random'
                            ]

    #print("full_fitlist:",full_fitlist)
    full_fitlist.to_csv(parentfolder  + 'newdata-fullfits.csv', index=False, header=True)

    print(full_fitlist.head())
    print("Results saved to '" + parentfolder + "'.")

class forage:

    def prepareData(data):
        import pandas as pd
        import re
        # load similarity labels
        simlab = []
        ofile = open(parentfolder + 'word2vec_sim_labels.csv','r')
        for line in ofile:
            labs = line.split()
            for lab in labs:
                simlab.append(lab)
        ofile.close()

        ### LOAD CORRECTIONS ###
        # This is a look-up list that maps incorrect words onto accepted words that are in the database
        # corrections = pd.read_csv(parentfolder + 'corrections.txt', header=None, delimiter='\t')
        # corrections = corrections.set_index(corrections[0].values)
        # corrections.columns = ['_from','_to']

        elist = data['entry'].values
        newlist = []
        notfound = []

        # Use look-up table to check and correct observed entries
        for ent in elist:
            ent = re.sub(r'\W+', '', ent) # Alphanumericize it
            if ent in simlab:
                # If this entry is appropriate, keep it
                newlist.append(ent)
            # elif ent[0:len(ent)-1] in simlab:
            #     # If this entry is plural, correct to the singular verion
            #     print(f"found the entry {ent[0:len(ent)-1]} in simlab")
            #     newlist.append(ent[0:len(ent)-1])
            # elif ent in corrections._from:
            #     # If this entry is correctable, correct it
            #     newlist.append(corrections.loc[ent]._to)
            else:
                # If this entry is not found in either list, mark for removal and warn user.
                newlist.append('NA')
                notfound.append(ent)

        # Remove the rows with inappropriate entries
        data.entry = newlist
        data = data[data.entry!='NA']

        # Warn the user of removed entries
        if len(notfound) > 0:
            print('The following items were not found in the database, and were removed: [' +
                  str(len(notfound)) + ' entries removed] \n')
            print(sorted(set(notfound)))
        else:
            print('All items OK.')
        return data[data.entry!='NA']
        # TODO: return statement might not be necessary...

    def model_static(beta, freql, freqh, siml, simh):
        ## beta contains the optimization parameters for frequency (beta[0]) and semantic similarity (beta[1])
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0: # if first item then its probability is based on just frequency
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            else: # if not first item then its probability is based on its similarity to prev item AND frequency
            # P of item based on frequency and similarity
                numrat = pow(freql[k],beta[0]) * pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0]) * pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat) # negative Log likelihood of this item: this will be minimized eventually
        return ct

    def model_static_plocal(beta, freql, freqh, siml, simh, phonl, phonh):
        ## beta contains the optimization parameters for frequency (beta[0]) and semantic similarity (beta[1])
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0: # if first item then its probability is based on just frequency
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            else: # if not first item then its probability is based on its similarity to prev item AND frequency AND phonemic similarity
            # P of item based on frequency and similarity and phonology
                numrat = pow(freql[k],beta[0]) * pow(phonl[k],beta[2]) * pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0]) * pow(phonh[k],beta[2])* pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat) # negative Log likelihood of this item: this will be minimized eventually
        return ct
        
    def model_dynamic_psyrev_simdrop(beta, freql, freqh, siml, simh):
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif k > 0 and k < (len(freql)-1) and siml[k+1] > siml[k] and siml[k-1] > siml[k]: ## "dip" based on sim-drop
            # If similarity dips, P of item is based again on frequency
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct
      
    def model_dynamic_pswitchonly_simdrop(beta, freql, freqh, siml, simh, phonl, phonh):
      ## here we use phonology as a "local" cue with semantics
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif k > 0 and k < (len(freql)-1) and siml[k+1] > siml[k] and siml[k-1] > siml[k]: ## "dip" based on sim-drop
            # If similarity dips, P of item is based on a combination of frequency and phonemic similarity
                numrat = pow(freql[k],beta[0]) * pow(phonl[k],beta[2]) 
                denrat = sum(pow(freqh[k],beta[0]) * pow(phonh[k],beta[2]) )
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct
    

    def model_dynamic_plocal_simdrop(beta, freql, freqh, siml, simh, phonl, phonh):
      ## here we use phonology as a "local" cue with semantics
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif k > 0 and k < (len(freql)-1) and siml[k+1] > siml[k] and siml[k-1] > siml[k]: ## "dip" based on sim-drop
            # If similarity dips, P of item is based on frequency 
                numrat = pow(freql[k],beta[0]) 
                denrat = sum(pow(freqh[k],beta[0]))
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(phonl[k],beta[2])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(phonh[k],beta[2])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct

    def model_dynamic_pglobal_simdrop(beta, freql, freqh, siml, simh, phonl, phonh):
      ## here we use phonology as a "local" cue with semantics
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif k > 0 and k < (len(freql)-1) and siml[k+1] > siml[k] and siml[k-1] > siml[k]: ## "dip" based on sim-drop
            # If similarity dips, P of item is based on a combination of frequency and phonemic similarity
                numrat = pow(freql[k],beta[0]) * pow(phonl[k],beta[2]) 
                denrat = sum(pow(freqh[k],beta[0]) * pow(phonh[k],beta[2]) )
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(phonl[k],beta[2])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(phonh[k],beta[2])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct

    ## TROYER fits : switch in dynamic models is based on troyer norms NOT similarity drop

    def model_dynamic_psyrev_troyer(beta, freql, freqh, siml, simh, troyer):
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif troyer[k] == 1:## switch based on if troyer category changes
            # If similarity dips, P of item is based again on frequency
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct
      
    def model_dynamic_pswitchonly_troyer(beta, freql, freqh, siml, simh, phonl, phonh, troyer):
      ## here we use phonology as a "local" cue with semantics
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif troyer[k] == 1: ## "dip" based on sim-drop
            # If similarity dips, P of item is based on a combination of frequency and phonemic similarity
                numrat = pow(freql[k],beta[0]) * pow(phonl[k],beta[2]) 
                denrat = sum(pow(freqh[k],beta[0]) * pow(phonh[k],beta[2]) )
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct
    

    def model_dynamic_plocal_troyer(beta, freql, freqh, siml, simh, phonl, phonh, troyer):
      ## here we use phonology as a "local" cue with semantics
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif troyer[k] == 1:
            # If similarity dips, P of item is based on frequency 
                numrat = pow(freql[k],beta[0]) 
                denrat = sum(pow(freqh[k],beta[0]))
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(phonl[k],beta[2])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(phonh[k],beta[2])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct

    def model_dynamic_pglobal_troyer(beta, freql, freqh, siml, simh, phonl, phonh, troyer):
      ## here we use phonology as a "local" cue with semantics
        import numpy as np
        ct = 0
        for k in range(0, len(freql)):
            if k == 0 :
            # P of item based on frequency alone (freq of this item / freq of all items)
                numrat = pow(freql[k],beta[0])
                denrat = sum(pow(freqh[k],beta[0]))
            elif troyer[k] == 1:
            # If similarity dips, P of item is based on a combination of frequency and phonemic similarity
                numrat = pow(freql[k],beta[0]) * pow(phonl[k],beta[2]) 
                denrat = sum(pow(freqh[k],beta[0]) * pow(phonh[k],beta[2]) )
            else:
            # P of item based on combined frequency and similarity
                numrat = pow(freql[k],beta[0])*pow(phonl[k],beta[2])*pow(siml[k],beta[1])
                denrat = sum(pow(freqh[k],beta[0])*pow(phonh[k],beta[2])*pow(simh[k],beta[1]))
            ct += -np.log(numrat/denrat)
        return ct
    

    def getfits( freq_l, freq_h, sim_l, sim_h, phon_l, phon_h , troyer):
        import numpy as np
        from scipy.optimize import fmin
    #fmin: Uses a Nelder-Mead simplex algorithm to find the minimum of function of variables.
        r1 = np.random.rand()
        r2 = np.random.rand()
        r3 = np.random.rand()

    # STATIC MODEL (no dynamic switching, just focusing on two cues with some weights)

        # 1.) Optimize model parameters
        v = fmin(forage.model_static, [r1, r2], args=(freq_l, freq_h, sim_l, sim_h), ftol = 0.001, disp=False)
        beta_static_freq = float(v[0]) # Optimized weight for frequency cue
        beta_static_semantic = float(v[1]) # Optimized weight for similarity cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_static = forage.model_static([beta_static_freq, beta_static_semantic], freq_l, freq_h, sim_l, sim_h)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_static = forage.model_static([0, 0], freq_l, freq_h, sim_l, sim_h)
    # LOCAL PHONEMIC CUE, STATIC MODEL (no dynamic switching, just focusing on two cues with some weights)

        # 1.) Optimize model parameters
        v = fmin(forage.model_static_plocal, [r1, r2, r3], args=(freq_l, freq_h, sim_l, sim_h, phon_l, phon_h), ftol = 0.001, disp=False)
        beta_static_plocal_freq = float(v[0]) # Optimized weight for frequency cue
        beta_static_plocal_semantic = float(v[1]) # Optimized weight for similarity cue
        beta_static_plocal_phonemic = float(v[2]) # Optimized weight for phonemic cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_plocalstatic = forage.model_static_plocal([beta_static_plocal_freq, beta_static_plocal_semantic, beta_static_plocal_phonemic], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_plocalstatic = forage.model_static_plocal([0, 0, 0], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)
    
  
    # SIMDROP DYNAMIC MODEL (switches dynamically between cues)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_psyrev_simdrop, [r1,r2], args=(freq_l, freq_h, sim_l, sim_h), ftol = 0.001, disp=False)
        beta_dynamic_simdrop_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_simdrop_semantic = float(v[1]) # Optimized weight for similarity cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_dynamic_simdrop = forage.model_dynamic_psyrev_simdrop([beta_dynamic_simdrop_freq, beta_dynamic_simdrop_semantic], freq_l, freq_h, sim_l, sim_h)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_dynamic_simdrop = forage.model_dynamic_psyrev_simdrop([0,0], freq_l, freq_h, sim_l, sim_h)

    # SIMDROP DYNAMIC PHON SWITCH ONLY MODEL (switches dynamically between cues, phonology is a GLOBAL cue with frequency)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_pswitchonly_simdrop, [r1,r2, r3], args=(freq_l, freq_h, sim_l, sim_h, phon_l, phon_h), ftol = 0.001, disp=False)
        beta_dynamic_pswitchonly_simdrop_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_pswitchonly_simdrop_semantic = float(v[1]) # Optimized weight for similarity cue
        beta_dynamic_pswitchonly_simdrop_phonemic = float(v[2]) # Optimized weight for phonemic cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_pswitchonlydynamic_simdrop = forage.model_dynamic_pswitchonly_simdrop([beta_dynamic_pswitchonly_simdrop_freq, beta_dynamic_pswitchonly_simdrop_semantic, beta_dynamic_pswitchonly_simdrop_phonemic], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_pswitchonlydynamic_simdrop = forage.model_dynamic_pswitchonly_simdrop([0, 0, 0], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)
          

      # SIMDROP DYNAMIC PHON LOCAL MODEL (switches dynamically between cues, phonology,semantic, freq is a LOCAL cue)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_plocal_simdrop, [r1,r2, r3], args=(freq_l, freq_h, sim_l, sim_h, phon_l, phon_h), ftol = 0.001, disp=False)
        beta_dynamic_plocal_simdrop_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_plocal_simdrop_semantic = float(v[1]) # Optimized weight for similarity cue
        beta_dynamic_plocal_simdrop_phonemic = float(v[2]) # Optimized weight for phonemic cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_plocaldynamic_simdrop = forage.model_dynamic_plocal_simdrop([beta_dynamic_plocal_simdrop_freq, beta_dynamic_plocal_simdrop_semantic, beta_dynamic_plocal_simdrop_phonemic], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_plocaldynamic_simdrop = forage.model_dynamic_plocal_simdrop([0, 0, 0], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)

     # SIMDROP DYNAMIC PHON GLOBAL MODEL (switches dynamically between cues, phonology is a GLOBAL cue with frequency)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_pglobal_simdrop, [r1,r2, r3], args=(freq_l, freq_h, sim_l, sim_h, phon_l, phon_h), ftol = 0.001, disp=False)
        beta_dynamic_pglobal_simdrop_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_pglobal_simdrop_semantic = float(v[1]) # Optimized weight for similarity cue
        beta_dynamic_pglobal_simdrop_phonemic = float(v[2]) # Optimized weight for phonemic cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_pglobaldynamic_simdrop = forage.model_dynamic_pglobal_simdrop([beta_dynamic_pglobal_simdrop_freq, beta_dynamic_pglobal_simdrop_semantic, beta_dynamic_pglobal_simdrop_phonemic], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_pglobaldynamic_simdrop = forage.model_dynamic_pglobal_simdrop([0, 0, 0], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h)


        ## troyer dynamic models

        # TROYER DYNAMIC MODEL (switches dynamically between cues)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_psyrev_troyer, [r1,r2], args=(freq_l, freq_h, sim_l, sim_h, troyer), ftol = 0.001, disp=False)
        beta_dynamic_troyer_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_troyer_semantic = float(v[1]) # Optimized weight for similarity cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_dynamic_troyer = forage.model_dynamic_psyrev_troyer([beta_dynamic_troyer_freq, beta_dynamic_troyer_semantic], freq_l, freq_h, sim_l, sim_h, troyer)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_dynamic_troyer = forage.model_dynamic_psyrev_troyer([0,0], freq_l, freq_h, sim_l, sim_h, troyer)

    # TROYER DYNAMIC PHON SWITCH ONLY MODEL (switches dynamically between cues, phonology is a GLOBAL cue with frequency)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_pswitchonly_troyer, [r1,r2, r3], args=(freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer), ftol = 0.001, disp=False)
        beta_dynamic_pswitchonly_troyer_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_pswitchonly_troyer_semantic = float(v[1]) # Optimized weight for similarity cue
        beta_dynamic_pswitchonly_troyer_phonemic = float(v[2]) # Optimized weight for phonemic cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_pswitchonlydynamic_troyer = forage.model_dynamic_pswitchonly_troyer([beta_dynamic_pswitchonly_troyer_freq, beta_dynamic_pswitchonly_troyer_semantic, beta_dynamic_pswitchonly_troyer_phonemic], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_pswitchonlydynamic_troyer = forage.model_dynamic_pswitchonly_troyer([0, 0, 0], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer)
          

      # TROYER DYNAMIC PHON LOCAL MODEL (switches dynamically between cues, phonology,semantic, freq is a LOCAL cue)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_plocal_troyer, [r1,r2, r3], args=(freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer), ftol = 0.001, disp=False)
        beta_dynamic_plocal_troyer_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_plocal_troyer_semantic = float(v[1]) # Optimized weight for similarity cue
        beta_dynamic_plocal_troyer_phonemic = float(v[2]) # Optimized weight for phonemic cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_plocaldynamic_troyer = forage.model_dynamic_plocal_troyer([beta_dynamic_plocal_troyer_freq, beta_dynamic_plocal_troyer_semantic, beta_dynamic_plocal_troyer_phonemic], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_plocaldynamic_troyer = forage.model_dynamic_plocal_troyer([0, 0, 0], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer)

     # TROYER DYNAMIC PHON GLOBAL MODEL (switches dynamically between cues, phonology is a GLOBAL cue with frequency)

        # 1.) Optimize model parameters
        v = fmin(forage.model_dynamic_pglobal_troyer, [r1,r2, r3], args=(freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer), ftol = 0.001, disp=False)
        beta_dynamic_pglobal_troyer_freq = float(v[0]) # Optimized weight for frequency cue
        beta_dynamic_pglobal_troyer_semantic = float(v[1]) # Optimized weight for similarity cue
        beta_dynamic_pglobal_troyer_phonemic = float(v[2]) # Optimized weight for phonemic cue

        # 2.) Determine model fit (errors) at optimal parameters: will return total -LL
        optimal_fit_pglobaldynamic_troyer = forage.model_dynamic_pglobal_troyer([beta_dynamic_pglobal_troyer_freq, beta_dynamic_pglobal_troyer_semantic, beta_dynamic_pglobal_troyer_phonemic], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer)

        # 3.) For comparison, determine model fit (errors) without parameter fits
        random_fit_pglobaldynamic_troyer = forage.model_dynamic_pglobal_troyer([0, 0, 0], freq_l, freq_h, sim_l, sim_h, phon_l, phon_h, troyer)
      

        results = [ beta_static_freq, beta_static_semantic, float(optimal_fit_static), float(random_fit_static),
                   beta_static_plocal_freq, beta_static_plocal_semantic, beta_static_plocal_phonemic, float(optimal_fit_plocalstatic), float(random_fit_plocalstatic),
                  

                   beta_dynamic_simdrop_freq, beta_dynamic_simdrop_semantic, float(optimal_fit_dynamic_simdrop), float(random_fit_dynamic_simdrop),
                   beta_dynamic_pswitchonly_simdrop_freq, beta_dynamic_pswitchonly_simdrop_semantic, beta_dynamic_pswitchonly_simdrop_phonemic, float(optimal_fit_pswitchonlydynamic_simdrop), float(random_fit_pswitchonlydynamic_simdrop),

                   beta_dynamic_plocal_simdrop_freq, beta_dynamic_plocal_simdrop_semantic, beta_dynamic_plocal_simdrop_phonemic, float(optimal_fit_plocaldynamic_simdrop), float(random_fit_plocaldynamic_simdrop),
                   beta_dynamic_pglobal_simdrop_freq, beta_dynamic_pglobal_simdrop_semantic, beta_dynamic_pglobal_simdrop_phonemic, float(optimal_fit_pglobaldynamic_simdrop), float(random_fit_pglobaldynamic_simdrop),

                   beta_dynamic_troyer_freq, beta_dynamic_troyer_semantic, float(optimal_fit_dynamic_troyer), float(random_fit_dynamic_troyer),
                   beta_dynamic_pswitchonly_troyer_freq, beta_dynamic_pswitchonly_troyer_semantic, beta_dynamic_pswitchonly_troyer_phonemic, float(optimal_fit_pswitchonlydynamic_troyer), float(random_fit_pswitchonlydynamic_troyer),

                   beta_dynamic_plocal_troyer_freq, beta_dynamic_plocal_troyer_semantic, beta_dynamic_plocal_troyer_phonemic, float(optimal_fit_plocaldynamic_troyer), float(random_fit_plocaldynamic_troyer),
                   beta_dynamic_pglobal_troyer_freq, beta_dynamic_pglobal_troyer_semantic, beta_dynamic_pglobal_troyer_phonemic, float(optimal_fit_pglobaldynamic_troyer), float(random_fit_pglobaldynamic_troyer)

                   ]

        return results

## run foraging models

In [13]:
datapath = parentfolder+'data-fluency.txt'
modelFits(datapath, delimiter = "\t")

All items OK.
SUBJECT 1/171 51
SUBJECT 2/171 575
SUBJECT 3/171 576
SUBJECT 4/171 577
SUBJECT 5/171 578
SUBJECT 6/171 579
SUBJECT 7/171 580
SUBJECT 8/171 581
SUBJECT 9/171 582
SUBJECT 10/171 71
SUBJECT 11/171 583
SUBJECT 12/171 584
SUBJECT 13/171 585
SUBJECT 14/171 586
SUBJECT 15/171 587
SUBJECT 16/171 588
SUBJECT 17/171 589
SUBJECT 18/171 590
SUBJECT 19/171 591
SUBJECT 20/171 592
SUBJECT 21/171 593
SUBJECT 22/171 594
SUBJECT 23/171 595
SUBJECT 24/171 596
SUBJECT 25/171 597
SUBJECT 26/171 598
SUBJECT 27/171 677
SUBJECT 28/171 678
SUBJECT 29/171 679
SUBJECT 30/171 680
SUBJECT 31/171 681
SUBJECT 32/171 682
SUBJECT 33/171 683
SUBJECT 34/171 684
SUBJECT 35/171 686
SUBJECT 36/171 687
SUBJECT 37/171 688
SUBJECT 38/171 689
SUBJECT 39/171 690
SUBJECT 40/171 691
SUBJECT 41/171 692
SUBJECT 42/171 693
SUBJECT 43/171 694
SUBJECT 44/171 695
SUBJECT 45/171 696
SUBJECT 46/171 697
SUBJECT 47/171 698
SUBJECT 48/171 198
SUBJECT 49/171 199
SUBJECT 50/171 779
SUBJECT 51/171 780
SUBJECT 52/171 781
SUBJECT 5