# Setting attention weights

In [1]:
%load_ext autoreload
from ExemPy import *
from ExemPy.utils import *
from ExemPy.viz import *
from ExemPy.GCM import *
%aimport ExemPy, ExemPy.utils, ExemPy.viz, ExemPy.GCM
%autoreload 1
import math
import random
import matplotlib.pyplot as plt
#%matplotlib inline
import numpy as np
import pandas as pd
from pandas import DataFrame
from scipy.optimize import minimize
import seaborn as sns
sns.set(style='ticks', context='paper')
colors=["#e3c934","#68c4bf","#c51000","#287271"]
sns.set_palette(colors)

In [2]:
# Read in data, set initial parameters
pb52 = pd.read_csv('pb52_data//pb52.csv')
pb52 = HzToBark(pb52, ["F0", "F1", "F2", "F3"])
dimsvals={'z0' : 1,
          'z1' : .761,
          'z2' : .681,
          'z3' : .407}
dimslist = list(dimsvals.keys())

catslist = ['type', 'vowel'] 

cval = 25

exemplars = pb52.copy()

In [9]:
# Define error function
def calcerror(x, test, exemplars, catslist, fitdims, cval, anchordim = None):
    '''
    Categorizes a data set and returns the proportion of stimuli/test
    rows that were categorized inaccurately. A lower value means a
    lower amount of error. Designed to be used with parameter
    fitting functions to assign values to attention weighting
    for dimensions.
    
    Required paratemers:
    
    x = Array. Initial guesses for parameters
    
    test = DataFrame. Stimuli to be categorized
    
    exemplars = DataFrame. Exemplar cloud to use for categorization
    
    catslist = List of strings. Each string should correspond to a
        category that should be assigned to the test
    
    fitdims = List of strings. Each string should correspond to a 
        dimension for which parameters should be fit.
        
    Optional parameters:
    
    anchordim = String. Dimension for parameter which will not be fit,
        but will instead be hard-coded as 1. This helps constrain
        the set of possible solutions
    
    
    '''  
    dimsvals = {fitdims[i]: x[i] for i in range(len(fitdims))}
    if anchordim != None:
        dimsvals.update({anchordim:1})
    
    choices = multicat(test, cloud, catslist, dimsvals, cval)
    accuracy = checkaccuracy(choices, catslist)
    category = catslist[0]
    err = accuracy[category+"Acc"].value_counts(normalize=True)['n']
    return err

In [10]:
# Specify arguments for optimization
fitdims = dimslist[1:]      # Fit all dimensions except item 0
anchordim = dimslist[0]     # Set item 0 to 1

name = 'pb52-111723'        # name of output spreadsheet
nt = 3                      # number of times that random x is generated
t = 0.1                     # Tolerance value -- lower = more evals

# To demonstrate, fit based on 50 exemplars of each vowel
test = gettestset(exemplars, "vowel", 50)

cloud = exemplars
cats = ["vowel"]


In [7]:
# Optimize
# Initialize lists
resultslist=[['start','fit','error','evals']]
wlist=[]

print("----- Parameters -----")
if anchordim != None: 
    print("Anchored (1):  ", anchordim)
    
print("Optimized:     ", fitdims)
print("")
print("Categorizing for: ", cats)
print("")
print("Trials: ", nt)
print("")

for i in range(0,nt): 
    x=np.divide(random.sample(range(0,300),len(fitdims)),100)
    xguess = x
    result = minimize(calcerror,
                  xguess,
                  args=(test, cloud, cats, fitdims, cval, anchordim),
                  method='Powell',  
                  tol = t) 
    # Create list to save as csv
    start = x
    fit = np.round(result.x,3)
    error = result.fun
    evals = result.nfev
    row = [start,fit,error,evals]
    resultslist.append(row)
    
    # Re-compose w dict to save with json  
    wdict_keys = fitdims
    wdict_vals = list(fit)
    #if anchordim != None: 
    wdict_keys.insert(0, anchordim)
    wdict_vals.insert(0, 'hi')    
    wdict = {wdict_keys[i]: wdict_vals[i] for i in range(len(wdict_keys))}
    wlist.append(wdict)
    
    print ("-----", (i+1) ," -----")
    print("Initial guess:    ", start)
    print("Optimized:        ", fit)
    print(" ")
    print("Number evals: ", evals)
    print("Error:        ", error)
    print("")
results=pd.DataFrame(resultslist)
results.columns = results.iloc[0]
results=results[1:]

settings = {"fitdims": fitdims, "anchordim": anchordim,
            "cats": cats, "trials":nt, "tol": t }

# Write results to csv
results.to_csv(name+".csv")     
with open((name+"_info.txt"),"w") as file:
    file.write(str(settings))

#Clear lists
resultslist = []

----- Parameters -----
Anchored (1):   z0
Optimized:      ['z1', 'z2', 'z3']

Categorizing for:  ['vowel']

Trials:  3

----- 1  -----
Initial guess:     [2.53 0.1  0.28]
Optimized:         [ 1.912  0.482 -0.192]
 
Number evals:  33
Error:         0.1

----- 2  -----
Initial guess:     [1.34 2.29 1.85 0.05]
Optimized:         [3.092 0.987 0.633 0.585]
 
Number evals:  52
Error:         0.098

----- 3  -----
Initial guess:     [1.15 0.56 2.44 1.38 2.32]
Optimized:         [2.15  1.56  3.44  1.526 2.367]
 
Number evals:  28
Error:         0.118

