# Optimizing Parameters in ExemPy

This notebook shows you how to fit the attention-weighting parameters, building on the concepts introduced in `ExemPy-Basics.ipynb`.

In [1]:
%load_ext autoreload
import ExemPy as xm
%aimport ExemPy
%autoreload 1
import math
import random
import matplotlib.pyplot as plt
#%matplotlib inline
import numpy as np
import pandas as pd
from pandas import DataFrame
from scipy.optimize import minimize
import seaborn as sns
# Palette used for every seaborn plot in this notebook.
colors = [
    "#e3c934",
    "#68c4bf",
    "#c51000",
    "#287271",
]

# Apply the 'ticks' style at 'paper' context scale, then install the palette.
sns.set(context='paper', style='ticks')
sns.set_palette(colors)

## Read in data, set initial parameters

In [2]:
# Load the data set and pass it through xm.HzToBark for the F0-F3 columns
# (presumably this is what adds the z0-z3 columns seen in the sample below;
# confirm against the ExemPy documentation).
pb52 = xm.HzToBark(
    pd.read_csv('pb52_data//pb52.csv'),
    ["F0", "F1", "F2", "F3"],
)

# Display five random rows as a sanity check.
pb52.sample(5)

Unnamed: 0,type,gender,speaker,vowel,repetition,F0,F1,F2,F3,z0,z1,z2,z3
336,m,m,17,GOOSE,1,117,315,1080,2260,0.980241,3.182154,8.994605,13.827962
120,m,m,7,FLEECE,1,186,320,2320,3120,1.7937,3.232807,14.002523,15.935984
672,w,f,34,THOUGHT,1,207,570,830,3300,2.030992,5.510198,7.445735,16.289962
1085,w,f,55,DRESS,2,230,460,2300,3050,2.285662,4.566116,13.944883,15.791457
970,w,f,49,PALM,1,200,700,1080,2420,1.952407,6.525263,8.994605,14.282831


In [3]:
# Dimension name -> weight. The key order defines `dimslist`.
dimsvals = dict(z0=1, z1=.761, z2=.681, z3=.407)
dimslist = [*dimsvals]

# Categories: 'type' = man, woman, or child; 'vowel' = lexical set notation.
catslist = ['type', 'vowel']

cval = 25

# The same data frame serves as both the exemplar cloud and the test set.
exemplars = test = pb52

## Define error function
- In future versions, this function will be pre-defined in the library
- For now, there is some value in being able to tweak the function, and to see how it works!

In [4]:
def calcerror(x,test,exemplars,catslist,fitdims,cval,anchordim=None):
    '''
    Categorizes a data set and returns the proportion of stimuli/test rows
    that were categorized inaccurately. A lower value means a lower amount of
    error. Designed to be used with parameter fitting functions to assign
    values to attention weighting for the dimensions.

    Required parameters:

    x = Array. Current guesses for the parameters, in the same order as
        fitdims (x[i] is the weight tried for fitdims[i])

    test = DataFrame. Stimuli to be categorized

    exemplars = DataFrame. Exemplar cloud to use for categorization

    catslist = List of strings. Each string should correspond to a category that
        should be assigned to the test. Only the first category in the list
        is used to compute the error.

    fitdims = List of strings. Each string should correspond to a dimension
        for which parameters should be fit.

    cval = Passed through unchanged to xm.multicat.

    Optional parameters:

    anchordim = String. Dimension for parameter which will not be fit, but will
        instead be hard-coded as 1. This helps constrain the set of possible
        solutions

    Returns:

    Float. Proportion of rows marked 'n' in the "<catslist[0]>Acc" column of
        the accuracy table; 0.0 when no row is marked 'n'.
    '''
    # Example with fitdims = ['z1', 'z2', 'z3'] and anchordim = 'z0':
    #   x        = [z1guess, z2guess, z3guess]
    #   dimsvals = {'z1': z1guess, 'z2': z2guess, 'z3': z3guess, 'z0': 1}
    dimsvals = {dim: x[i] for i, dim in enumerate(fitdims)}
    if anchordim is not None:
        dimsvals[anchordim] = 1

    # Categorize against the exemplar cloud that was passed in. Do not rely on
    # a notebook-level variable being defined by some other cell.
    choices = xm.multicat(test, exemplars, catslist, dimsvals, cval)
    accuracy = xm.checkaccuracy(choices, catslist)

    category = catslist[0]
    proportions = accuracy[category + "Acc"].value_counts(normalize=True)
    # 'n' is absent from the counts when every row was categorized correctly;
    # that is an error of zero rather than a KeyError.
    return proportions.get('n', 0.0)

## Specify arguments for optimization

In [5]:
# Anchor the first dimension (its weight is fixed at 1) and fit all the others.
anchordim, fitdims = dimslist[0], dimslist[1:]

name = 'pboptimization'   # base name of the output spreadsheet / info file
n = 5                     # number of times a random starting x is generated
t = 0.1                   # tolerance value -- lower = more evals

# Categorize the exemplar set against itself.
cloud = test = exemplars
cats = ["vowel"]


## Optimize!
You may want to come back later for the results: 
- Results will be saved to a spreadsheet. (\[NAME\].csv)
- Settings will be saved to a text file. (\[NAME\]\_info.txt)

In [6]:
# One row per trial: [start, fit, error, evals]. The first row holds the
# column names and is split off again when the DataFrame is built.
resultslist=[['start','fit','error','evals']]


print("----- Parameters -----")
if anchordim is not None:
    print("Anchored (1):  ", anchordim)

print("Optimized:     ", fitdims)
print("")
print("Categorizing for: ", cats)
print("")
print("Trials: ",n)
print("")

for i in range(n):
    # Random starting point: distinct integers drawn from 0-299, divided by
    # 100 to give floats between 0 and 2.99, one per fitted dimension.
    # `random` is not seeded in this cell, so the starts differ between runs.
    x = np.divide(random.sample(range(0,300),len(fitdims)),100)
    xguess = x
    # NOTE(review): no bounds are given, so a fitted weight can come out
    # negative (as in the recorded output) -- confirm whether that is intended.
    result = minimize(calcerror,
                  xguess,  # the initial guess array
                  args=(test,cloud,cats,fitdims,cval,anchordim), # arguments for the error function
                  method='Powell',
                  tol=t,  # a 'tolerance' value, smaller means more function evaluation, but potentially better fit
                 )
    start = x
    fit = np.round(result.x,3)
    error = result.fun
    evals = result.nfev
    row = [start,fit,error,evals]
    resultslist.append(row)

    print ("-----", (i+1) ," -----")
    print("Initial guess:    ", start)
    print("Optimized:        ", fit)
    print(" ")
    print("Number evals: ", evals)
    print("Error:        ", error)
    print("")

# Use the header row as column names instead of passing it through as data,
# so 'error' and 'evals' keep numeric dtypes. The index stays 1-based
# (trial number), matching the row labels written to the spreadsheet.
results = pd.DataFrame(resultslist[1:],
                       columns=resultslist[0],
                       index=range(1, len(resultslist)))

# cval is passed to every evaluation above, so it is recorded with the other settings.
settings = {"fitdims": fitdims, "anchordim": anchordim, "cats": cats, "trials":n, "tol": t, "cval": cval }

# write results to csv
### good for if you want to leave it running while you do something else!
results.to_csv(name+".csv")
with open((name+"_info.txt"),"w") as info_file:
    info_file.write(str(settings))


----- Parameters -----
Anchored (1):   z0
Optimized:      ['z1', 'z2', 'z3']

Categorizing for:  ['vowel']

Trials:  5

----- 1  -----
Initial guess:     [0.32 0.41 2.87]
Optimized:         [ 2.702  1.174 -0.986]
 
Number evals:  44
Error:         0.1013157894736842

----- 2  -----
Initial guess:     [1.2  2.9  1.15]
Optimized:         [3.735 1.283 1.506]
 
Number evals:  47
Error:         0.09868421052631579

----- 3  -----
Initial guess:     [1.81 2.58 2.55]
Optimized:         [5.792 1.8   1.388]
 
Number evals:  44
Error:         0.1006578947368421

----- 4  -----
Initial guess:     [1.56 2.05 0.34]
Optimized:         [2.56  1.985 1.34 ]
 
Number evals:  18
Error:         0.10394736842105264

----- 5  -----
Initial guess:     [1.18 2.87 1.02]
Optimized:         [6.584 2.103 1.739]
 
Number evals:  43
Error:         0.1006578947368421

