# mixed models grid search

Here, we try to find the best mixed-trait models for the evolutionary analysis. In each model, two of the three attributes (color, scale, shape) are enforced with label smoothing. We vary the weighting between the two enforced traits as well as the smoothing factor. The goal is to find one model for each combination (color-scale, color-shape, scale-shape) such that the biases for the enforced attributes are strong and approximately equal, while the bias for the remaining attribute is approximately zero. In addition, the classification accuracies should be relatively high.

In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt
from scipy.spatial import distance
from utils.vision_analysis import save_rsa_cnns

### evaluate bias strength for each network

We store these results in the `results/` folder.

In [2]:
# Disabled cell: computes and saves the bias results for every combination of
# smoothing factor (sf) and trait weighting (tw).
# NOTE(review): presumably left commented out because the results are already
# stored on disk; uncomment to regenerate them -- confirm before re-running.
# for sf in ['0.6', '0.7', '0.8']:
#     for tw in ['05', '10', '15', '20', '25', '30', '35', '40', '45', '50', '55', '60',
#                '65', '70', '75', '80', '85', '90', '95']:
#         name = 'mixed_tw-' + tw + '_nonlinear_sf-' + str(sf) + '_'
#         save_rsa_cnns(sf=sf, tw=tw, n_examples=10, mode='mixed')

### training accuracies for each network, and each smoothing factor

Each list contains the training accuracies for the 19 trait weightings (`'05'`, `'10'`, ..., `'95'`), in that order.

In [3]:
# Hard-coded training accuracies of the mixed-trait networks.
# Layout: accuracies[network][smoothing_factor] -> list of 19 accuracies.
#   * network: the pair of enforced attributes ('color-shape', 'color-size',
#     'shape-size').
#   * smoothing_factor: the label-smoothing factor as a string ('0.6', '0.7',
#     '0.8').
#   * list position i corresponds to the i-th trait weighting in the order
#     '05', '10', ..., '95' (this is how find_best_networks indexes the lists).
accuracies = {
    'color-shape': {'0.6': [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000,
                          1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9999, 1.0000, 1.0000],
                    '0.7': [1.0000, 1.0000, 1.0000, 1.0000, 0.9835, 1.0000, 0.9996, 0.9678, 0.9999,
                         1.0000, 1.0000, 0.9995, 1.0000, 0.9990, 0.9998, 1.0000, 0.9953, 0.9996, 0.9957],
                    '0.8': [0.9975, 0.9736, 0.9740, 0.9883, 0.9885, 0.8547, 0.9997, 0.9441, 0.9845,
                          0.9952, 0.9143, 0.9639, 0.7619, 0.9477, 0.8554, 0.9872, 0.9701, 0.9759, 0.9898]},
                    
    'color-size': {'0.6': [1.0000, 1.0000, 0.9999, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 
                         1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 0.9999, 0.9998],
                   '0.7': [1.0000, 1.0000, 1.0000, 0.9995, 0.9992, 1.0000, 0.9999, 0.9975, 0.9996,
                         0.9999, 1.0000, 1.0000, 1.0000, 0.9999, 1.0000, 0.9991, 1.0000, 1.0000, 0.9997],
                   '0.8': [0.9967, 0.9932, 0.6112, 0.9637, 0.5792, 0.9962, 0.9649, 0.8939, 0.7754,
                         0.9468, 0.8013, 0.9394, 0.9242, 0.6037, 0.8381, 0.9850, 0.9869, 0.9887, 0.9265]},
    
    'shape-size': {'0.6': [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 
                         1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
                   '0.7': [1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 
                         1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000, 1.0000],
                   '0.8': [0.9003, 0.9992, 0.9624, 0.9551, 0.9738, 0.9120, 0.9957, 0.9522, 0.9980, 
                         0.9721, 0.9820, 0.9285, 0.9935, 0.9923, 0.9992, 1.0000, 0.9903, 0.9999, 1.0000]}
}


### find best networks

We set different minimal bias thresholds for the enforced attributes and, for each threshold, find the networks with the lowest error. The error is the sum of the absolute differences between the bias of each enforced attribute and the threshold, plus the absolute bias of the unenforced attribute (i.e. its distance from zero). Only networks with a training accuracy above 0.97 are considered.

In [15]:
def find_best_networks(threshold_enforced, min_training_acc=0.97,
                       results_dir='results/rsa_attributes_CNNfeatures',
                       accuracy_table=None):
    """Find, per attribute pair, the network whose biases best match a target.

    Every combination of smoothing factor ('0.6', '0.7', '0.8') and trait
    weighting ('05' ... '95') is scored for each of the three networks
    ('color-size', 'color-shape', 'shape-size'). The error of a candidate is

        sum(|bias(enforced attribute) - threshold_enforced|) + |bias(unenforced)|

    and the candidate with the smallest error whose training accuracy is
    strictly greater than ``min_training_acc`` wins.

    The best match is tracked across *all* smoothing factors. (Earlier
    versions re-created the bookkeeping dict for every smoothing factor, so
    only candidates with the last smoothing factor could ever be returned.)

    Args:
        threshold_enforced: Target bias value for the two enforced attributes.
        min_training_acc: Candidates with accuracy <= this value are skipped.
        results_dir: Directory holding the pickled result files named
            'mixed_tw-<tw>_nonlinear_sf-<sf>_10examples.pkl'.
        accuracy_table: Mapping ``table[network][sf][i]`` with the accuracy of
            the i-th weighting; defaults to the module-level ``accuracies``.

    Returns:
        dict with one entry per network,
        ``[tw, sf, diff_enforced_1, diff_enforced_2, diff_unenforced]``
        (differences rounded to 4 decimals; the placeholder
        ``['name', 'sf', None, None, None]`` if no candidate qualified), plus
        ``'accuracy'`` and ``'error'`` lists ordered as color-size,
        color-shape, shape-size. ``'error'`` stays ``inf`` and ``'accuracy'``
        stays ``[]`` for a network without a qualifying candidate.

    Raises:
        FileNotFoundError: If a result file is missing from ``results_dir``.
    """
    import os  # local import: the notebook's import cell does not provide it

    if accuracy_table is None:
        accuracy_table = accuracies

    smoothing_factors = ['0.6', '0.7', '0.8']
    weightings = ['05', '10', '15', '20', '25', '30', '35', '40', '45',
                  '50', '55', '60', '65', '70', '75', '80', '85', '90', '95']
    networks = ['color-size', 'color-shape', 'shape-size']
    attributes = ['color', 'scale', 'shape']

    # Initialised once, outside the sf loop, so the search really spans all
    # smoothing factors instead of being reset for each of them.
    best_matches = {network: ['name', 'sf', None, None, None]
                    for network in networks}
    best_matches['accuracy'] = [[], [], []]
    best_matches['error'] = [np.inf, np.inf, np.inf]

    for sf in smoothing_factors:
        for i, tw in enumerate(weightings):

            name = 'mixed_tw-' + tw + '_nonlinear_sf-' + sf + '_'
            path = os.path.join(results_dir, name + '10examples.pkl')
            # NOTE: pickle can execute arbitrary code on load; only point
            # results_dir at files produced by this project.
            with open(path, 'rb') as f:
                results = pickle.load(f)

            for n, network in enumerate(networks):
                acc = accuracy_table[network][sf][i]

                # Network names say 'size', the result files say 'scale'.
                enforced = ['scale' if attr == 'size' else attr
                            for attr in network.split('-')]
                # Exactly one of the three attributes is not enforced.
                (unenforced,) = [attr for attr in attributes
                                 if attr not in enforced]

                differences = {
                    attr: abs(results[network][attr] - threshold_enforced)
                    for attr in enforced
                }
                difference_nonbias = abs(results[network][unenforced])

                error = np.sum(list(differences.values())) + difference_nonbias
                if error < best_matches['error'][n] and acc > min_training_acc:
                    best_matches[network] = [
                        tw, sf,
                        np.round(differences[enforced[0]], 4),
                        np.round(differences[enforced[1]], 4),
                        np.round(difference_nonbias, 4),
                    ]
                    best_matches['accuracy'][n] = acc
                    best_matches['error'][n] = error
    return best_matches

In [17]:
# Sweep the candidate bias thresholds and report, for each one, the lowest
# error reached per network (order: color-size, color-shape, shape-size).
for threshold in (0.35, 0.40, 0.45, 0.50, 0.55):
    best_matches = find_best_networks(threshold_enforced=threshold)
    print('bias threshold:', threshold, ', error:', best_matches['error'])

bias threshold: 0.35 , error: [0.22758378667398005, 0.16058423151261245, 0.11759302292636874]
bias threshold: 0.4 , error: [0.12758378667397996, 0.09444285173919093, 0.03597137814275984]
bias threshold: 0.45 , error: [0.03893094510534477, 0.02472987775250296, 0.03540558856385197]
bias threshold: 0.5 , error: [0.07282672448376847, 0.10905065843971323, 0.10698489337827773]
bias threshold: 0.55 , error: [0.17282672448376857, 0.20905065843971332, 0.19124165963151313]


$\rightarrow$ It turns out that the smallest errors are achieved with a threshold of 0.45, hence we choose the networks that are optimal for this threshold. 

In [18]:
# Re-run the search at the chosen threshold and list the selected networks.
best_matches = find_best_networks(0.45)
# Plain loop instead of a list comprehension used only for its side effects;
# a for-statement also produces no cell output, so no `[0]` trick is needed.
for key, value in best_matches.items():
    print(key, value)

color-size ['30', '0.8', 0.0057, 0.0331, 0.0002]
color-shape ['35', '0.8', 0.0078, 0.0151, 0.0017]
shape-size ['25', '0.8', 0.02, 0.0142, 0.0012]
accuracy [0.9962, 0.9997, 0.9738]
error [0.03893094510534477, 0.02472987775250296, 0.03540558856385197]


$\rightarrow$ We use the networks: 
    
* color-size: weighting 0.30, smoothing factor 0.8
* color-shape: weighting 0.35, smoothing factor 0.8
* scale-shape (`shape-size` in the code): weighting 0.25, smoothing factor 0.8