In [12]:
import model
import pandas as pd
from collections import Counter
import numpy as np

dog_data = pd.read_csv("dog_open_ended.csv")

# Listener each survey question was addressed to. Iteration order matters:
# results are appended per listener in this order.
audience_by_question = {
    "Q1": "dog_trainer",
    "Q3": "child",
    "Q4": "dog_trainer",
    "Q12": "child",
    "Q10": "dog_trainer",
    "Q6": "child",
}

# For each listener, one {answer: proportion} dict per question.
experimental_results = {"child": [], "dog_trainer": []}
for q_num, audience in audience_by_question.items():
    # The first four rows of every column are skipped
    # (presumably survey metadata rather than answers -- TODO confirm).
    responses = dog_data[q_num][4:]
    n_examples = len(responses)

    # Normalise free-text answers before counting them.
    c = Counter(answer.strip().lower() for answer in responses)

    # Keep the five most frequent answers as proportions of all responses,
    # with spaces replaced by underscores in the answer text.
    top_answers = c.most_common(5)
    result = {
        w.replace(" ", "_"): round(count / n_examples, 3)
        for w, count in top_answers
    }

    experimental_results[audience].append(result)

# Reference word distributions the model output is scored against.
# Each listener has one {word: probability} dict per entry of `contexts`,
# in the same order (looked up as actual[listener][context_index]).
# Presumably these are the rounded response proportions tallied from the
# survey data -- TODO confirm against `experimental_results`.
actual = dict(
    dog_trainer=[
        {"dog": 0.422, "dalmatian": 0.567, "spotted_dog": 0.011},
        {"dalmatian": 0.978, "spotted_dog": 0.022},
        {"pug": 0.967, "small_dog": 0.033},
    ],
    child=[
        {"dog": 0.522, "dalmatian": 0.244, "spotted_dog": 0.233},
        {"dalmatian": 0.356, "spotted_dog": 0.644},
        {"pug": 0.5, "small_dog": 0.5},
    ],
)

def _total_variation(predicted, observed):
    """Return the total variation distance between two word distributions.

    Both arguments are mappings of word -> probability. Words are matched by
    exact key; a word missing from one side counts as probability 0 there.
    The distance is half the sum of absolute differences over the union of
    both supports.
    """
    seen = set()
    gap = 0
    for word, prob in predicted.items():
        seen.add(word)
        gap += abs(prob - observed.get(word, 0))
    # Words that only occur in `observed` still contribute their full mass;
    # skipping them would under-report the distance.
    gap += sum(abs(prob) for word, prob in observed.items() if word not in seen)
    return gap / 2


lambdas = [0.1, 1, 3, 5, 10, 15, 20, 30]
contexts = [["DALMATIAN", "CAT", "HORSE"], ["DALMATIAN", "PUG", "POODLE"], ["PUG", "DALMATIAN", "POODLE"]]
results = []
# NOTE(review): `length_cost` is recorded in each result row but is never
# passed to the agents or to produce_matrix_mutant, so both passes of this
# loop produce identical scores. Wire it into the model call once the
# relevant parameter is confirmed.
for length_cost in [False, True]:
    for lambda_val in lambdas:
        child_agent = model.Agent("child", lambda_param=lambda_val)
        adult_agent = model.Agent("general_adult", lambda_param=lambda_val)
        dog_trainer_agent = model.Agent("dog_trainer", lambda_param=lambda_val)

        for c, context in enumerate(contexts):
            # produce_matrix_mutant is assumed to return a mapping of
            # word -> probability (it is consumed via .items()) -- TODO confirm.
            to_child = adult_agent.produce_matrix_mutant(context, child_agent)
            to_dog_trainer = adult_agent.produce_matrix_mutant(context, dog_trainer_agent)

            # Score the model against the reference distribution for the
            # same context index.
            total_variation_child = _total_variation(to_child, actual["child"][c])
            total_variation_dog_trainer = _total_variation(to_dog_trainer, actual["dog_trainer"][c])

            results.append({"lambda": lambda_val, "context": context, "length cost": length_cost,
                            "result_trainer": total_variation_dog_trainer,
                            "result_child": total_variation_child,
                            "result_avg": (total_variation_child + total_variation_dog_trainer) / 2})

# Print every result row three times, each time sorted ascending by one of
# the distance metrics (smallest distance first).
ranked_reports = (
    ("best settings for dog trainer", "result_trainer"),
    ("best settings for child", "result_child"),
    ("best settings on average", "result_avg"),
)
results_array = np.array(results)
for report_label, metric in ranked_reports:
    ranking = np.argsort([r[metric] for r in results])
    print(report_label, results_array[ranking])

best settings for dog trainer [{'lambda': 0.1, 'context': ['DALMATIAN', 'CAT', 'HORSE'], 'length cost': False, 'result_trainer': 0.197768267223382, 'result_child': 0.13384693877551018, 'result_avg': 0.16580760299944608}
 {'lambda': 30, 'context': ['DALMATIAN', 'CAT', 'HORSE'], 'length cost': True, 'result_trainer': 0.197768267223382, 'result_child': 0.13384693877551018, 'result_avg': 0.16580760299944608}
 {'lambda': 20, 'context': ['DALMATIAN', 'CAT', 'HORSE'], 'length cost': True, 'result_trainer': 0.197768267223382, 'result_child': 0.13384693877551018, 'result_avg': 0.16580760299944608}
 {'lambda': 1, 'context': ['DALMATIAN', 'CAT', 'HORSE'], 'length cost': False, 'result_trainer': 0.197768267223382, 'result_child': 0.13384693877551018, 'result_avg': 0.16580760299944608}
 {'lambda': 15, 'context': ['DALMATIAN', 'CAT', 'HORSE'], 'length cost': True, 'result_trainer': 0.197768267223382, 'result_child': 0.13384693877551018, 'result_avg': 0.16580760299944608}
 {'lambda': 10, 'context': [