In [3]:
import model
import pandas as pd
from collections import Counter
import numpy as np

dog_data = pd.read_csv("dog_open_ended.csv")
experimental_results = {"child": [], "dog_trainer": []}
for q_num in ["Q1", "Q3", "Q4", "Q12", "Q10", "Q6"]:
    n_examples = len(dog_data[q_num][4:])
    c = Counter([item.strip().
                 lower() for item in dog_data[q_num][4:]])
    result = {w.replace(" ", "_"): round(count / n_examples, 3) for w, count in c.most_common(5)}
    
    if q_num in ["Q1", "Q4", "Q10"]:
        experimental_results["dog_trainer"].append(result)
    elif q_num in ["Q3", "Q12", "Q6"]:
        experimental_results["child"].append(result)

actual = {"dog_trainer": [
{"dog": 0.422, "dalmatian": 0.567, "spotted_dog": 0.011},
{"dalmatian": 0.978, "spotted_dog": 0.022},
{"pug": 0.967, "small_dog": 0.033}], "child": [
{"dog": 0.522, "dalmatian": 0.244, "spotted_dog": 0.233},
{"dalmatian": 0.356, "spotted_dog": 0.644},
{"pug": 0.5, "small_dog": 0.5}]}
speaker_weights = np.arange(0, 1.25, 0.25)
length_cost_params = np.arange(0, 10, 1)
lambdas = [0.1, 1, 3, 5, 10, 15, 20, 30]
contexts = [["DALMATIAN", "CAT", "HORSE"], ["DALMATIAN", "PUG", "POODLE"], ["PUG", "DALMATIAN", "POODLE"]]
results = []
for length_cost_param in length_cost_params:
    for speaker_weight in speaker_weights:
        for length_cost in [False, True]:
            for lambda_val in lambdas:
                if length_cost:
                    lcp = length_cost_param
                else:
                    lcp = 0
                child_agent = model.Agent("child", lambda_param=lambda_val, 
                                          speaker_weight=speaker_weight, length_cost_smoothing=lcp)
                adult_agent = model.Agent("general_adult", lambda_param=lambda_val, 
                                          speaker_weight=speaker_weight, length_cost_smoothing=lcp)
                dog_trainer_agent = model.Agent("dog_trainer", lambda_param=lambda_val,
                                          speaker_weight=speaker_weight, length_cost_smoothing=lcp)

                total_variation_child = 0
                total_variation_dog_trainer = 0
                rank_prop = 0
                for c, context in enumerate(contexts):
                    to_child = adult_agent.produce_matrix_mutant(context, child_agent)
                    to_dog_trainer = adult_agent.produce_matrix_mutant(context, dog_trainer_agent)

                    compare_child = []
                    compare_dog_trainer = []
                    for word1, prob1 in to_child.items():
                        prob2 = actual["child"][c].get(word1, 0)
                        compare_child.append(abs(prob1-prob2))

                    for word1, prob1 in to_dog_trainer.items():
                        prob2 = actual["dog_trainer"][c].get(word1, 0)
                        compare_dog_trainer.append(abs(prob1-prob2))

                    total_variation_child += sum(compare_child)/2
                    total_variation_dog_trainer += sum(compare_dog_trainer)/2
        
                    to_child_ranking = [k for k,_ in sorted(to_child.items(), key=lambda item: item[1]) 
                                        if k in actual["child"][c].keys()]
                    to_trainer_ranking = [k for k,_ in sorted(to_dog_trainer.items(), key=lambda item: item[1]) 
                                        if k in actual["dog_trainer"][c].keys()]
                    actual_child_ranking = [k for k,_ in sorted(actual["child"][c].items(), key=lambda item: item[1]) 
                                        if k in to_child.keys()]
                    actual_trainer_ranking = [k for k,_ in sorted(actual["dog_trainer"][c].items(), key=lambda item: item[1]) 
                                        if k in to_dog_trainer.keys()]
                    if to_child_ranking == actual_child_ranking:
                        rank_prop += 1
                    if to_trainer_ranking == actual_trainer_ranking:
                        rank_prop += 1

        
                results.append({"lambda": lambda_val, "length cost": length_cost, "length cost smoothing": lcp, "speaker weight": speaker_weight,
                                "proportion correct ranking": rank_prop/(len(contexts)*2),
                                "result_trainer": total_variation_dog_trainer/len(contexts), 
                                "result_child": total_variation_child/len(contexts), 
                                "result_avg": (total_variation_child/len(contexts) + total_variation_dog_trainer/len(contexts)) / 2})

print("best settings for dog trainer", np.array(results)[np.argsort([r["result_trainer"] for r in results])][0])
print("best settings for child", np.array(results)[np.argsort([r["result_child"] for r in results])][0])
print("best settings on average", np.array(results)[np.argsort([r["result_avg"] for r in results])][0])


child_agent = model.Agent("child", lambda_param=3, 
                          speaker_weight=0.75, length_cost_smoothing=5)
adult_agent = model.Agent("general_adult", lambda_param=3, 
                          speaker_weight=0.75, length_cost_smoothing=5)
dog_trainer_agent = model.Agent("dog_trainer", lambda_param=3,
                          speaker_weight=0.75, length_cost_smoothing=5)

for c, context in enumerate(contexts):
    to_child = adult_agent.produce_matrix_mutant(context, child_agent)
    to_dog_trainer = adult_agent.produce_matrix_mutant(context, dog_trainer_agent)
    print(to_child)
    print(to_dog_trainer)

best settings for dog trainer {'lambda': 5, 'length cost': True, 'length cost smoothing': 0, 'speaker weight': 1.0, 'proportion correct ranking': 0.6666666666666666, 'result_trainer': 0.048268787346568594, 'result_child': 0.3725893293448095, 'result_avg': 0.21042905834568904}
best settings for child {'lambda': 3, 'length cost': False, 'length cost smoothing': 0, 'speaker weight': 0.25, 'proportion correct ranking': 0.6666666666666666, 'result_trainer': 0.28216416495640506, 'result_child': 0.13438074916479925, 'result_avg': 0.20827245706060216}
best settings on average {'lambda': 3, 'length cost': True, 'length cost smoothing': 5, 'speaker weight': 0.75, 'proportion correct ranking': 0.6666666666666666, 'result_trainer': 0.08469622026840051, 'result_child': 0.3122838005343289, 'result_avg': 0.1984900104013647}
{'dalmatian': 0.03225315819579699, 'spotted_dog': 0.4080320692164627, 'dog': 0.5597147725877403}
{'dalmatian': 0.41817992998333026, 'spotted_dog': 0.01529614594980534, 'dog': 0.56

In [4]:
lambdas = [0.1, 1, 3, 5, 10, 15, 20, 30]
# lambdas = [1]
contexts = [["DALMATIAN", "CAT", "HORSE"], ["DALMATIAN", "PUG", "POODLE"], ["PUG", "DALMATIAN", "POODLE"]]
results = []
for lambda_val in lambdas:
    adult_agent = model.Agent("general_adult", lambda_param=lambda_val)

    total_variation_child = 0
    total_variation_dog_trainer = 0
    rank_prop = 0
    for c, context in enumerate(contexts):
        produce = adult_agent.produce_matrix_plain(context)

        compare_child = []
        compare_trainer = []
        for word1, prob1 in produce.items():
            prob2 = actual["child"][c].get(word1, 0)
            compare_child.append(abs(prob1-prob2))
            
            prob3 = actual["dog_trainer"][c].get(word1, 0)
            compare_trainer.append(abs(prob1-prob3))
#         print(compare_child)

        total_variation_child += sum(compare_child)/2
        total_variation_dog_trainer += sum(compare_dog_trainer)/2

        produce_ranking_child = [k for k,_ in sorted(produce.items(), key=lambda item: item[1]) 
                            if k in actual["child"][c].keys()]
        produce_ranking_trainer = [k for k,_ in sorted(produce.items(), key=lambda item: item[1]) 
                            if k in actual["dog_trainer"][c].keys()]
        actual_child_ranking = [k for k,_ in sorted(actual["child"][c].items(), key=lambda item: item[1]) 
                            if k in produce.keys()]
        actual_trainer_ranking = [k for k,_ in sorted(actual["dog_trainer"][c].items(), key=lambda item: item[1]) 
                            if k in produce.keys()]
#         print(produce_ranking_child, actual_child_ranking)
        if produce_ranking_child == actual_child_ranking:
            rank_prop += 1
        if produce_ranking_trainer == actual_trainer_ranking:
            rank_prop += 1


    results.append({"lambda": lambda_val,
                    "proportion correct ranking": rank_prop/(len(contexts)*2),
                    "result_trainer": total_variation_dog_trainer/len(contexts), 
                    "result_child": total_variation_child/len(contexts), 
                    "result_avg": (total_variation_child/len(contexts) + total_variation_dog_trainer/len(contexts)) / 2})

print("best settings for dog trainer", np.array(results)[np.argsort([r["result_trainer"] for r in results])][0])
print("best settings for child", np.array(results)[np.argsort([r["result_child"] for r in results])][0])
print("best settings on average", np.array(results)[np.argsort([r["result_avg"] for r in results])][0])

adult_agent = model.Agent("general_adult", lambda_param=10)
for c, context in enumerate(contexts):
    produce = adult_agent.produce_matrix_plain(context)
    print(produce)

best settings for dog trainer {'lambda': 0.1, 'proportion correct ranking': 0.3333333333333333, 'result_trainer': 0.03300000000000002, 'result_child': 0.2693104615484699, 'result_avg': 0.15115523077423496}
best settings for child {'lambda': 30, 'proportion correct ranking': 0.3333333333333333, 'result_trainer': 0.03300000000000002, 'result_child': 0.11105555555555678, 'result_avg': 0.0720277777777784}
best settings on average {'lambda': 30, 'proportion correct ranking': 0.3333333333333333, 'result_trainer': 0.03300000000000002, 'result_child': 0.11105555555555678, 'result_avg': 0.0720277777777784}
{'dalmatian': 0.3333333333333333, 'spotted_dog': 0.3333333333333333, 'dog': 0.3333333333333333}
{'dalmatian': 0.4999957662638972, 'spotted_dog': 0.4999957662638972, 'dog': 8.467472205522475e-06}
{'pug': 0.4999957662638972, 'small_dog': 0.4999957662638972, 'dog': 8.467472205522475e-06}
