In [1]:
import utilities
import numpy as np
from scipy import stats

In [2]:
# Relative paths to the result files analysed by the do_analysis(...) cells below,
# one per results directory.
# NOTE(review): the "512" / "128_50" parts and the "_pre" suffix presumably identify
# the model configuration and a "pre" variant of each run — confirm against whatever
# produced these directories.
pp_results_path_512 = 'temporal_pp_results_512/all_results.txt'
pp_results_path_128_50 = 'temporal_pp_results_128_50/all_results.txt'
pp_results_path_512_pre = 'temporal_pp_results_512_pre/all_results.txt'
pp_results_path_128_50_pre = 'temporal_pp_results_128_50_pre/all_results.txt'

In [3]:
def aggregate_by_category(path):
    """Flatten the results stored at ``path`` into per-category value lists.

    The object returned by ``utilities.read_data(path)`` is walked as a nested
    mapping: categories tuple -> year -> keywords tuple -> iterable of
    ``(prob, acc)`` pairs. The year and keyword levels are discarded, so every
    pair found beneath a categories tuple is collected under that tuple.

    Args:
        path: location of the results file, passed straight to
            ``utilities.read_data``.

    Returns:
        A pair ``(norm_prob_dict, acc_dict)``. Both are dicts keyed by
        categories tuple; the first holds the ``prob`` values and the second
        the ``acc`` values, appended in the same order.
    """
    norm_prob_dict, acc_dict = {}, {}
    for categories_tuple, years_dict in utilities.read_data(path).items():
        # Register the category up front so it is present (with empty lists)
        # even when nothing is stored beneath it.
        probs = norm_prob_dict.setdefault(categories_tuple, [])
        accs = acc_dict.setdefault(categories_tuple, [])
        for keywords_dict in years_dict.values():
            for pairs in keywords_dict.values():
                for prob, acc in pairs:
                    probs.append(prob)
                    accs.append(acc)
    return norm_prob_dict, acc_dict

In [4]:
def check_stats(all_norm_prob, all_acc, woman_norm_prob, woman_acc, man_norm_prob, man_acc):
    """Print group averages and woman-vs-man t-test results.

    Args:
        all_norm_prob, all_acc: norm prob / accuracy values pooled over all groups.
        woman_norm_prob, woman_acc: the same two measures for the 'woman' group.
        man_norm_prob, man_acc: the same two measures for the 'man' group.

    Prints, in order, the mean norm prob and mean accuracy for "all", "woman"
    and "man", followed by the t-value and p-value of
    ``scipy.stats.ttest_ind(woman, man)`` for norm prob and then for accuracy.

    Returns:
        None; the function only prints.
    """
    groups = (
        ('all', all_norm_prob, all_acc),
        ('woman', woman_norm_prob, woman_acc),
        ('man', man_norm_prob, man_acc),
    )
    for label, norm_prob, acc in groups:
        avg_norm_prob = np.mean(norm_prob)
        avg_acc = np.mean(acc)
        print('Average norm prob (%s):' % label, avg_norm_prob)
        print('Average accuracy (%s):' % label, avg_acc)

    # NOTE(review): ttest_ind is called with its defaults, i.e. the
    # equal-variance (Student) form — confirm that is the intended test.
    t_val_prob, p_val_prob = stats.ttest_ind(woman_norm_prob, man_norm_prob)
    t_val_acc, p_val_acc = stats.ttest_ind(woman_acc, man_acc)
    # p-values are printed with %g rather than %f: %f keeps six decimals, so
    # every p-value below 5e-7 was shown as a misleading "0.000000".
    print('Woman vs. man norm prob: t-value %f, p-value %g' % (t_val_prob, p_val_prob))
    print('Woman vs. man acc: t-value %f, p-value %g' % (t_val_acc, p_val_acc))

In [5]:
def analyze_all_categories(norm_prob_dict, acc_dict):
    """Pool every category together and report the statistics via check_stats.

    Both dicts are keyed by ``(gender, category)`` tuples and map to lists of
    values. Every list goes into the "all" pool; lists whose gender is
    ``'woman'`` or ``'man'`` additionally go into that gender's pool. Keys with
    any other gender therefore count only towards "all".

    Args:
        norm_prob_dict: ``(gender, category)`` -> list of norm prob values.
        acc_dict: ``(gender, category)`` -> list of accuracy values; must
            contain every key of ``norm_prob_dict``.

    Returns:
        None; results are printed by ``check_stats``, followed by blank lines.
    """
    pooled_probs, pooled_accs = [], []
    female_probs, female_accs = [], []
    male_probs, male_accs = [], []

    for (gender, categ), probs in norm_prob_dict.items():
        accs = acc_dict[(gender, categ)]
        if gender == 'woman':
            female_probs += probs
            female_accs += accs
        elif gender == 'man':
            male_probs += probs
            male_accs += accs
        pooled_probs += probs
        pooled_accs += accs

    check_stats(pooled_probs, pooled_accs, female_probs, female_accs, male_probs, male_accs)
    print('\n')

In [6]:
def analyze_each_category(norm_prob_dict, acc_dict, categories=('work', 'home', 'achiev')):
    """Report the statistics separately for each requested category.

    Both dicts are keyed by ``(gender, category)`` tuples and map to lists of
    values. For each entry of ``categories``, the lists whose key has that
    category are pooled into "all", and additionally into the ``'woman'`` or
    ``'man'`` pool when the gender matches. A header line naming the category
    is printed before the pools are handed to ``check_stats``.

    Args:
        norm_prob_dict: ``(gender, category)`` -> list of norm prob values.
        acc_dict: ``(gender, category)`` -> list of accuracy values; must
            contain every key of ``norm_prob_dict`` that is selected.
        categories: iterable of category names to report, in order. Defaults
            to the three categories that were previously hard-coded, so
            existing two-argument calls behave as before.

    Returns:
        None; results are printed.
    """
    for category in categories:
        all_norm_prob, all_acc = [], []
        woman_norm_prob, woman_acc = [], []
        man_norm_prob, man_acc = [], []
        for gender, categ in norm_prob_dict:
            if categ != category:
                continue
            norm_prob_list = norm_prob_dict[(gender, categ)]
            acc_list = acc_dict[(gender, categ)]
            if gender == 'woman':
                woman_norm_prob.extend(norm_prob_list)
                woman_acc.extend(acc_list)
            elif gender == 'man':
                man_norm_prob.extend(norm_prob_list)
                man_acc.extend(acc_list)
            all_norm_prob.extend(norm_prob_list)
            all_acc.extend(acc_list)
        print('STATS FOR CATEGORY:', category)
        # A category with no matching keys reaches check_stats with empty lists.
        check_stats(all_norm_prob, all_acc, woman_norm_prob, woman_acc, man_norm_prob, man_acc)
        print('\n')

In [7]:
def do_analysis(path):
    """Run the full analysis for the results file at ``path``.

    The file is aggregated once with ``aggregate_by_category``; the resulting
    pair of dicts is then reported first across all categories and afterwards
    category by category.

    Args:
        path: location of the results file to analyse.

    Returns:
        None; all results are printed.
    """
    probs_by_key, accs_by_key = aggregate_by_category(path)
    for report in (analyze_all_categories, analyze_each_category):
        report(probs_by_key, accs_by_key)

In [8]:
# Print pooled and per-category statistics for the results file at pp_results_path_512.
do_analysis(pp_results_path_512)

Average norm prob (all): 0.8787320129938979
Average accuracy (all): 0.9159219830600668
Average norm prob (woman): 0.8949972284450393
Average accuracy (woman): 0.9484967964514539
Average norm prob (man): 0.8712408945850076
Average accuracy (man): 0.9009193054136875
Woman vs. man norm prob: t-value 5.811127, p-value 0.000000
Woman vs. man acc: t-value 9.065170, p-value 0.000000


STATS FOR CATEGORY: work
Average norm prob (all): 0.8704407684070338
Average accuracy (all): 0.9090325685865555
Average norm prob (woman): 0.9007172788032978
Average accuracy (woman): 0.9540229885057471
Average norm prob (man): 0.8531943653558252
Average accuracy (man): 0.8834046839587005
Woman vs. man norm prob: t-value 8.119024, p-value 0.000000
Woman vs. man acc: t-value 9.386672, p-value 0.000000


STATS FOR CATEGORY: home
Average norm prob (all): 0.8464772299677028
Average accuracy (all): 0.8825025432349949
Average norm prob (woman): 0.8967378994974244
Average accuracy (woman): 0.9539007092198581
Average no

In [9]:
# Print pooled and per-category statistics for the results file at pp_results_path_512_pre.
do_analysis(pp_results_path_512_pre)

Average norm prob (all): 0.8665188544053947
Average accuracy (all): 0.9152226280208252
Average norm prob (woman): 0.8329393333526545
Average accuracy (woman): 0.9063578117299163
Average norm prob (man): 0.8819842608782148
Average accuracy (man): 0.9193054136874361
Woman vs. man norm prob: t-value -12.441155, p-value 0.000000
Woman vs. man acc: t-value -2.450472, p-value 0.014280


STATS FOR CATEGORY: work
Average norm prob (all): 0.8563507131978702
Average accuracy (all): 0.9066260227819669
Average norm prob (woman): 0.8352751070704943
Average accuracy (woman): 0.9120247568523431
Average norm prob (man): 0.8683560068418198
Average accuracy (man): 0.903550742885923
Woman vs. man norm prob: t-value -5.866825, p-value 0.000000
Woman vs. man acc: t-value 1.105561, p-value 0.268959


STATS FOR CATEGORY: home
Average norm prob (all): 0.8343628068111351
Average accuracy (all): 0.8880976602238047
Average norm prob (woman): 0.8436210342344872
Average accuracy (woman): 0.9302600472813238
Average

In [10]:
# Print pooled and per-category statistics for the results file at pp_results_path_128_50.
do_analysis(pp_results_path_128_50)

Average norm prob (all): 0.9192061165908707
Average accuracy (all): 0.94375
Average norm prob (woman): 0.8934801567168337
Average accuracy (woman): 0.9314709733740725
Average norm prob (man): 0.9320494755081553
Average accuracy (man): 0.9498801481804314
Woman vs. man norm prob: t-value -8.013883, p-value 0.000000
Woman vs. man acc: t-value -3.125122, p-value 0.001785


STATS FOR CATEGORY: work
Average norm prob (all): 0.9076710670688862
Average accuracy (all): 0.9337529976019184
Average norm prob (woman): 0.8966398327198809
Average accuracy (woman): 0.9355083459787557
Average norm prob (man): 0.9148758078379592
Average accuracy (man): 0.9326065411298315
Woman vs. man norm prob: t-value -2.592786, p-value 0.009562
Woman vs. man acc: t-value 0.329345, p-value 0.741916


STATS FOR CATEGORY: home
Average norm prob (all): 0.8849581848122071
Average accuracy (all): 0.9190556492411467
Average norm prob (woman): 0.894963178235859
Average accuracy (woman): 0.9384615384615385
Average norm prob (

In [11]:
# Print pooled and per-category statistics for the results file at pp_results_path_128_50_pre.
do_analysis(pp_results_path_128_50_pre)

Average norm prob (all): 0.8578040953448561
Average accuracy (all): 0.9142441860465116
Average norm prob (woman): 0.8272024470583231
Average accuracy (woman): 0.9061545176778699
Average norm prob (man): 0.8730815798130293
Average accuracy (man): 0.9182828502941818
Woman vs. man norm prob: t-value -8.744445, p-value 0.000000
Woman vs. man acc: t-value -1.693335, p-value 0.090437


STATS FOR CATEGORY: work
Average norm prob (all): 0.8427455954459625
Average accuracy (all): 0.9025779376498801
Average norm prob (woman): 0.8254675059907204
Average accuracy (woman): 0.909711684370258
Average norm prob (man): 0.8540302941089998
Average accuracy (man): 0.8979187314172448
Woman vs. man norm prob: t-value -3.747440, p-value 0.000182
Woman vs. man acc: t-value 1.122816, p-value 0.261596


STATS FOR CATEGORY: home
Average norm prob (all): 0.8241471064228391
Average accuracy (all): 0.8752107925801011
Average norm prob (woman): 0.8405575540043428
Average accuracy (woman): 0.9230769230769231
Average 