# Get adjectives related to male and female

In [1]:
import numpy as np
from itertools import combinations, filterfalse
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models.keyedvectors import KeyedVectors
import pandas as pd
import random
import sys
import os
import pickle

import pandas as pd
from pandas.core import indexing
from gender_predictor.GenderClassifier import classify_gender
from collections import defaultdict
from tqdm.notebook import tqdm
import multiprocessing as mp
import json

In [2]:
movie_data = pd.read_csv('movie.metadata.tsv', sep='\t', skip_blank_lines=True, header=None, names=['id', 'free_id', 'movie_name', 'release_date', 'revenue', 'runtime', 'languages', 'countries', 'genres'])

In [3]:
character = pd.read_csv('character.metadata.tsv', sep='\t', skip_blank_lines=True, header=None, names=['id', 'free_id', 'release_date','char_name', 'dob', 'gender', 'height', 'ethnicity', 'name', 'age', 'free_char_id1', 'free_char_id2', 'free_char_id3'])
character = character[['id', 'char_name', 'gender']]

In [4]:
movie_data['release_year'] = movie_data['release_date'].apply(lambda r:r[:4] if str(r)!='nan' else None)

In [5]:
movie_id_by_year = {'United States of America':{}, 'India':{}}

for index, row in movie_data.iterrows():
    for key, value in json.loads(row['countries']).items():            
        if value == 'United States of America' or value == 'India':
            if row['release_year'] not in movie_id_by_year[value]:
                movie_id_by_year[value][row['release_year']] = [row.id]
            else:
                movie_id_by_year[value][row['release_year']].append(row.id)

In [6]:
count = [i for i in movie_id_by_year['India'].keys() if i is not None and i<'2000']

In [7]:
decade_2000 = []
for c in count:
    decade_2000 += movie_id_by_year['India'][c]

In [8]:
len(decade_2000)

4443

In [9]:
female_words = ['she', 'her', 'woman', 'women', 'ladies', 'girls', 'lady', 'aunt', 'grandmother', 'female', 'girl', 'damsel', 'maiden', 'daughter', 'sister', 'mother']
male_words   = ['he', 'his', 'man', 'male', 'men', 'boys', 'gentleman', 'uncle', 'grandfather', 'gentlemen', 'boy', 'bloke', 'brother', 'father']


def read_input_file(filename):
    data_df = pd.read_csv(filename,sep=',', skip_blank_lines=True, index_col= False)
    return data_df


def get_plots_by_movie_id(data_df):
    movie_ids = data_df.movie_id.unique() 
    grouped = data_df.groupby(data_df.movie_id)

    all_movie_plots = []
    for id in movie_ids:
        sents_df = grouped.get_group(id)
        all_movie_plots.append(sents_df)
    return all_movie_plots


def get_frequency_for_movie(movie):
    
    if movie.iloc[0]['movie_id'] not in decade_2000:
        return
    
    frequency_list = {'M':defaultdict(int), 'F':defaultdict(int)}
    name_data  = movie[((movie.dep_pos == 'NNP') & (movie.dep_ner == 'PERSON')) | (movie.dependent.isin(female_words)) | (movie.dependent.isin(male_words))]
    char_list  = character[character.id==movie.iloc[0]['movie_id']]
    
    gender_list = {}
    for idx,name in name_data.iterrows():            
        try:
            character_name = name['dependent'].lower()
            gender = None
            if character_name in gender_list:
                gender = gender_list[character_name]
            elif character_name in female_words:
                gender = 'F'
            elif character_name in male_words:
                gender = 'M'
            else:
                for ix, char in char_list.iterrows():
                    chk = str(char['char_name'])
                    if character_name in chk.lower():
                        gender = char['gender']
                        break
                    
                if gender is None:
                    gender = classify_gender(character_name)
                    
            gender_list[character_name] = gender
            governor = int(name['governor'])
            governor_df = movie[(movie['sentence_id']==name['sentence_id']) & (movie['token_id'] == governor) & (movie['dep_pos'].isin(['VB', 'VBP', 'VBZ', 'VBN']))]
            df2 = movie[(movie['sentence_id']==name['sentence_id']) & (movie['governor'] == name['token_id'])  & (movie['dep_pos'].isin(['VB', 'VBP', 'VBZ', 'VBN']))]
            df3 = movie[(movie['sentence_id'] == name['sentence_id']) & (movie['governor']==name['governor']) & (movie['dep_pos'].isin(['VB', 'VBP', 'VBZ', 'VBN']))]
            y = pd.concat([governor_df, df2, df3]).drop_duplicates()
            for i, x in y.iterrows():
                frequency_list[gender][x['dependent']] +=1
        except Exception as exc:
            pass
    print(movie.iloc[0]['movie_id'])
    print(frequency_list)
    return frequency_list
    
def get_name_and_adjective_mapping(all_movie_plots):
    frequency_list = {'M':defaultdict(int), 'F':defaultdict(int)}
    pool = mp.Pool(20)
    results = [pool.apply_async(get_frequency_for_movie, args=(movie,)) for movie in all_movie_plots]    
    output = [p.get() for p in results]
    return output


def get_adjective_cloud(filename):
    movie_data_df = read_input_file(filename)
    all_movie_plots = get_plots_by_movie_id(movie_data_df)
    name_adj_cluster_list = get_name_and_adjective_mapping(all_movie_plots)
    return name_adj_cluster_list



In [None]:
%time result = get_adjective_cloud('india_lemma.csv')

Accuracy: 0.968247
Accuracy: 0.971214
Accuracy: 0.970121Accuracy: 0.969340
Accuracy: 0.971162
Accuracy: 0.969497

Accuracy: 0.970486Accuracy: 0.969445

Accuracy: 0.968820Accuracy: 0.967623

Accuracy: 0.969393Accuracy: 0.966894

Accuracy: 0.96850832345990
Accuracy: 0.970642
Accuracy: 0.969236

Accuracy: 0.970329{'M': defaultdict(<class 'int'>, {'see': 2, 'decide': 1, 'do': 1, 'end': 1, 'approve': 1, 'know': 1}), 'F': defaultdict(<class 'int'>, {'turn': 1, 'spoil': 1, 'conceit': 1, 'attract': 1, 'complicate': 1, 'be': 1, 'see': 1, 'sing': 2})}

Accuracy: 0.970382
Accuracy: 0.970069
Accuracy: 0.969757
Accuracy: 0.968820
10594195
{'M': defaultdict(<class 'int'>, {'be': 2, 'reveal': 2, 'go': 2, 'have': 2, 'turn': 1, 'encounter': 1, 'accompany': 1, 'reconstruct': 1}), 'F': defaultdict(<class 'int'>, {'be': 3, 'live': 1, 'have': 1, 'call': 1, 'spoil': 1, 'accustom': 1, 'surround': 1, 'accompany': 1, 'recognize': 1, 'do': 1, 'come': 1, 'know': 1, 'reconstruct': 1})}
5181423
{'M': defaultdict(<

{'M': defaultdict(<class 'int'>, {'be': 3, 'send': 1, 'meet': 2, 'attract': 1, 'take': 1, 'start': 1, 'find': 1, 'get': 1, 'go': 1, 'play': 1, 'let': 1, 'know': 1}), 'F': defaultdict(<class 'int'>, {'be': 1, 'marry': 1})}
Accuracy: 0.967987Accuracy: 0.969393

Accuracy: 0.968091
Accuracy: 0.969705
Accuracy: 0.970277
Accuracy: 0.966530
Accuracy: 0.970382
Accuracy: 0.969288Accuracy: 0.968143

Accuracy: 0.970017
Accuracy: 0.969549
Accuracy: 0.967571
Accuracy: 0.969861
Accuracy: 0.969028
Accuracy: 0.970746
Accuracy: 0.969288
Accuracy: 0.966634
Accuracy: 0.96829915858518

{'M': defaultdict(<class 'int'>, {'be': 4, 'pay': 1, 'keep': 1, 'get': 1, 'turn': 1, 'agree': 2, 'play': 1, 'discover': 1, 'reveal': 1, 'find': 1, 'clear': 1}), 'F': defaultdict(<class 'int'>, {'leave': 1, 'be': 2, 'bring': 1, 'try': 2, 'get': 2, 'tell': 1, 'marry': 1, 'think': 1, 'reveal': 1, 'find': 1, 'clear': 1})}
Accuracy: 0.969080
7876902Accuracy: 0.968664

{'M': defaultdict(<class 'int'>, {'commit': 3, 'send': 1, 're

Accuracy: 0.968508
Accuracy: 0.968404
Accuracy: 0.970434
9633533
{'M': defaultdict(<class 'int'>, {'convict': 2, 'escape': 1, 'land': 1, 'send': 1, 'be': 4, 'follow': 1, 'meet': 2, 'struggle': 1, 'bear': 1, 'try': 3, 'report': 1, 'fail': 3, 'befriend': 1, 'find': 1, 'nab': 1, 'reach': 1, 'love': 1, 'convince': 1, 'give': 2, 'like': 1, 'ask': 1, 'join': 1, 'bring': 1, 'see': 1, 'order': 1, 'refuse': 1, 'take': 1, 'defuse': 1}), 'F': defaultdict(<class 'int'>, {'be': 3, 'accept': 1, 'reveal': 1, 'rape': 1, 'kill': 1, 'convict': 1, 'like': 1, 'ask': 1, 'join': 2, 'let': 1, 'bring': 1, 'see': 2, 'make': 1, 'give': 1, 'decide': 1})}
Accuracy: 0.968508
Accuracy: 0.970590
Accuracy: 0.967362
Accuracy: 0.968976
Accuracy: 0.97027730816196

{'M': defaultdict(<class 'int'>, {'be': 8, 'adopt': 1, 'give': 2, 'have': 1, 'frequent': 1, 'stop': 1, 'see': 1, 'decide': 1, 'propose': 1, 'disclose': 1, 'want': 3, 'reveal': 1, 'marry': 5, 'love': 2, 'meet': 2, 'say': 1, 'forget': 1, 'arrive': 1, 'learn': 1,

Accuracy: 0.969549
Accuracy: 0.969653
Accuracy: 0.971423
Accuracy: 0.967883
Accuracy: 0.970538
Accuracy: 0.969288Accuracy: 0.968872

Accuracy: 0.971683
26104950
{'M': defaultdict(<class 'int'>, {'meet': 1, 'cal': 1, 'arrive': 1}), 'F': defaultdict(<class 'int'>, {'want': 1, 'marry': 2, 'love': 1, 'belong': 1, 'meet': 1, 'find': 1, 'be': 1, 'demand': 1, 'insist': 1, 'pretend': 1, 'give': 1, 'start': 1, 'arrive': 1, 'feign': 1, 'disguise': 1})}
Accuracy: 0.970694
Accuracy: 0.969288
36151270
{'M': defaultdict(<class 'int'>, {'kill': 2, 'accumulate': 1, 'be': 1, 'end': 1, 'do': 3, 'meet': 1}), 'F': defaultdict(<class 'int'>, {'name': 1, 'protect': 1, 'accumulate': 1, 'arrest': 1, 'hold': 1, 'meet': 1, 'kill': 1})}
Accuracy: 0.971527
Accuracy: 0.967675
23758269
{'M': defaultdict(<class 'int'>, {}), 'F': defaultdict(<class 'int'>, {'play': 1})}
10599593
{'M': defaultdict(<class 'int'>, {'be': 3, 'die': 1, 'inherit': 2, 'give': 1, 'convince': 1, 'mend': 3, 'marry': 1, 'appear': 1, 'do': 1, 'i

{'M': defaultdict(<class 'int'>, {'give': 3, 'follow': 1, 'be': 5, 'want': 2, 'call': 1, 'go': 1, 'shoot': 1, 'stop': 1, 'take': 1, 'find': 1, 'impress': 1, 'kill': 3, 'build': 2, 'sell': 1, 'try': 1, 'show': 1, 'get': 2, 'know': 1, 'beat': 1, 'switch': 2, 'leave': 1}), 'F': defaultdict(<class 'int'>, {'be': 5, 'see': 1, 'impress': 2, 'notice': 2, 'do': 1, 'go': 1, 'contact': 2, 'get': 3, 'meet': 2, 'believe': 1, 'want': 1, 'find': 1, 'say': 1, 'give': 2, 'enter': 1, 'shout': 1})}
Accuracy: 0.968351
Accuracy: 0.969653
Accuracy: 0.969236
Accuracy: 0.969028
Accuracy: 0.970538
25697925
{'M': defaultdict(<class 'int'>, {'find': 2, 'marry': 1, 'expire': 1, 'stick': 1, 'support': 1, 'believe': 1, 'fall': 1, 'perform': 1}), 'F': defaultdict(<class 'int'>, {'marry': 1, 'abandon': 1, 'injure': 1, 'get': 1})}Accuracy: 0.969236

Accuracy: 0.971683
Accuracy: 0.970694
Accuracy: 0.969132
Accuracy: 0.970902
Accuracy: 0.971110
Accuracy: 0.969809
Accuracy: 0.968508
Accuracy: 0.969757
Accuracy: 0.968247

{'M': defaultdict(<class 'int'>, {'learn': 2, 'challenge': 1, 'convince': 1, 'belong': 1}), 'F': defaultdict(<class 'int'>, {'be': 2, 'learn': 1, 'marry': 2, 'impress': 1})}
Accuracy: 0.969809
Accuracy: 0.971475
Accuracy: 0.969393
Accuracy: 0.968768
Accuracy: 0.971371
Accuracy: 0.969653
Accuracy: 0.969497
Accuracy: 0.968299
Accuracy: 0.968560
Accuracy: 0.970225
18004274
{'M': defaultdict(<class 'int'>, {'want': 2, 'avenge': 1, 'retrieve': 1, 'set': 2, 'search': 1, 'narrate': 1, 'lure': 2, 'get': 1, 'fight': 1, 'return': 1}), 'F': defaultdict(<class 'int'>, {'send': 1, 'sacrifice': 1, 'capture': 1, 'feel': 2, 'betray': 2})}
21020059
{'M': defaultdict(<class 'int'>, {'marry': 1}), 'F': defaultdict(<class 'int'>, {'marry': 1, 'agree': 1})}Accuracy: 0.969809

Accuracy: 0.969757
Accuracy: 0.970746
Accuracy: 0.968404
Accuracy: 0.972099
4036425Accuracy: 0.970017

{'M': defaultdict(<class 'int'>, {'find': 2, 'win': 2, 'break': 1, 'be': 1, 'think': 1, 'manage': 1}), 'F': defaultdict(<class 'int

{'M': defaultdict(<class 'int'>, {'be': 4, 'hope': 1, 'want': 1, 'get': 2, 'settle': 1, 'appear': 1, 'attend': 1, 'hire': 1, 'go': 1, 'offer': 1, 'see': 1, 'notify': 1, 'rape': 1, 'release': 1, 'find': 1, 'commit': 1, 'set': 1, 'put': 1, 'meet': 1, 'frame': 1}), 'F': defaultdict(<class 'int'>, {'live': 1, 'marry': 1, 'be': 2, 'name': 1, 'see': 1, 'notify': 1, 'rape': 1})}
Accuracy: 0.969288
Accuracy: 0.969965
Accuracy: 0.971735
Accuracy: 0.969549
Accuracy: 0.969861
Accuracy: 0.969288
5627716
{'M': defaultdict(<class 'int'>, {'spoil': 1, 'try': 1, 'instill': 1, 'gain': 1, 'educate': 1, 'continue': 1, 'get': 2, 'meet': 1, 'cripple': 1, 'name': 1, 'befriend': 1, 'change': 2, 'stop': 1, 'slow': 1, 'lose': 1, 'become': 1, 'go': 1, 'excel': 1, 'decide': 1, 'leave': 1}), 'F': defaultdict(<class 'int'>, {'name': 1, 'try': 2, 'get': 1, 'go': 1, 'slow': 1, 'cause': 1, 'run': 1, 'be': 1})}
Accuracy: 0.967779
Accuracy: 0.972984
Accuracy: 0.971631
Accuracy: 0.967571
Accuracy: 0.970017
Accuracy: 0.9

{'M': defaultdict(<class 'int'>, {'come': 2, 'start': 1, 'meet': 1, 'be': 7, 'become': 1, 'live': 1, 'visit': 1, 'ask': 1, 'see': 1, 'decline': 1, 'find': 2, 'have': 2, 'accompany': 1, 'admit': 1, 'advise': 1, 'learn': 2}), 'F': defaultdict(<class 'int'>, {'appear': 1, 'grow': 1, 'visit': 2, 'ask': 1, 'accompany': 2, 'see': 1, 'do': 1, 'decline': 1, 'top': 1, 'get': 1, 'come': 1, 'advise': 1, 'be': 1})}

Accuracy: 0.969288
Accuracy: 0.967935
Accuracy: 0.967727Accuracy: 0.968091

Accuracy: 0.969809
Accuracy: 0.970746
Accuracy: 0.967883Accuracy: 0.970017

Accuracy: 0.971110
10139922
{'M': defaultdict(<class 'int'>, {'be': 1, 'call': 2, 'ask': 2, 'want': 1, 'live': 1, 'turn': 1, 'refuse': 1, 'entertain': 2, 'join': 1, 'lend': 1, 'go': 1, 'play': 1, 'meet': 1, 'come': 1, 'throw': 1}), 'F': defaultdict(<class 'int'>, {'go': 1, 'play': 1, 'meet': 1, 'be': 1, 'fall': 1})}
Accuracy: 0.969757
Accuracy: 0.968195
Accuracy: 0.970277
Accuracy: 0.971266
Accuracy: 0.969549
Accuracy: 0.970017
Accuracy

{'M': defaultdict(<class 'int'>, {'meet': 4, 'come': 2, 'owe': 1, 'want': 1, 'marry': 2, 'agree': 1, 'engage': 1, 'see': 2, 'name': 2, 'be': 4, 'help': 2, 'realize': 2, 'step': 2, 'rekindle': 1, 'find': 3, 'break': 2, 'have': 4, 'drag': 2, 'decide': 1, 'take': 2, 'defend': 1, 'curse': 1, 'seem': 1, 'call': 2, 'do': 1, 'escort': 2, 'head': 1, 'unite': 1}), 'F': defaultdict(<class 'int'>, {'visit': 1, 'accompany': 1, 'know': 1, 'do': 1, 'meet': 3, 'marry': 1, 'name': 1, 'shock': 2, 'be': 3, 'see': 1, 'say': 1, 'act': 1, 'have': 1, 'remember': 2, 'murder': 2, 'take': 1, 'hold': 1, 'call': 1, 'go': 1, 'set': 1, 'sit': 1, 'succumb': 1})}
Accuracy: 0.967727
Accuracy: 0.969080
Accuracy: 0.967519Accuracy: 0.971058

Accuracy: 0.968820
Accuracy: 0.970902
Accuracy: 0.967935
Accuracy: 0.970017
Accuracy: 0.969445
Accuracy: 0.967727
Accuracy: 0.971110
10641044
{'M': defaultdict(<class 'int'>, {'be': 1, 'fall': 1}), 'F': defaultdict(<class 'int'>, {})}Accuracy: 0.970642

Accuracy: 0.967779
29857599
{

{'M': defaultdict(<class 'int'>, {'work': 1, 'give': 1, 'torture': 3, 'learn': 2, 'be': 3}), 'F': defaultdict(<class 'int'>, {'be': 1, 'work': 1, 'get': 1})}
Accuracy: 0.968820
Accuracy: 0.970850
26373339
{'M': defaultdict(<class 'int'>, {'anger': 1, 'addict': 1, 'be': 1, 'secure': 1, 'refuse': 1, 'talk': 1, 'leave': 1, 'do': 1}), 'F': defaultdict(<class 'int'>, {'be': 8, 'witness': 1, 'kill': 1, 'rape': 2, 'impregnate': 1, 'relieve': 1, 'see': 4, 'have': 2, 'addict': 2, 'marry': 2, 'live': 1, 'decline': 1, 'try': 1, 'annoy': 1, 'feel': 1, 'pass': 1, 'end': 3, 'suspect': 1, 'stay': 1, 'ask': 1, 'go': 1, 'reconcile': 1, 'overjoy': 1, 'rush': 1, 'share': 1, 'shock': 1, 'surrender': 1})}Accuracy: 0.968560

Accuracy: 0.970434
Accuracy: 0.969340
Accuracy: 0.969861
Accuracy: 0.969965
Accuracy: 0.967987
Accuracy: 0.969080
Accuracy: 0.969913
Accuracy: 0.968091
Accuracy: 0.967779
Accuracy: 0.968612
Accuracy: 0.969913
Accuracy: 0.970017
Accuracy: 0.969601
Accuracy: 0.969445
Accuracy: 0.970329
Ac

{'M': defaultdict(<class 'int'>, {'make': 1, 'get': 1, 'save': 1, 'denounce': 1, 'face': 2, 'excommunicate': 1, 'dismiss': 1, 'arrive': 1, 'accept': 1, 'show': 1, 'be': 2}), 'F': defaultdict(<class 'int'>, {'be': 4, 'educate': 1, 'find': 2, 'make': 1, 'live': 1, 'show': 2, 'meet': 1, 'fell': 1, 'excommunicate': 2, 'save': 1, 'denounce': 1, 'answer': 1, 'arrive': 1, 'dismiss': 1, 'accept': 1})}
Accuracy: 0.967883
Accuracy: 0.967779
Accuracy: 0.970329
Accuracy: 0.970798
Accuracy: 0.969913
Accuracy: 0.969236
Accuracy: 0.971423
Accuracy: 0.967154
29153320
{'M': defaultdict(<class 'int'>, {'be': 2, 'dream': 1, 'involve': 1, 'have': 1, 'set': 1}), 'F': defaultdict(<class 'int'>, {'have': 2, 'live': 1, 'get': 1, 'move': 1, 'support': 1, 'meet': 1, 'start': 2, 'discover': 2, 'help': 1, 'be': 2, 'decide': 2, 'take': 1, 'educate': 2, 'marry': 1, 'resign': 1, 'return': 2, 'welcome': 1, 'come': 1, 'know': 1, 'set': 1, 'follow': 1})}
Accuracy: 0.968508
Accuracy: 0.968299
Accuracy: 0.967831
Accuracy

{'M': defaultdict(<class 'int'>, {'see': 1, 'marry': 3, 'put': 1, 'be': 4, 'turn': 1, 'approach': 1, 'pretend': 1, 'play': 1, 'promise': 1, 'make': 1, 'ask': 1, 'leave': 1, 'fall': 2, 'find': 1, 'recall': 1, 'rescue': 1, 'tell': 1}), 'F': defaultdict(<class 'int'>, {'marry': 1, 'commit': 1, 'believe': 1, 'turn': 1, 'give': 1, 'be': 3, 'become': 1, 'try': 1, 'wish': 1, 'persuade': 3, 'have': 1, 'make': 1, 'run': 1, 'let': 2, 'do': 1, 'fall': 1, 'recall': 1, 'humiliate': 1, 'throw': 1, 'need': 1, 'rescue': 1, 'tell': 1, 'save': 2, 'bear': 1, 'find': 1, 'accept': 1})}
Accuracy: 0.970434
Accuracy: 0.969549
Accuracy: 0.968247
Accuracy: 0.967831
14719302
{'M': defaultdict(<class 'int'>, {'marry': 1}), 'F': defaultdict(<class 'int'>, {})}
Accuracy: 0.970225
Accuracy: 0.970173
Accuracy: 0.968143
Accuracy: 0.970798
Accuracy: 0.969132
23688242
{'M': defaultdict(<class 'int'>, {'get': 1, 'abandon': 1, 'hold': 1}), 'F': defaultdict(<class 'int'>, {})}
Accuracy: 0.969445
Accuracy: 0.969809
Accuracy

Accuracy: 0.968612
Accuracy: 0.970277
Accuracy: 0.969340
Accuracy: 0.968195
10639481
{'M': defaultdict(<class 'int'>, {'introduce': 1, 'employ': 3, 'be': 3, 'meet': 1, 'find': 2}), 'F': defaultdict(<class 'int'>, {'be': 3, 'name': 2, 'get': 1, 'read': 1, 'fall': 1, 'meet': 1, 'have': 1, 'intend': 2, 'marry': 1})}
Accuracy: 0.969601
Accuracy: 0.969028Accuracy: 0.966998

Accuracy: 0.967675
Accuracy: 0.969705
Accuracy: 0.968820
Accuracy: 0.968612
Accuracy: 0.968299
Accuracy: 0.969809
Accuracy: 0.970642
Accuracy: 0.970538
Accuracy: 0.969445
Accuracy: 0.970329Accuracy: 0.967675

10640601
{'M': defaultdict(<class 'int'>, {'consider': 2, 'know': 1, 'be': 2, 'assign': 1, 'refuse': 1, 'attempt': 1, 'set': 1, 'kill': 1, 'lose': 1, 'contact': 1}), 'F': defaultdict(<class 'int'>, {'consider': 1, 'know': 1, 'meet': 1, 'fall': 1, 'refuse': 1, 'lose': 1, 'do': 1, 'find': 1, 'horrify': 1, 'contact': 1})}
Accuracy: 0.969601
Accuracy: 0.969861
Accuracy: 0.966946
Accuracy: 0.966686
Accuracy: 0.969393
281

{'M': defaultdict(<class 'int'>, {'reach': 1, 'encounter': 1, 'meet': 1, 'love': 1, 'marry': 1, 'redress': 1, 'settle': 1, 'appear': 1, 'derive': 1, 'follow': 1, 'become': 1, 'sacrifice': 1, 'believe': 2}), 'F': defaultdict(<class 'int'>, {'meet': 1, 'love': 1, 'hesitate': 1, 'become': 1, 'marry': 1, 'redress': 1, 'try': 2, 'lead': 1, 'remind': 1, 'be': 1, 'appear': 1, 'happen': 1})}Accuracy: 0.968091

Accuracy: 0.970434
Accuracy: 0.969132
Accuracy: 0.970538
Accuracy: 0.970017
Accuracy: 0.968299
Accuracy: 0.969288
Accuracy: 0.968299
Accuracy: 0.969965
Accuracy: 0.969028
Accuracy: 0.968768Accuracy: 0.968872

Accuracy: 0.969705
Accuracy: 0.970746
Accuracy: 0.969184
Accuracy: 0.968612
Accuracy: 0.970173
10637370
{'M': defaultdict(<class 'int'>, {'marry': 1, 'satisfy': 2, 'hire': 1, 'like': 2, 'stop': 2, 'forgive': 1}), 'F': defaultdict(<class 'int'>, {'bring': 1, 'go': 1, 'have': 1, 'participate': 1, 'return': 2, 'block': 1, 'hire': 1, 'pose': 1, 'follow': 1, 'number': 1, 'win': 2, 'leave

{'M': defaultdict(<class 'int'>, {'marry': 1, 'be': 4, 'have': 1, 'assign': 1, 'threaten': 1, 'protect': 1, 'begin': 1, 'wound': 1, 'hospitalize': 1, 'discover': 1}), 'F': defaultdict(<class 'int'>, {'play': 2, 'have': 1, 'present': 1, 'identify': 1, 'protect': 2, 'begin': 1, 'manage': 1, 'wring': 1})}
Accuracy: 0.970590
Accuracy: 0.969445
Accuracy: 0.969653
Accuracy: 0.967987
24240349
{'M': defaultdict(<class 'int'>, {}), 'F': defaultdict(<class 'int'>, {'be': 1, 'cope': 1, 'have': 1})}
Accuracy: 0.970694
Accuracy: 0.971787
Accuracy: 0.969028
Accuracy: 0.968976
Accuracy: 0.969340
Accuracy: 0.969549
Accuracy: 0.968195
33355085Accuracy: 0.971475

{'M': defaultdict(<class 'int'>, {'compel': 2, 'be': 3, 'marry': 2, 'require': 2, 'do': 1, 'leave': 2, 'torture': 1, 'escape': 1, 'get': 1, 'trap': 1, 'offer': 1, 'perform': 1, 'achieve': 1, 'expect': 1, 'lock': 1, 'know': 1, 'search': 1, 'find': 2, 'convince': 1, 'have': 3, 'take': 1, 'promise': 1, 'return': 1, 'tell': 1, 'save': 1, 'chase': 1

Accuracy: 0.969861
Accuracy: 0.969080
31465007
{'M': defaultdict(<class 'int'>, {'spare': 1, 'be': 2, 'suggest': 1, 'see': 1, 'begin': 1, 'compose': 1, 'walk': 1}), 'F': defaultdict(<class 'int'>, {'face': 2, 'reply': 1, 'arrive': 2, 'suggest': 2, 'do': 2, 'perform': 1, 'leave': 1, 'bring': 1, 'fight': 1, 'receive': 1, 'despatch': 2, 'be': 1, 'slay': 1, 'near': 1, 'debar': 3})}
Accuracy: 0.968039
Accuracy: 0.970434
Accuracy: 0.968247
Accuracy: 0.971006
Accuracy: 0.971162
Accuracy: 0.970486
9034157
{'M': defaultdict(<class 'int'>, {'take': 1, 'meet': 1, 'return': 1, 'tell': 1, 'be': 3, 'kill': 2, 'have': 1, 'assign': 1, 'alia': 1, 'set': 1}), 'F': defaultdict(<class 'int'>, {'live': 1})}
26617962
{'M': defaultdict(<class 'int'>, {'accuse': 3, 'be': 10, 'take': 3, 'want': 2, 'meet': 5, 'explain': 1, 'look': 1, 'have': 2, 'leave': 3, 'introduce': 3, 'pretend': 1, 'urge': 1, 'pursue': 1, 'refuse': 1, 'marry': 3, 'show': 2, 'go': 5, 'name': 1, 'arrange': 1, 'get': 4, 'tempt': 2, 'ask': 2, '

{'M': defaultdict(<class 'int'>, {'disapprove': 3, 'bless': 2, 'get': 2, 'have': 1, 'go': 1}), 'F': defaultdict(<class 'int'>, {'stay': 2, 'get': 1, 'adopt': 1, 'disapprove': 1})}
Accuracy: 0.968195
Accuracy: 0.968612
Accuracy: 0.971214
Accuracy: 0.968820
Accuracy: 0.970173
Accuracy: 0.969913
Accuracy: 0.969184
Accuracy: 0.969809
Accuracy: 0.970225
Accuracy: 0.970590
Accuracy: 0.971787
Accuracy: 0.969393
3188785112578919
{'M': defaultdict(<class 'int'>, {'name': 1, 'attempt': 2, 'arrange': 1, 'find': 1, 'disappear': 1, 'assist': 1, 'adopt': 2, 'give': 2, 'be': 2, 'kill': 1, 'get': 1, 'meet': 1, 'force': 2, 'leave': 2, 'join': 1}), 'F': defaultdict(<class 'int'>, {'have': 1, 'meet': 1, 'leave': 1})}
{'M': defaultdict(<class 'int'>, {'be': 7, 'separate': 1, 'travel': 1, 'look': 1, 'start': 1, 'discover': 1, 'take': 3, 'decide': 2, 'die': 1, 'reveal': 2, 'marry': 1, 'kill': 2, 'decease': 1, 'have': 3, 'murder': 1, 'rape': 1, 'prove': 2, 'render': 1, 'swear': 1, 'presume': 1, 'terrorize': 

{'M': defaultdict(<class 'int'>, {'name': 2, 'tell': 6, 'be': 8, 'serve': 1, 'have': 5, 'prove': 1, 'ask': 2, 'explain': 2, 'refuse': 2, 'start': 1, 's': 2, 'think': 1, 'want': 3, 'use': 1, 'get': 4, 'continue': 1, 'afford': 3, 'make': 1, 'die': 2, 'decide': 2, 'meet': 2, 'like': 1, 'invite': 1, 'announce': 1, 'go': 2, 'elope': 2, 'say': 1, 'take': 5, 'help': 1, 'attack': 2, 'kill': 2, 'promise': 3, 'marry': 5, 'give': 1, 'advice': 1, 'convince': 1, 'point': 1, 'disapprove': 1, 'regret': 1, 'belittle': 2, 'live': 1, 'disclose': 1, 'hurt': 1, 'realize': 2, 'turn': 2, 'catch': 2, 'throw': 1, 'avenge': 1}), 'F': defaultdict(<class 'int'>, {'be': 7, 'think': 3, 'use': 1, 'fear': 1, 'talk': 1, 's': 1, 'marry': 5, 'meet': 1, 'like': 2, 'urge': 1, 'ask': 1, 'invite': 2, 'announce': 1, 'swallow': 1, 'maintain': 1, 'tell': 3, 'elope': 1, 'do': 1, 'agree': 2, 'go': 2, 'regret': 1, 'say': 1, 'belittle': 1, 'make': 1, 'live': 1, 'disclose': 2, 'bring': 2, 'take': 2, 'realize': 1, 'get': 1, 'try': 

{'M': defaultdict(<class 'int'>, {'marry': 3, 'play': 2, 'spend': 1, 'give': 2, 'choose': 1, 'take': 1}), 'F': defaultdict(<class 'int'>, {'find': 2, 'marry': 2, 'play': 1, 'be': 1, 'console': 1, 'reunite': 1})}
Accuracy: 0.969549
27430676
{'M': defaultdict(<class 'int'>, {'be': 5, 'adjust': 1, 'have': 1, 'take': 1, 'enter': 1}), 'F': defaultdict(<class 'int'>, {'be': 2, 'adjust': 1, 'have': 1, 'leave': 1})}
Accuracy: 0.969080Accuracy: 0.969080

34252872
{'M': defaultdict(<class 'int'>, {'be': 2, 'think': 1, 'meet': 2, 'decide': 1, 'help': 1, 'lose': 1, 'have': 1}), 'F': defaultdict(<class 'int'>, {'be': 1, 'become': 1, 'see': 2, 'meet': 2, 'help': 1, 'imprison': 1, 'lose': 1})}
28492772
{'M': defaultdict(<class 'int'>, {'estrange': 2}), 'F': defaultdict(<class 'int'>, {})}2151087

{'M': defaultdict(<class 'int'>, {'travel': 1, 'fall': 1, 'seduce': 1, 'become': 1, 'make': 2, 'promise': 1, 'discover': 2, 'have': 2, 'decide': 1, 'flee': 1, 'tell': 2}), 'F': defaultdict(<class 'int'>, {'b

Accuracy: 0.969393
Accuracy: 0.970329
Accuracy: 0.970434
Accuracy: 0.971162
20259767
{'M': defaultdict(<class 'int'>, {'be': 1, 'get': 1, 'have': 1, 'take': 1, 'pose': 1}), 'F': defaultdict(<class 'int'>, {})}
Accuracy: 0.967310
Accuracy: 0.967987
13915114
{'M': defaultdict(<class 'int'>, {'be': 5, 'spend': 1, 'get': 2, 'have': 2, 'save': 1, 'kill': 1, 'arrive': 1, 'find': 1, 'decide': 1, 'involve': 1, 'join': 1, 'enrage': 1, 'invite': 1}), 'F': defaultdict(<class 'int'>, {'be': 2, 'get': 2, 'set': 1, 'make': 2})}
Accuracy: 0.968039
Accuracy: 0.969913Accuracy: 0.970121

Accuracy: 0.971787
Accuracy: 0.970382
Accuracy: 0.968768
Accuracy: 0.967623
Accuracy: 0.970173
Accuracy: 0.968039
Accuracy: 0.969132
Accuracy: 0.972203
Accuracy: 0.970902
Accuracy: 0.970798
34387127
{'M': defaultdict(<class 'int'>, {'be': 2, 'ring': 1, 'join': 1, 'fall': 1, 'provoke': 1, 'confront': 1, 'have': 1, 'sort': 1, 'tell': 1, 'beat': 1, 'attempt': 1, 'study': 1, 'assault': 1, 'render': 1, 'refuse': 1, 'side': 1

{'M': defaultdict(<class 'int'>, {'have': 2, 'become': 3, 'organize': 1, 'appoint': 1, 'pick': 1, 'settle': 1, 'leave': 1, 'ridicule': 1, 'be': 1, 'decide': 1, 'commit': 1, 'save': 2, 'tell': 1, 'marry': 2, 'succeed': 1, 'stop': 1, 'teach': 2, 'interview': 1, 'live': 1}), 'F': defaultdict(<class 'int'>, {'call': 3, 'pick': 1, 'be': 1, 'have': 1, 'do': 1, 'come': 1, 'help': 1, 'settle': 1, 'challenge': 1, 'accept': 1, 'leave': 1, 'save': 1, 'tell': 1, 'marry': 2, 'pass': 1, 'become': 1, 'get': 1, 'decide': 1, 'ridicule': 1, 'teach': 1, 'cancel': 1})}
Accuracy: 0.969549
Accuracy: 0.969445
Accuracy: 0.968404
Accuracy: 0.967154
Accuracy: 0.968404
Accuracy: 0.970590
Accuracy: 0.969184
Accuracy: 0.971683
Accuracy: 0.968508
Accuracy: 0.969184
Accuracy: 0.968716
7497572
{'M': defaultdict(<class 'int'>, {'be': 7, 'try': 1, 'humiliate': 2, 'engage': 1, 'get': 2, 'withdraw': 1, 'threaten': 1, 'act': 2, 'turn': 1, 'marry': 4, 'have': 1, 'promise': 1, 'help': 2, 'beat': 1, 'throw': 3, 'offer': 1, '

{'M': defaultdict(<class 'int'>, {'decide': 1, 'visit': 1, 'arrest': 1, 'be': 4, 'cry': 1, 'sentence': 1, 'return': 2, 'join': 1, 'make': 1, 'reward': 1, 'send': 1, 'move': 1}), 'F': defaultdict(<class 'int'>, {'rape': 1, 'have': 1, 'be': 1, 'kill': 2, 'decide': 1, 'avenge': 1, 'marry': 1})}
Accuracy: 0.968091
11729093
{'M': defaultdict(<class 'int'>, {}), 'F': defaultdict(<class 'int'>, {'meet': 2, 'accuse': 1, 'have': 1, 'be': 2, 'catch': 1, 'lead': 1})}
Accuracy: 0.969705
10638889
{'M': defaultdict(<class 'int'>, {'permit': 1, 'marry': 1, 'refuse': 1}), 'F': defaultdict(<class 'int'>, {'fall': 1, 'marry': 1, 'agree': 1, 'cure': 1})}
Accuracy: 0.969601
Accuracy: 0.969028
2098445
{'M': defaultdict(<class 'int'>, {'die': 1, 'admit': 1, 'be': 2, 'regain': 1, 'arrange': 1, 'have': 1, 'oppose': 2, 'find': 2, 'perform': 1, 'put': 1, 'feel': 1, 'kill': 2, 'confront': 2, 'confess': 2}), 'F': defaultdict(<class 'int'>, {'be': 3, 'overhear': 1, 'lose': 1, 'admit': 1})}
Accuracy: 0.969705
Accur

{'M': defaultdict(<class 'int'>, {'be': 1, 'stop': 1, 'happen': 1, 'watch': 1, 'terrify': 1, 'zero': 1, 'refuse': 1, 'help': 1, 'get': 1, 'make': 1, 'nab': 2, 'complete': 2}), 'F': defaultdict(<class 'int'>, {'stop': 1, 'lock': 1, 'die': 2, 'use': 2, 'refuse': 1, 'plead': 2, 'decide': 1, 'help': 1, 'get': 1})}
Accuracy: 0.970382
18901219
{'M': defaultdict(<class 'int'>, {'bear': 1, 'be': 1}), 'F': defaultdict(<class 'int'>, {'head': 1, 'be': 3, 'name': 1, 'want': 1, 'study': 2, 'allow': 2, 'do': 1, 'meet': 1, 'welcome': 1, 'permit': 1})}Accuracy: 0.968560

Accuracy: 0.969601
Accuracy: 0.971319
Accuracy: 0.970954
Accuracy: 0.970694Accuracy: 0.970538

26015028
{'M': defaultdict(<class 'int'>, {'kill': 1, 'be': 1, 'claim': 1, 'adopt': 1, 'plan': 1, 'destroy': 1}), 'F': defaultdict(<class 'int'>, {'wreck': 1, 'claim': 1, 'adopt': 1, 'destroy': 1, 'kidnap': 1, 'be': 1})}
23143530
{'M': defaultdict(<class 'int'>, {'be': 4, 'cherish': 1, 'spend': 1, 'become': 1, 'try': 1, 'fall': 1, 'decide':

Accuracy: 0.968508
2150352
{'M': defaultdict(<class 'int'>, {'see': 1, 'be': 1, 'accuse': 2, 'offer': 1, 'have': 1, 'feel': 1, 'humiliate': 1, 'reciprocate': 1, 'engage': 2, 'establish': 2, 'run': 1, 'unite': 1, 'advertise': 1, 'want': 1, 'kidnap': 2, 'come': 1, 'help': 1, 'find': 2, 'jump': 1, 'rescue': 3}), 'F': defaultdict(<class 'int'>, {'go': 1, 'see': 4, 'meet': 1, 'think': 1, 'be': 1, 'realize': 1, 'progress': 1, 'take': 1, 'stop': 1, 'challenge': 1, 'decide': 1, 'want': 1, 'kidnap': 1, 'rescue': 2})}
Accuracy: 0.969809
Accuracy: 0.970486
Accuracy: 0.971110
Accuracy: 0.970173
Accuracy: 0.969861
Accuracy: 0.970069
24478633Accuracy: 0.968664

{'M': defaultdict(<class 'int'>, {}), 'F': defaultdict(<class 'int'>, {})}Accuracy: 0.970590

22265300
{'M': defaultdict(<class 'int'>, {'live': 1, 'commit': 1, 'face': 1}), 'F': defaultdict(<class 'int'>, {'rape': 1, 'commit': 1, 'kill': 1, 'escape': 1})}
Accuracy: 0.969705
4457586
{'M': defaultdict(<class 'int'>, {'slay': 1, 'send': 3, 'tel

Accuracy: 0.969549
Accuracy: 0.970954
Accuracy: 0.968976
4569282
{'M': defaultdict(<class 'int'>, {'take': 1, 'catch': 1}), 'F': defaultdict(<class 'int'>, {'try': 1, 'catch': 1})}
Accuracy: 0.968612
Accuracy: 0.967206
Accuracy: 0.967831
Accuracy: 0.969288
Accuracy: 0.970277
Accuracy: 0.971214
Accuracy: 0.969236
Accuracy: 0.970486
Accuracy: 0.969393
Accuracy: 0.970902
Accuracy: 0.969132Accuracy: 0.969601

Accuracy: 0.970434
Accuracy: 0.968872
Accuracy: 0.968976
13075853
{'M': defaultdict(<class 'int'>, {'crawl': 1, 'call': 1, 'save': 2, 'be': 6, 'work': 1, 'have': 3, 'want': 2, 'build': 1, 'get': 4, 'ask': 4, 'go': 2, 'warn': 2, 'leave': 1, 'fall': 2, 'take': 3, 'realize': 2, 'change': 1, 'hand': 1, 'try': 1, 'kill': 3, 'kidnap': 1, 'rescue': 1, 'shoot': 2}), 'F': defaultdict(<class 'int'>, {'show': 1, 'be': 2, 'wound': 1, 'live': 1, 'kidnap': 1, 'rescue': 1, 'kill': 1})}
Accuracy: 0.967675
Accuracy: 0.969705
Accuracy: 0.970798
Accuracy: 0.970902Accuracy: 0.967779

Accuracy: 0.968664
A

{'M': defaultdict(<class 'int'>, {'remind': 2, 'have': 1, 'retire': 1, 'receive': 1, 'try': 1, 'seize': 1, 'shoot': 1, 'reach': 1, 'get': 2, 'inform': 2, 'decide': 1, 'avenge': 1, 'introduce': 1, 'agree': 1, 'provide': 1, 'felicitate': 1, 'blackmail': 1, 'finish': 2, 'kidnap': 1, 'offer': 1, 'save': 1}), 'F': defaultdict(<class 'int'>, {'break': 1, 'be': 3, 'reach': 1, 'shock': 1, 'react': 1, 'raise': 1, 'loose': 1, 'introduce': 3, 'alia': 1, 'kill': 3, 'agree': 1, 'help': 2, 'intrude': 1, 'target': 1, 'enrage': 1, 'send': 2, 'kidnap': 1, 'offer': 1, 'arrive': 1})}
Accuracy: 0.969809
Accuracy: 0.969965
Accuracy: 0.968456
10641282
{'M': defaultdict(<class 'int'>, {'stop': 1, 'meet': 4, 'reach': 1, 'marry': 1, 'get': 1, 'accompany': 2, 'see': 2, 'thrill': 1, 'be': 1, 'find': 1, 'recognize': 1}), 'F': defaultdict(<class 'int'>, {'meet': 2, 'see': 1, 'be': 7, 'find': 2, 'want': 2, 'recognize': 1, 'resemble': 2})}
Accuracy: 0.969705
Accuracy: 0.969809
Accuracy: 0.969393
Accuracy: 0.970486
A

{'M': defaultdict(<class 'int'>, {'have': 5, 'go': 2, 'encounter': 1, 'ask': 2, 'know': 1, 'be': 5, 'discover': 1, 'record': 1, 'see': 2, 'show': 1, 'commit': 1, 'arrest': 2, 'block': 1, 'kill': 1, 'realize': 2, 'start': 1, 'confess': 3, 'decide': 1, 'hire': 1, 'protest': 1, 'take': 1, 'send': 1, 'reach': 2, 'surprise': 1, 'suggest': 1, 'switch': 1}), 'F': defaultdict(<class 'int'>, {'encounter': 1, 'bring': 2, 'agree': 1, 'resist': 1, 'deny': 1, 'beat': 1, 'send': 1, 'get': 1, 'decide': 1, 'survive': 1, 'claim': 1, 'put': 1, 'come': 1, 'be': 1, 'see': 1})}{'M': defaultdict(<class 'int'>, {'find': 3, 'touch': 2, 'have': 4, 'start': 1, 'end': 1, 'fall': 2, 'engage': 1, 'become': 2, 'use': 1, 'be': 1}), 'F': defaultdict(<class 'int'>, {'substitute': 1, 'name': 2})}

14529906Accuracy: 0.967779

{'M': defaultdict(<class 'int'>, {'be': 4, 'have': 3, 'accuse': 2, 'sentence': 1, 'assure': 1, 'take': 1, 'come': 2, 'find': 2, 'adopt': 1, 'know': 1, 'fall': 1, 'diminish': 1, 'start': 1, 'accept'

Accuracy: 0.970902
Accuracy: 0.969340
11375668
{'M': defaultdict(<class 'int'>, {'live': 1, 'bring': 1, 'name': 1, 'take': 1, 'ask': 1, 'be': 4, 'leave': 1, 'kill': 1, 'know': 1, 'agree': 1, 'marry': 1, 'fulfill': 1, 'exchange': 1, 'follow': 1, 'hospitalize': 1, 'arrest': 1, 'sentence': 1, 'regain': 1, 'refuse': 1, 'press': 1, 'succumb': 1}), 'F': defaultdict(<class 'int'>, {'be': 6, 'spend': 1, 'molest': 1, 'decide': 1, 'marry': 3, 'know': 1, 'agree': 1, 'accept': 1, 'take': 1, 'arrest': 1, 'sentence': 1, 'regain': 1, 'find': 1, 'reach': 2, 'tell': 1, 'meet': 1, 'have': 1})}
518117
{'M': defaultdict(<class 'int'>, {'pray': 1, 'answer': 1, 'grow': 1, 'be': 5, 'send': 2, 'teach': 3, 'return': 1, 'fall': 1, 'want': 2, 'marry': 1, 'arrange': 1, 'attempt': 1, 'throw': 1, 'reject': 1, 'defeat': 1, 'sentence': 1, 'tell': 1, 'interfere': 1, 'remind': 1, 'wall': 1, 'owe': 2, 'stipulate': 1, 'know': 1}), 'F': defaultdict(<class 'int'>, {'grant': 2, 'ask': 1, 'marry': 1, 'attempt': 1, 'make': 2,

{'M': defaultdict(<class 'int'>, {'withdraw': 1, 'say': 3, 'inspire': 1, 'take': 2, 'need': 1, 'prop': 1, 'cook': 1, 'be': 1, 'visit': 1, 'treat': 1}), 'F': defaultdict(<class 'int'>, {'revolve': 1, 'work': 1, 'place': 1, 'be': 8, 'destroy': 2, 'support': 2, 'do': 1, 'want': 2, 'marry': 1, 'prop': 1, 'cook': 1, 'trap': 1, 'represent': 1, 'show': 1, 'wear': 4, 'survive': 1, 'have': 1, 'worry': 1, 'feed': 1, 'use': 2, 'suffer': 1, 'ask': 1, 'scream': 1, 'attenuate': 1, 'symbolize': 1, 'say': 1, 'run': 1, 'treat': 2, 'take': 1})}
Accuracy: 0.969965Accuracy: 0.970642

Accuracy: 0.971527
Accuracy: 0.968664Accuracy: 0.969913

Accuracy: 0.966894
Accuracy: 0.969601
Accuracy: 0.969288
Accuracy: 0.968404
19286405
{'M': defaultdict(<class 'int'>, {'be': 1}), 'F': defaultdict(<class 'int'>, {})}
Accuracy: 0.969861
Accuracy: 0.970225
Accuracy: 0.969913
Accuracy: 0.969653
11257726
{'M': defaultdict(<class 'int'>, {'be': 3, 'die': 1, 'have': 2, 'jail': 1, 'commit': 2, 'set': 1, 'gather': 1, 'reopen':

{'M': defaultdict(<class 'int'>, {'be': 1, 'travel': 1, 'unravel': 1, 'return': 1, 'undertake': 1, 'bring': 1, 'identify': 1}), 'F': defaultdict(<class 'int'>, {'pass': 1, 'wake': 1, 'be': 15, 'state': 1, 'marry': 1, 'see': 1, 'try': 2, 'disappear': 1, 'kill': 2, 'recall': 1, 'undertake': 1, 'name': 1, 'identify': 1, 'recollect': 1})}
4884523
{'M': defaultdict(<class 'int'>, {'be': 7, 'name': 2, 'take': 1, 'start': 1, 'entertain': 1, 'misunderstand': 1, 'realize': 4, 'fall': 2, 'have': 2, 'try': 7, 'die': 1, 'give': 2, 'confide': 1, 'shock': 2, 'marry': 1, 'get': 1, 'reveal': 2, 'leave': 3, 'stay': 1, 'arrive': 1, 'tell': 2, 'love': 2, 'do': 1, 'accuse': 1, 'confront': 3, 'convince': 1, 'hit': 2, 'walk': 2, 'save': 1, 'cheat': 1, 'awaken': 1, 'testify': 1, 'hide': 1, 'await': 1, 'find': 1, 'write': 2, 'come': 1}), 'F': defaultdict(<class 'int'>, {'be': 4, 'entertain': 1, 'realize': 1, 'tell': 5, 'leave': 2, 'devastate': 1, 'engage': 2, 'marry': 1, 'get': 1, 'reveal': 1, 'shock': 1, 'th

{'M': defaultdict(<class 'int'>, {'temper': 1, 'be': 6, 'get': 1, 'turn': 1, 'see': 2, 'manage': 2, 'come': 1, 'reciprocate': 2, 'save': 1, 'have': 6, 'believe': 1, 'fall': 1, 'kill': 1, 'refuse': 1, 'tell': 3, 'hand': 1, 'recognize': 1, 'escape': 1, 'run': 3, 'ask': 2, 'hide': 1, 'realize': 1, 'specify': 1, 'enter': 1, 'find': 1, 'call': 1, 'make': 1, 'reveal': 1, 'say': 1, 'stumble': 1, 'shock': 1, 'kidnap': 1, 'chase': 1, 'start': 1}), 'F': defaultdict(<class 'int'>, {'have': 2, 'arrive': 2, 'go': 2, 'meet': 1, 'put': 1, 'talk': 1, 'attack': 1, 'be': 5, 'save': 3, 'take': 3, 'get': 1, 'seem': 1, 'warn': 2, 'reveal': 1, 'plan': 1, 'feign': 1, 'fall': 2, 'stage': 2, 's': 1, 'reciprocate': 1, 'threaten': 1, 'commit': 1, 'see': 1, 'believe': 1, 'slip': 1, 'find': 2, 'happen': 1, 'strangle': 1, 'call': 1, 'kidnap': 1, 'tell': 1, 'oust': 1})}
Accuracy: 0.970538
Accuracy: 0.969809
Accuracy: 0.96996510640970

{'M': defaultdict(<class 'int'>, {'be': 1, 'go': 1, 'meet': 1, 'hide': 1, 'have': 

Accuracy: 0.970486
Accuracy: 0.968872Accuracy: 0.967727

Accuracy: 0.968351
12908588
{'M': defaultdict(<class 'int'>, {'name': 2, 'be': 3, 'use': 2, 'handicap': 2, 's': 1, 'leave': 1, 'arrange': 1, 'appoint': 1, 'take': 2, 'arrive': 1, 'have': 1, 'start': 1}), 'F': defaultdict(<class 'int'>, {'come': 1, 'live': 1, 'develop': 1, 'fall': 1, 'be': 3, 'reveal': 1, 'name': 1})}
Accuracy: 0.970329
Accuracy: 0.969028
Accuracy: 0.969393
Accuracy: 0.967779
Accuracy: 0.969653
Accuracy: 0.970225
Accuracy: 0.971006
Accuracy: 0.969549
Accuracy: 0.969445
Accuracy: 0.971006
Accuracy: 0.971371
Accuracy: 0.970329
26759151
{'M': defaultdict(<class 'int'>, {'kill': 2}), 'F': defaultdict(<class 'int'>, {'be': 3, 'grow': 1, 'get': 1})}
Accuracy: 0.969913
29016432
{'M': defaultdict(<class 'int'>, {}), 'F': defaultdict(<class 'int'>, {'have': 2, 'find': 1, 'marry': 1, 'be': 2, 'seek': 1, 'commit': 1, 'prefer': 2, 'end': 1, 'help': 1})}Accuracy: 0.969549

Accuracy: 0.969653
Accuracy: 0.971214
Accuracy: 0.9687

Accuracy: 0.967831
Accuracy: 0.969601
Accuracy: 0.967727
Accuracy: 0.969028
Accuracy: 0.968247
Accuracy: 0.968091
11217842
Accuracy: 0.971162{'M': defaultdict(<class 'int'>, {'re-marry': 1, 'respect': 1, 'do': 2, 'refuse': 1, 'meet': 1, 'break': 1, 'take': 3, 'start': 1, 'name': 1, 'see': 1, 'inform': 1, 'leave': 4, 'allow': 1, 'be': 3, 'decide': 1, 'arrest': 1, 'ask': 2, 'sell': 1, 'immigrate': 1, 'hear': 1, 'turn': 1, 'find': 1}), 'F': defaultdict(<class 'int'>, {'widow': 1, 'be': 2, 're-marry': 1, 'respect': 1, 'do': 2, 'refuse': 1, 'inform': 1, 'pack': 1, 'go': 1, 'sell': 1, 'immigrate': 1, 'tell': 1})}

Accuracy: 0.969028
Accuracy: 0.969288
Accuracy: 0.968872
Accuracy: 0.971995
Accuracy: 0.970590
Accuracy: 0.969809
Accuracy: 0.972255
Accuracy: 0.970069
Accuracy: 0.967310
17030343
{'M': defaultdict(<class 'int'>, {'be': 3, 'harass': 1, 'stop': 2, 'kill': 1, 'have': 1, 'enrage': 1, 'embark': 2}), 'F': defaultdict(<class 'int'>, {'show': 1})}
Accuracy: 0.969288
Accuracy: 0.968820
Acc

{'M': defaultdict(<class 'int'>, {'name': 1, 'complete': 1, 'have': 4, 'indulge': 1, 'marry': 2, 'be': 5, 'want': 2, 'annul': 2, 'fall': 1, 'do': 1, 'meet': 2, 'decide': 1, 'agree': 1, 'seem': 1, 'start': 2, 'feel': 1, 'turn': 2, 'arrive': 2, 'confess': 1, 'change': 1, 'continue': 1, 'pursue': 1, 'leave': 1, 'care': 1, 'follow': 1, 'attempt': 3, 'escape': 1, 'kill': 1, 'fire': 1, 'reach': 1, 'lure': 1, 'confront': 1, 'accuse': 1, 'pierce': 1, 'attack': 2, 'take': 1, 'treat': 1, 'deliver': 2}), 'F': defaultdict(<class 'int'>, {'be': 3, 'quit': 1, 'marry': 1, 'refuse': 1, 'hear': 2, 'have': 1, 'reconsider': 1, 'ask': 1, 'provide': 1, 'go': 2, 'meet': 2, 'look': 1, 'find': 1, 'get': 1, 'support': 1, 'turn': 1, 'disagree': 1, 'shatter': 1, 'pursue': 1, 'show': 1, 'care': 1, 'learn': 1, 'accuse': 1, 'run': 1, 'slip': 1, 'take': 1, 'deliver': 1})}
25802262
{'M': defaultdict(<class 'int'>, {}), 'F': defaultdict(<class 'int'>, {})}
Accuracy: 0.968976
Accuracy: 0.968612
Accuracy: 0.968716
Accur

{'M': defaultdict(<class 'int'>, {}), 'F': defaultdict(<class 'int'>, {'have': 1})}
10776705
{'M': defaultdict(<class 'int'>, {'widow': 1, 'have': 1, 'live': 1, 'be': 1, 'ask': 2, 'say': 2, 'want': 2, 'see': 2, 'go': 1, 'get': 1, 'return': 1, 'tell': 1, 'marry': 2, 'come': 5, 'refuse': 1, 'miss': 1, 'visit': 2, 'do': 1, 'accompany': 1, 'promise': 1}), 'F': defaultdict(<class 'int'>, {'have': 3, 'marry': 4, 'live': 1, 'ask': 1, 'say': 1, 'see': 2, 'agree': 1, 'take': 1, 'educate': 1, 'understand': 1, 'come': 2, 'refuse': 3, 'handle': 1, 'stay': 1, 'leave': 3, 'head': 1, 'start': 1, 'miss': 1, 'realize': 2, 'tell': 2, 'want': 3, 'go': 2, 'excel': 1, 'bruise': 1, 'accompany': 1, 'write': 1, 'do': 1, 'get': 1, 'be': 1})}
Accuracy: 0.969601
Accuracy: 0.969184
Accuracy: 0.969861
Accuracy: 0.971058
15041008

In [24]:
x1 = result

In [16]:
## Save frequency
import pickle
output = open('india_before_2000_adj.pkl', 'wb')
pickle.dump(frequency_list, output)
output.close()

In [27]:
import pickle
pkl_file = open('india_before_2000.pkl', 'rb')

frequency_list = pickle.load(pkl_file)


In [12]:
# Combine frequency from different results
frequency_list = {'M':defaultdict(int),'F':defaultdict(int)}

for freq in result:
    if freq is None:
        continue
    for k, v in freq['M'].items():
        frequency_list['M'][k]+=v
    for k, v in freq['F'].items():
        frequency_list['F'][k]+=v  

# Calculate Odds Ratio

In [13]:
odds_ratio = {}
threshold  = 2
topk       = 50

total_num_f = sum(frequency_list['F'].values())
total_num_m = sum(frequency_list['M'].values())

for key in frequency_list['F'].keys():
    m_num = frequency_list['M'][key]
    f_num = frequency_list['F'][key]
    non_f_num = total_num_f - f_num
    non_m_num = total_num_m - m_num
    if f_num >= threshold and m_num >= threshold:
        # we only consider the events where there are at least {thresohld} occurences for both gender
        odds_ratio[key] = round((m_num / f_num) / (non_m_num / non_f_num), 2)
    else:
        continue

In [14]:
from operator import itemgetter

top_m = dict(sorted(odds_ratio.items(), key=itemgetter(1), reverse=True)[:topk])
top_f = dict(sorted(odds_ratio.items(), key=itemgetter(1))[:topk])

### Before 2000 - India

In [15]:
top_m, top_f

({'male': 5.98,
  'major': 5.15,
  'proud': 3.76,
  'honest': 3.66,
  'powerful': 3.48,
  'look-alike': 3.31,
  'close': 3.27,
  'alcoholic': 3.1,
  'criminal': 2.88,
  'guilty': 2.72,
  'terrorist': 2.65,
  'entire': 2.65,
  'eccentric': 2.65,
  'disappointed': 2.65,
  'future': 2.65,
  'few': 2.65,
  'late': 2.59,
  'maternal': 2.51,
  'blind': 2.42,
  'unemployed': 2.39,
  'native': 2.32,
  'confident': 2.32,
  'suspicious': 2.32,
  'full': 2.32,
  'gifted': 2.32,
  'superior': 2.21,
  'secret': 2.21,
  'disabled': 2.21,
  'rival': 2.21,
  'evil': 2.05,
  'first': 2.03,
  'responsible': 1.99,
  'dark': 1.99,
  'long-lost': 1.99,
  'shy': 1.99,
  'romantic': 1.99,
  'chief': 1.99,
  'black': 1.99,
  'principal': 1.99,
  'public': 1.99,
  'rid': 1.99,
  'successful': 1.95,
  'wealthy': 1.89,
  'frustrated': 1.86,
  'bad': 1.82,
  'holy': 1.82,
  'willing': 1.77,
  'deceased': 1.77,
  'injured': 1.77,
  'good': 1.74},
 {'female': 0.05,
  'beautiful': 0.09,
  'lovely': 0.12,
  'tribal':

### After 2000 - India

In [26]:
top_m, top_f

({'senior': 5.49,
  'defeat': 5.17,
  'shoot': 4.6,
  'provide': 4.52,
  'encounter': 3.87,
  'successful': 3.87,
  'succeed': 3.55,
  'capture': 3.55,
  'set': 3.52,
  'suspect': 3.44,
  'finish': 3.44,
  'old': 3.06,
  'transform': 3.01,
  'prove': 2.91,
  'overcome': 2.9,
  'track': 2.9,
  'greet': 2.9,
  'perceive': 2.9,
  'terrorist': 2.8,
  'ready': 2.8,
  'arrest': 2.76,
  'transfer': 2.74,
  'eliminate': 2.71,
  'visit': 2.66,
  'responsible': 2.58,
  'remove': 2.58,
  'drunk': 2.58,
  'sure': 2.58,
  'pick': 2.58,
  'entire': 2.58,
  'good': 2.49,
  'avenge': 2.42,
  'separate': 2.42,
  'injure': 2.42,
  'due': 2.42,
  'wrong': 2.37,
  'fight': 2.37,
  'last': 2.32,
  'continue': 2.3,
  'beat': 2.28,
  'create': 2.26,
  'local': 2.26,
  'free': 2.26,
  'demand': 2.26,
  'unknown': 2.26,
  'study': 2.26,
  'develop': 2.19,
  'earn': 2.15,
  'trust': 2.15,
  'insult': 2.1},
 {'beautiful': 0.09,
  'female': 0.11,
  'remind': 0.14,
  'surprise': 0.16,
  'represent': 0.21,
  'pregn

# Weat Score Calculation

In [21]:
def swAB(W, A, B):
    """Calculates differential cosine-similarity between word vectors in W, A and W, B
     Arguments
              W, A, B : n x d matrix of word embeddings stored row wise
    """
    WA = cosine_similarity(W,A)
    WB = cosine_similarity(W,B)

    #Take mean along columns
    WAmean = np.mean(WA, axis = 1)
    WBmean = np.mean(WB, axis = 1)

    return (WAmean - WBmean)
  
def test_statistic(X, Y, A, B):
    """Calculates test-statistic between the pair of association words and target words
     Arguments
              X, Y, A, B : n x d matrix of word embeddings stored row wise
     Returns
              Test Statistic
    """
    return (sum(swAB(X, A, B)) - sum(swAB(Y, A, B)))

In [23]:
def weat_effect_size(X, Y, A, B, embd):
    """Computes the effect size for the given list of association and target word pairs
     Arguments
              X, Y : List of association words
              A, B : List of target words
              embd : Dictonary of word-to-embedding for all words
     Returns
              Effect Size
    """

    Xmat = np.array([embd[w.lower()] for w in X if w.lower() in embd])
    Ymat = np.array([embd[w.lower()] for w in Y if w.lower() in embd])
    Amat = np.array([embd[w.lower()] for w in A if w.lower() in embd])
    Bmat = np.array([embd[w.lower()] for w in B if w.lower() in embd])

    XuY = list(set(X).union(Y))
    XuYmat = []
    for w in XuY:
        if w.lower() in embd:
            XuYmat.append(embd[w.lower()])
    XuYmat = np.array(XuYmat)


    d = (np.mean(swAB(Xmat,Amat,Bmat)) - np.mean(swAB(Ymat,Amat,Bmat)))/np.std(swAB(XuYmat, Amat, Bmat))

    return d

In [28]:
def random_permutation(iterable, r=None):
    """Returns a random permutation for any iterable object"""
    pool = tuple(iterable)
    r = len(pool) if r is None else r
    return tuple(random.sample(pool, r))

def weat_p_value(X, Y, A, B, embd, sample = 1000):
    """Computes the one-sided P value for the given list of association and target word pairs
     Arguments
              X, Y : List of association words
              A, B : List of target words
              embd : Dictonary of word-to-embedding for all words
              sample : Number of random permutations used.
     Returns
    """
    size_of_permutation = min(len(X), len(Y))
    X_Y = X + Y
    test_stats_over_permutation = []

    Xmat = np.array([embd[w.lower()] for w in X if w.lower() in embd])
    Ymat = np.array([embd[w.lower()] for w in Y if w.lower() in embd])
    Amat = np.array([embd[w.lower()] for w in A if w.lower() in embd])
    Bmat = np.array([embd[w.lower()] for w in B if w.lower() in embd])

    if not sample:
        permutations = combinations(X_Y, size_of_permutation)
    else:
        permutations = [random_permutation(X_Y, size_of_permutation) for s in range(sample)]

    for Xi in permutations:
        Yi = filterfalse(lambda w:w in Xi, X_Y)
        Ximat = np.array([embd[w.lower()] for w in Xi if w.lower() in embd])
        Yimat = np.array([embd[w.lower()] for w in Yi if w.lower() in embd])
        test_stats_over_permutation.append(test_statistic(Ximat, Yimat, Amat, Bmat))

    unperturbed = test_statistic(Xmat, Ymat, Amat, Bmat)
  
    is_over = np.array([o > unperturbed for o in test_stats_over_permutation])
  
    return is_over.sum() / is_over.size

In [34]:
"""List of association and target word pairs for the sample test (top_m, top_f) vs (Pleasant, Unpleasant)"""


X = list(top_m.keys())

Y = list(top_f.keys())

# Find better pleasant and unpleasant words

A = ["caress", "freedom", "health", "love", "peace", "cheer", "friend", "heaven", "loyal", "pleasure", "diamond", "gentle", "honest", "lucky", "rainbow", "diploma", "gift", "honor", "miracle", "sunrise", "family",
    "happy", "laughter", "paradise", "vacation"] 

B = ["abuse", "crash", "filth", "murder", "sickness", "accident", "death", "grief", "poison", "stink", "assault", "disaster", "hatred", "pollute", "tragedy", "divorce", "jail", "poverty", "ugly", "cancer", "kill", "rotten",
    "vomit", "agony", "prison"] 


resourceFile = ''
glove = KeyedVectors.load_word2vec_format('embeddings/GoogleNews-vectors-negative300.bin', binary=True)
print('The glove embedding has been loaded!')

"""Compute the effect-size and P value"""
print('WEAT d = ', weat_effect_size(X, Y, A, B, glove))
print('WEAT p = ', weat_p_value(X, Y, A, B, glove, 1000))

The glove embedding has been loaded!
WEAT d =  -0.3398966
WEAT p =  0.969


In [36]:
import numpy as np
from gensim.models import KeyedVectors
import gensim
import random
import sys
import glob
import os
import json
from gensim.models import Word2Vec
from scipy import stats
import sys
import math

def word_assoc(w,A,B,embedding):
    """
    Calculates difference in mean cosine similarity between a word and two sets
    of words.
    """
    return embedding.n_similarity([w],A) - embedding.n_similarity([w],B)

def diff_assoc(X,Y,A,B,embedding):
    """
    Caclulates the WEAT test statics for four sets of words in an embeddings
    """
    word_assoc_X = np.array(list(map(lambda x : word_assoc(x,A,B,embedding), X)))
    word_assoc_Y = np.array(list(map(lambda y : word_assoc(y,A,B,embedding), Y)))
    mean_diff = np.mean(word_assoc_X) - np.mean(word_assoc_Y)
    std = np.std(np.concatenate((word_assoc_X, word_assoc_Y), axis=0))
    return mean_diff / std

def get_bias_scores_mean_err(word_pairs,embedding):
    """
    Caculate the mean WEAT statistic and standard error using a permutation test
    on the sets of words (defaults to 100 samples)
    """
    # divide smaller word_list by two
    subset_size_target = min(len(word_pairs['X']),len(word_pairs['Y']))//2
    subset_size_attr = min(len(word_pairs['A']),len(word_pairs['B']))//2
    bias_scores = []
    for i in range(100):
        sX = np.random.choice(word_pairs['X'],subset_size_target,replace=False)
        sY = np.random.choice(word_pairs['Y'],subset_size_target,replace=False)
        sA = np.random.choice(word_pairs['A'],subset_size_attr,replace=False)
        sB = np.random.choice(word_pairs['B'],subset_size_attr,replace=False)
        bias_scores.append(diff_assoc(sX,sY,sA,sB,embedding))
    return np.mean(bias_scores), stats.sem(bias_scores)


def run_test(config, embedding):
    word_pairs = {}
    min_len = sys.maxsize
    # Only include words that are present in the word embedding
    for word_list_name, word_list in config.items():
        if word_list_name in ['X', 'Y', 'A', 'B']:
            word_list_filtered = list(filter(lambda x: x in embedding and np.count_nonzero(embedding[x]) > 0, word_list))
            word_pairs[word_list_name] = word_list_filtered
            if len(word_list_filtered) < 2:
                print('ERROR: Words from list {} not found in embedding\n {}'.\
                format(word_list_name, word_list))
                print('All word groups must contain at least two words')
                return None, None
    return get_bias_scores_mean_err(word_pairs,embedding)

def load_embedding(embed_path):
    if embed_path.endswith('wv'):
        return KeyedVectors.load(embed_path)
    elif embed_path.endswith('txt'):
        return KeyedVectors.load_word2vec_format(embed_path, binary=False)
    elif embed_path.endswith('bin'):
        return KeyedVectors.load_word2vec_format(embed_path, binary=True)
    # NOTE reddit embedding is saved as model (no ext) + syn1neg + syn0
    else:
        return Word2Vec.load(embed_path)

In [None]:
results = {}
embedding = load_embedding('embeddings/GoogleNews-vectors-negative300.bin')
        except:
            print('could not load embedding {}'.format(e_name))
            continue;
        for name_of_test, test_config in config['tests'].items():
            mean, err = run_test(test_config, embedding)
            print('mean: {} err: {}'.format(mean, err))
            if mean is not None:
                results[e_name][name_of_test] = (round(mean, 4), round(err,4))
    else:
        print('loading time series embeddings...')
        for time, embed_path in e.items():
            results[e_name][time] = {}
            embedding = load_embedding(embed_path)
            for name_of_test, test_config in config['tests'].items():
                print(name_of_test)
                mean, err = run_test(test_config, embedding)
                print('mean: {} err: {}'.format(mean, err))
                if mean is not None:
                    results[e_name][time][name_of_test] = (round(mean, 4), round(err,4))
                    
                    
    