# Load pre-trained UMBC vectors

In [2]:
import numpy as np
from gensim.models.keyedvectors import KeyedVectors

In [5]:
# location of the pre-trained UMBC vectors (word2vec text format, hence binary=False)
w2v = 'embeddings/1A_en_UMBC_tokenized.vectors.txt'
model = KeyedVectors.load_word2vec_format(w2v, binary=False)
# pre-compute L2 norms of vectors
# NOTE(review): replace=True presumably keeps only the unit-normalised vectors - confirm against the gensim docs
model.init_sims(replace=True)

In [None]:
len(model.index2word)

# the embedding vocabulary contains 219,523 terms

In [54]:
# sanity check: the 20 terms the model ranks as most similar to 'florida'
model.most_similar(u'florida', topn=20)

[(u'sarasota', 0.7943328619003296),
 (u'dade', 0.7730554342269897),
 (u'alabama', 0.7612541913986206),
 (u'gainesville', 0.7478644847869873),
 (u'louisiana', 0.738013505935669),
 (u'miami', 0.7371755242347717),
 (u'carolina', 0.7347331047058105),
 (u'north_carolina', 0.7287717461585999),
 (u'pensacola', 0.7225453853607178),
 (u'maryland', 0.7108623385429382),
 (u'texas', 0.7063707113265991),
 (u'lauderdale', 0.7022767663002014),
 (u'ohio', 0.6949695944786072),
 (u'new_jersey', 0.6765778660774231),
 (u'virginia', 0.6628003120422363),
 (u'tennessee', 0.6603942513465881),
 (u'california', 0.6600708961486816),
 (u'georgia', 0.6539464592933655),
 (u'missouri', 0.6458120942115784),
 (u'hillsborough', 0.6452142000198364)]

In [None]:
# sanity check: not a single entry of the model vocabulary is title-cased
assert not any(k.istitle() for k in model.vocab.keys())


## Load training, test data

In [91]:
#import codecs
import io
import os
import unicodecsv as csv
from collections import defaultdict

def read_subsumptions(filenames):
    """Read (query, hypernym) pairs from a data file and its matching gold file.

    Args:
        filenames: 2-item sequence ``(hypo, hyper)``. Row i of ``hypo`` holds a
            query term in its first tab-separated column; row i of ``hyper``
            holds the tab-separated gold hypernyms of that term.

    Returns:
        A list of ``(query, hypernym)`` tuples, one per gold hypernym. Every
        term is lower-cased and has its spaces replaced by underscores.

    Raises:
        AssertionError: if the two files do not have the same number of rows.
    """
    hypo, hyper = filenames

    def normalise(term):
        # multi-word terms are joined with underscores and lower-cased
        return term.replace(" ", "_").lower()

    # unicodecsv does the decoding itself, so both files are opened in binary
    # mode (the original mixed a text-mode io.open with a plain open)
    with open(hypo, mode='rb') as f:
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE, encoding='utf-8')
        data_list = [row[0] for row in reader]

    with open(hyper, mode='rb') as f:
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE, encoding='utf-8')
        gold_list = [row for row in reader]

    # make sure we have the same number of elements in each list
    assert len(data_list) == len(gold_list)

    subsumptions = []
    for data_item, gold_terms in zip(data_list, gold_list):
        # normalise the query once per row rather than once per gold term
        query = normalise(data_item)
        for gold_item in gold_terms:
            subsumptions.append((query, normalise(gold_item)))

    return subsumptions

def read_vocab(filename):
    """Read the vocabulary file and return its normalised terms.

    Args:
        filename: path of a tab-separated file; only the first column of each
            row is used.

    Returns:
        A list with one term per row, lower-cased and with spaces replaced by
        underscores.
    """
    # unicodecsv decodes the bytes itself, so the file is opened in binary mode
    with open(filename, mode='rb') as f:
        reader = csv.reader(f, delimiter='\t', quoting=csv.QUOTE_NONE, encoding='utf-8')
        vocab = [row[0].replace(" ", "_").lower() for row in reader]

    return vocab


In [92]:
data_file_names = list(map(lambda x: './data/shared_task/1A.english.%s.data.txt'%(x), ['trial', 'test', 'training']))
gold_file_names = list(map(lambda x: './data/shared_task/1A.english.%s.gold.txt'%(x), ['trial', 'test', 'training']))
vocab_file_name = './data/shared_task/1A.english.vocabulary.txt'

# materialise the (data, gold) pairs: on Python 3 zip() returns a one-shot
# iterator that cannot be indexed
file_names = list(zip(data_file_names, gold_file_names))
# 0 = trial; 1 = test; 2 = train
valid_subs = read_subsumptions(file_names[0])
test_subs = read_subsumptions(file_names[1])
train_subs = read_subsumptions(file_names[2])
vocabulary = read_vocab(vocab_file_name)


In [None]:
# ensure that the model vocabulary is at least as large as the shared-task vocabulary
assert len(model.index2word) >= len(vocabulary)

In [93]:
# eliminate training tuples for which no embedding exists
from collections import Counter

def get_terms_having_vectors(dataset, embeddings=None):
    """Keep only the (query, hypernym) pairs whose two terms both have a vector.

    Args:
        dataset: iterable of ``(query, hypernym)`` pairs.
        embeddings: any container supporting ``term in embeddings``; defaults
            to the notebook-level ``model`` when omitted.

    Returns:
        ``(queries, hypernyms)``: two parallel lists. Both are empty when no
        pair survives (the original ``zip(*...)`` unpacking raised ValueError
        in that case).
    """
    if embeddings is None:
        embeddings = model

    query, hyper = [], []
    for q, h in dataset:
        if q in embeddings and h in embeddings:
            query.append(q)
            hyper.append(h)

    return query, hyper


# restrict every split, and the candidate vocabulary, to terms covered by the embeddings
vocab = list(w for w in vocabulary if w in model)
valid_query, valid_hyper = get_terms_having_vectors(valid_subs)
test_query, test_hyper = get_terms_having_vectors(test_subs)
train_query, train_hyper = get_terms_having_vectors(train_subs)

# queries and hypernyms must stay aligned pair by pair
assert len(valid_query) == len(valid_hyper)
assert len(test_query) == len(test_hyper)
assert len(train_query) == len(train_hyper)

In [100]:
from collections import Counter

# 'person' is by far the most frequent hypernym, followed by 'city'
# (the counts are taken over the training, test and validation pairs together)
hypernym_distrib = Counter(train_hyper + test_hyper + valid_hyper)
for v, k in  sorted(((value, key) for (key,value) in hypernym_distrib.items()), reverse = True):
    print (k,v)

# There are 4,233 unique hypernyms in all
# (print calls use the function form so that the cell parses on Python 2 and 3 alike)
print(len(hypernym_distrib))
print(sum(hypernym_distrib.values()))

(u'person', 612)
(u'city', 98)
(u'writer', 77)
(u'leader', 73)
(u'state', 70)
(u'work_of_art', 67)
(u'animal', 65)
(u'constructed_structure', 64)
(u'plant', 61)
(u'natural_phenomenon', 60)
(u'software', 59)
(u'film', 57)
(u'town', 56)
(u'phenomenon', 55)
(u'software_program', 54)
(u'computer_program', 52)
(u'software_package', 51)
(u'company', 51)
(u'picture', 50)
(u'politician', 49)
(u'technical_specification', 48)
(u'movie', 45)
(u'movement', 45)
(u'country', 45)
(u'physical_phenomenon', 43)
(u'locale', 42)
(u'sportsperson', 40)
(u'show', 40)
(u'moving_picture', 39)
(u'moving-picture_show', 38)
(u'mechanism', 37)
(u'instrument', 37)
(u'computer_software', 37)
(u'transportation', 36)
(u'transport', 36)
(u'intellectual', 36)
(u'enterprise', 36)
(u'chief', 36)
(u'sport', 35)
(u'public_building', 35)
(u'medium', 35)
(u'computer_programme', 35)
(u'athlete', 35)
(u'picture_show', 34)
(u'move', 34)
(u'motion_picture', 34)
(u'motion', 33)
(u'communication_medium', 33)
(u'worker', 32)
(u'spec

(u'excretion', 2)
(u'exclusive_right', 2)
(u'exchange', 2)
(u'evildoing', 2)
(u'european_nation', 2)
(u'eristic', 2)
(u'equipment_failure', 2)
(u'equilibrium', 2)
(u'environmental_pollution', 2)
(u'environmental_condition', 2)
(u'entryway', 2)
(u'entranceway', 2)
(u'enrollee', 2)
(u'enlargement', 2)
(u'emotionality', 2)
(u'emotionalism', 2)
(u'electromagnetic_radiation', 2)
(u'electricity', 2)
(u'electrical_conductance', 2)
(u'effort', 2)
(u'edible_nut', 2)
(u'eater', 2)
(u'earthquake', 2)
(u'drug_of_abuse', 2)
(u'dose', 2)
(u'domesticated_animal', 2)
(u'domestic_dog', 2)
(u'domestic_animal', 2)
(u'dog_type', 2)
(u'dog_breed', 2)
(u'documentary', 2)
(u'doctrine', 2)
(u'doc', 2)
(u'disturbance', 2)
(u'display', 2)
(u'dislike', 2)
(u'dishware', 2)
(u'disgust', 2)
(u'diseased_person', 2)
(u'discussion', 2)
(u'discrimination', 2)
(u'disbursement', 2)
(u'disbursal', 2)
(u'dirtying', 2)
(u'dirtiness', 2)
(u'diplomat', 2)
(u'diocese', 2)
(u'dimensional_analysis', 2)
(u'dimension', 2)
(u'dicta

(u'optical_microscope', 1)
(u'optical_disk', 1)
(u'opposition', 1)
(u'oppositeness', 1)
(u'opportunity', 1)
(u'opponent', 1)
(u'operating_system_distribution', 1)
(u'online_database', 1)
(u'on-line_database', 1)
(u'olympiad', 1)
(u'olive', 1)
(u'older_adult', 1)
(u'oilseed', 1)
(u'oil', 1)
(u'official_residence', 1)
(u'official_document', 1)
(u'office_suite', 1)
(u'office_furniture', 1)
(u'offering', 1)
(u'offer', 1)
(u'odonate', 1)
(u'octopod', 1)
(u'ocean', 1)
(u'occurrent', 1)
(u'obstruent', 1)
(u'observer', 1)
(u'observatory', 1)
(u'observation', 1)
(u'obliteration', 1)
(u'object-orientation', 1)
(u'oak_wood', 1)
(u'nymph', 1)
(u'numerical_quantity', 1)
(u'nucleus', 1)
(u'nucleic_acid_sequence', 1)
(u'nucleic_acid', 1)
(u'nuclear_physicist', 1)
(u'nozzle', 1)
(u'novel_adaptation', 1)
(u'not-for-profit', 1)
(u'north_american_indian', 1)
(u'nonverbal_communication', 1)
(u'nonvascular_plant', 1)
(u'nonsmoker', 1)
(u'nonsense', 1)
(u'nonprofit_organization', 1)
(u'nonprofit', 1)
(u'non

(u'blood_circulation', 1)
(u'blood', 1)
(u'blister', 1)
(u'bleb', 1)
(u'blanket', 1)
(u'blank_space', 1)
(u'bladder', 1)
(u'black_person', 1)
(u'black_people', 1)
(u'black_magic', 1)
(u'black_belt', 1)
(u'black', 1)
(u'bishopric', 1)
(u'biscuit', 1)
(u'biotic_community', 1)
(u'biosystem', 1)
(u'biomedical_scientist', 1)
(u'biomaterial', 1)
(u'biological_weapon', 1)
(u'biological_sequence', 1)
(u'biological_research', 1)
(u'biological_membrane', 1)
(u'biography', 1)
(u'biographical_film', 1)
(u'biofilm', 1)
(u'biocide', 1)
(u'biochemist', 1)
(u'binomen', 1)
(u'binary_operation', 1)
(u'binary_arithmetic_operation', 1)
(u'billards', 1)
(u'bibliography', 1)
(u'bibliographic_index', 1)
(u'bibliographic_database', 1)
(u'bevel_gear', 1)
(u'beryl', 1)
(u'benevolence', 1)
(u'benefit', 1)
(u'bend', 1)
(u'belonging', 1)
(u'bell_shape', 1)
(u'bell', 1)
(u'believer', 1)
(u'beholder', 1)
(u'bedding', 1)
(u'bed_clothing', 1)
(u'beauty_treatment', 1)
(u'beautification', 1)
(u'beard', 1)
(u'beam', 1)
(

## Construct synonyms for training, testing and validation terms

In [3]:
from collections import defaultdict

def get_synyonyms(hyponyms, hypernyms, n=15, embeddings=None):
    """Collect, for every query term, near neighbours that are not gold hypernyms.

    Args:
        hyponyms: list of query terms (repeats allowed), parallel to ``hypernyms``.
        hypernyms: list of gold hypernyms, one per entry of ``hyponyms``.
        n: maximum number of neighbours kept per term. At most 20 neighbours
            are requested, so fewer than ``n`` may be returned.
        embeddings: object exposing ``most_similar(term, topn=...)`` that
            returns ``(word, score)`` pairs; defaults to the notebook-level
            ``model`` when omitted.

    Returns:
        dict mapping each distinct query term to a list of neighbour words, in
        the order returned by ``most_similar``.
    """
    if embeddings is None:
        embeddings = model

    # prepare hypernym lookup dictionary: query term -> its gold hypernyms
    hyper_lookup = defaultdict(set)
    for q, h in zip(hyponyms, hypernyms):
        hyper_lookup[q].add(h)

    synonyms = {}
    for term in set(hyponyms):
        # the original looked up hyper_lookup[x] (the candidate's own
        # hypernyms), so gold hypernyms of `term` were never filtered out
        gold = hyper_lookup[term]
        neighbours = [word for word, _ in embeddings.most_similar(term, topn=20)]
        synonyms[term] = [word for word in neighbours if word not in gold][:n]

    return synonyms
    
#get_synyonyms(train_query + test_query + valid_query, train_hyper + test_hyper + valid_hyper)    
#get_synyonyms(valid_query, valid_hyper)    

def get_random(hyponyms, hypernyms, vocab, n = 15):
    """Draw, for every query term, random vocabulary words that are not gold hypernyms.

    Args:
        hyponyms: list of query terms (repeats allowed), parallel to ``hypernyms``.
        hypernyms: list of gold hypernyms, one per entry of ``hyponyms``.
        vocab: sequence of candidate words to sample from.
        n: maximum number of random words kept per term.

    Returns:
        dict mapping each distinct query term to a list of at most ``n`` sampled
        words. Up to 20 words are sampled without replacement before filtering.
    """
    # prepare hypernym lookup dictionary: query term -> its gold hypernyms
    hyper_lookup = defaultdict(set)
    for q, h in zip(hyponyms, hypernyms):
        hyper_lookup[q].add(h)

    # sampling without replacement cannot draw more words than the vocabulary holds
    sample_size = min(20, len(vocab))

    random_words = {}
    for term in set(hyponyms):
        if sample_size:
            some_words = np.random.choice(vocab, sample_size, replace=False)
        else:
            some_words = []
        # the original looked up hyper_lookup[x] (the sampled word's own
        # hypernyms), so gold hypernyms of `term` were never filtered out
        gold = hyper_lookup[term]
        random_words[term] = [w for w in some_words if w not in gold][:n]

    return random_words



In [4]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Data class that encapsulates all the word-based data needed to train the various algorithms.
# We assume that any words which do not feature in the embeddings have already been filtered out.
class Data:
    def __init__(self, 
                 train_query, train_hyper, 
                 test_query, test_hyper, 
                 valid_query, valid_hyper, 
                 vocab, embeddings):
        """Store the splits, fit a tokenizer and build the embedding matrix.

        Args:
            train_query, train_hyper: parallel lists of training queries / hypernyms.
            test_query, test_hyper: parallel lists of test queries / hypernyms.
            valid_query, valid_hyper: parallel lists of validation queries / hypernyms.
            vocab: list of candidate hypernym words.
            embeddings: word -> vector lookup supporting ``word in embeddings``
                and ``embeddings[word]``; it must contain the word 'animal',
                which is used to read the vector dimensionality.
        """
        # encapsulate input variables so that all the data can be passed via class instance reference
        self.train_query = train_query
        self.train_hyper = train_hyper
        self.test_query = test_query
        self.test_hyper = test_hyper
        self.valid_query = valid_query
        self.valid_hyper = valid_hyper
        self.vocab = vocab

        # determine dimensionality of embeddings
        self.embeddings_dim = embeddings['animal'].shape[0]

        print ("Tokenising words...")
        # initialise and fit tokenizer; filters='' keeps punctuation such as '_' and '-'
        self.tokenizer = Tokenizer(num_words = 300000, filters='')
        self.tokenizer.fit_on_texts(train_query + test_query + valid_query + vocab)

        print ("Creating embedding matrix...")
        # row i holds the vector of the word with token id i; row 0 is left as zeros
        self.embedding_matrix = np.zeros((len(self.tokenizer.word_index)+1, self.embeddings_dim), dtype='float32')

        for word, i in self.tokenizer.word_index.items():
            # a word without a vector keeps its all-zero row; the original
            # `embeddings[word]` lookup raised KeyError before its
            # `is not None` guard could ever take effect
            if word in embeddings:
                # vectors are stored as they come (already normalised)
                self.embedding_matrix[i,:] = embeddings[word]
        # confirm shape
        assert self.embedding_matrix.shape == (len(self.tokenizer.word_index)+1, self.embeddings_dim)

        print ("Creating random words/synonyms...")
        self.random_words = get_random(train_query + test_query + valid_query, train_hyper + test_hyper + valid_hyper, vocab)  
        self.synonyms = get_synyonyms(train_query + test_query + valid_query, train_hyper + test_hyper + valid_hyper)

In [5]:
#data = Data(train_query, train_hyper, test_query, test_hyper, valid_query, valid_hyper, vocab, model)
import pickle
import os

dest = os.path.join('.', 'pickle')
#pickle.dump(data, open(os.path.join(dest, 'semeval_data.pkl'), 'wb'), protocol=2)
# load the cached Data instance; the context manager closes the file handle,
# which the original bare open() call never did
# NOTE: pickle.load can execute arbitrary code - only load files you created yourself
with open(os.path.join(dest, 'semeval_data.pkl'), 'rb') as pickle_file:
    data = pickle.load(pickle_file)


In [4]:
# inspect the random words and the synonyms stored for the query 'bomber'
print (data.random_words['bomber'])
print (data.synonyms['bomber'])


[u'subvention', u'noblesse_oblige', u'odometer', u'sore_throat', u'harries', u'congener', u'yobo', u'marquet', u'pleuromamma', u'm\xe9tis', u'emperor_of_rome', u'confliction', u'zerbo', u'semicomatose', u'elutriator']
[u'fighter', u'bomb', u'fighter_plane', u'cruise_missile', u'fighter_aircraft', u'anti-aircraft', u'attack_aircraft', u'shoot_down', u'heavy_bomber', u'air_force', u'airliner', u'air_attack', u'air_defense', u'surface-to-air_missile', u'fighter-bomber']


In [6]:
# function that returns negative samples alongside set of positive samples
# we need to pass:
# the batch hyponym terms, batch of hypernym terms, negative_tuples, tokenizer 
# to create sequences
def extend_batch_with_negatives(batch_X_term, batch_X_hyper, negative_tuples,
                                tokenizer, m):
    """Append up to ``m`` negative pairs per positive pair to a training batch.

    Args:
        batch_X_term: array of query token ids, one row per positive pair.
        batch_X_hyper: array of hypernym token ids, parallel to ``batch_X_term``.
        negative_tuples: mapping from a query word to a list of negative words.
        tokenizer: object exposing ``index_word`` (id -> word) and
            ``texts_to_sequences(list_of_words)``.
        m: maximum number of negatives taken per positive pair.

    Returns:
        ``(batch_X_term, batch_X_hyper, batch_y_label)`` where the negative rows
        are stacked below the positives and the labels are 1 for the positives
        followed by 0 for the negatives. When no negatives are available the
        inputs are returned unchanged with all-ones labels.
    """
    # recover the query word behind every token id of the batch
    positive_words = [tokenizer.index_word[term_id] for term_id in batch_X_term.flatten()]

    # pair every query word with its first m negative words
    neg_terms, neg_hyper = [], []
    for word in positive_words:
        for negative in negative_tuples[word][:m]:
            neg_terms.append(word)
            neg_hyper.append(negative)

    # nothing to add: stacking an empty 1-D array onto the batch would fail
    if not neg_terms:
        return batch_X_term, batch_X_hyper, np.ones(batch_X_term.shape[0])

    # tokenize -ve samples
    neg_terms_seq = tokenizer.texts_to_sequences(neg_terms)
    neg_hyper_seq = tokenizer.texts_to_sequences(neg_hyper)

    # the first rows are the true pairs (1s), and the rest are the -ve samples (0s)
    batch_y_label = np.concatenate((
            np.ones(batch_X_term.shape[0]),
            np.zeros(len(neg_terms_seq))
    ))

    # finally, stack -ve sequences at the bottom of +ves to create the final training batch
    batch_X_term = np.vstack((batch_X_term, np.array(neg_terms_seq)))
    batch_X_hyper = np.vstack((batch_X_hyper, np.array(neg_hyper_seq)))

    return batch_X_term, batch_X_hyper, batch_y_label

# Evaluation

In [7]:
def convert_hypernyms_to_one_line(dataset):
    """Group the hypernyms of a dataset by query term.

    Args:
        dataset: pair ``(queries, hypernyms)`` of parallel sequences.

    Returns:
        dict mapping each distinct query to the list of its hypernyms, in their
        original order. Keys are inserted in sorted order.
    """
    # one empty slot per distinct query, created in sorted order
    one_line = {}
    for w in sorted(set(dataset[0])):
        one_line[w] = []

    # single pass over the pairs instead of re-scanning the dataset per query
    for q, h in zip(*dataset):
        one_line[q].append(h)

    return one_line

In [8]:
# taken from task_scorer.py provided with shared task resources
def mean_reciprocal_rank(r):
    """Reciprocal of the rank of the first relevant item of one ranked list.

    The first element counts as rank 1 and any nonzero entry is relevant.
    Example from http://en.wikipedia.org/wiki/Mean_reciprocal_rank
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
    Returns:
        1 / rank of the first relevant item, or 0. when nothing is relevant
    """
    relevant_positions = np.asarray(r).nonzero()[0]
    if not relevant_positions.size:
        return 0.
    return 1. / (relevant_positions[0] + 1)

def precision_at_k(r, k, n):
    """Precision @ k, capped by the number of gold items.

    Relevance is binary (nonzero is relevant).
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        k: cut-off rank, must be >= 1
        n: number of gold items; the hit count is divided by min(k, n)
    Returns:
        Number of relevant items among the first k, divided by min(k, n)
    Raises:
        ValueError: len(r) must be >= k
    """
    assert k >= 1
    top_k = np.asarray(r)[:k] != 0
    if top_k.size != k:
        raise ValueError('Relevance score length < k')
    # Modified from the first version. Now the gold elements are taken into account
    hits = np.mean(top_k)*k
    return hits/min(k,n)

def average_precision(r,n):
    """Mean of precision_at_k over every cut-off from 1 to len(r).

    Relevance is binary (nonzero is relevant).
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        n: number of gold items, forwarded to precision_at_k
    Returns:
        Average precision, or 0. for an empty ranking
    """
    relevance = np.asarray(r) != 0
    # Modified from the first version (removed "if r[k]"): every cut-off,
    # relevant or not, contributes to the average
    scores = []
    for cutoff in range(1, relevance.size + 1):
        scores.append(precision_at_k(relevance, cutoff, n))
    if len(scores) == 0:
        return 0.
    return np.mean(scores)

def mean_average_precision(r,n):
    """Average precision of a single ranked list (delegates to average_precision).

    Relevance is binary (nonzero is relevant).
    Args:
        r: Relevance scores (list or numpy) in rank order
            (first element is the first item)
        n: number of gold items
    Returns:
        The value of average_precision(r, n)
    """
    score = average_precision(r,n)
    return score



In [9]:
# predictions is a dictionary whereby key is query term and value is a list of ranked hypernym predictions
def get_evaluation_scores(dataset, predictions):
    """Score the ranked predictions of every query against its gold hypernyms.

    Args:
        dataset: pair ``(queries, hypernyms)`` of parallel sequences holding
            the gold data.
        predictions: dict mapping each query to its ranked list of predicted
            hypernyms.

    Returns:
        ``(scores_names, all_scores)``: the metric names and, for every query,
        a list ``[MRR, MAP, P@1, P@5, P@10]`` in that order.
    """
    scores_names = ['MRR', 'MAP', 'P@1', 'P@5', 'P@10']
    all_scores = []
    for query, gold_hyps in convert_hypernyms_to_one_line(dataset).items():
        pred_hyps = predictions[query]
        gold_hyps_n = len(gold_hyps)

        # binary relevance of the 15 ranked slots
        r = [0 for i in range(15)]

        # only the first len(gold_hyps) predictions are allowed to score; the
        # extra len(r) bound stops longer prediction lists from indexing past r
        for j in range(min(len(pred_hyps), gold_hyps_n, len(r))):
            if pred_hyps[j] in gold_hyps:
                r[j] = 1

        all_scores.append([
            mean_reciprocal_rank(r),
            mean_average_precision(r, gold_hyps_n),
            precision_at_k(r, 1, gold_hyps_n),
            precision_at_k(r, 5, gold_hyps_n),
            precision_at_k(r, 10, gold_hyps_n),
        ])
    return scores_names, all_scores


## Helper functions to run evaluation routine

In [10]:
from sklearn.metrics.pairwise import cosine_similarity

# alternative hypernym generator: apply the Phi weights to the hyponym vector and
# rank the words by the weighted similarity of their vectors to the projections
def crim_get_hypernym(word, tokenizer, phi, cluster_weight, bias, embeddings, top):
    """Return the ``top`` best-scoring candidate words for ``word``.

    Every projection of the query vector is compared with all embedding rows
    except row 0 by dot product; the similarities are combined with
    ``cluster_weight`` and shifted by ``bias``.
    NOTE(review): presumably phi is (k, dim, dim) and cluster_weight is (k, 1) - confirm.

    Returns:
        zip of (word, score) pairs, best score first.
    """
    query_vector = embeddings[tokenizer.word_index[word]]
    projected = np.dot(query_vector, phi)

    # similarity of every candidate (rows 1..) to every projection, one row per projection
    per_projection_sims = np.dot(embeddings[1:], projected.T).T
    scores = np.dot(cluster_weight.T, per_projection_sims) + bias
    ranking = np.argsort(scores[0])[::-1][:top]

    # candidate row i corresponds to token id i + 1
    words = tokenizer.sequences_to_texts(ranking.reshape(-1,1) + 1)
    return zip(words, scores.flatten()[ranking])

In [11]:
def crim_max_get_hypernym(word, tokenizer, phi, cluster_weight, bias, embeddings, top):
    """Rank candidates by their best similarity over all projections of ``word``.

    The projections of the query vector are rescaled to unit length, each
    candidate (every embedding row except row 0) keeps its maximum dot-product
    similarity, and that value is multiplied by ``cluster_weight`` and shifted
    by ``bias``.

    Returns:
        zip of (word, score) pairs, best score first.
    """
    query_vector = embeddings[tokenizer.word_index[word]]

    # project the query, then rescale every projection to unit length
    projected = np.dot(query_vector, phi)
    projected /= np.linalg.norm(projected, axis=1).reshape(-1,1)

    # best similarity of each candidate across the projections
    candidate_sims = np.dot(embeddings[1:], projected.T)
    best_sims = np.max(candidate_sims, 1).reshape(1,-1)

    scores = np.dot(cluster_weight.T, best_sims) + bias
    ranking = np.argsort(scores[0])[::-1][:top]

    # candidate row i corresponds to token id i + 1
    words = tokenizer.sequences_to_texts(ranking.reshape(-1,1) + 1)
    return zip(words, scores.flatten()[ranking])
    
    
def crim_mean_get_hypernym(word, tokenizer, phi, cluster_weight, bias, embeddings, top):
    """Rank candidates by their mean similarity over all projections of ``word``.

    The projections of the query vector are rescaled to unit length, each
    candidate (every embedding row except row 0) keeps the mean of its
    dot-product similarities, and that value is multiplied by
    ``cluster_weight`` and shifted by ``bias``.

    Returns:
        zip of (word, score) pairs, best score first.
    """
    query_vector = embeddings[tokenizer.word_index[word]]

    # project the query, then rescale every projection to unit length
    projected = np.dot(query_vector, phi)
    projected /= np.linalg.norm(projected, axis=1).reshape(-1,1)

    # average similarity of each candidate across the projections
    candidate_sims = np.dot(embeddings[1:], projected.T)
    average_sims = np.mean(candidate_sims, 1).reshape(1,-1)

    scores = np.dot(cluster_weight.T, average_sims) + bias
    ranking = np.argsort(scores[0])[::-1][:top]

    # candidate row i corresponds to token id i + 1
    words = tokenizer.sequences_to_texts(ranking.reshape(-1,1) + 1)
    return zip(words, scores.flatten()[ranking])


In [12]:
def cluster_get_hypernym(word, tokenizer, phi, cluster_weight, bias, embeddings, top):
    """Rank candidates by their best per-cluster score for ``word``.

    Every projection of the query vector is compared with all embedding rows
    except row 0; each row of similarities is scaled by ``cluster_weight`` and
    shifted by ``bias``, and a candidate keeps its maximum over the rows.
    NOTE(review): presumably cluster_weight and bias are (k, 1), one entry per cluster - confirm.

    Returns:
        zip of (word, score) pairs, best score first.
    """
    query_vector = embeddings[tokenizer.word_index[word]]
    projected = np.dot(query_vector, phi)

    # one row of weighted, shifted similarities per projection
    similarities = np.dot(embeddings[1:], projected.T)
    weighted = (similarities.T * cluster_weight) + bias

    best_per_word = np.max(weighted, axis=0)
    ranking = np.argsort(best_per_word)[::-1][:top]

    # candidate row i corresponds to token id i + 1
    words = tokenizer.sequences_to_texts(ranking.reshape(-1,1) + 1)
    return zip(words, best_per_word[ranking])

    

In [13]:
# function which generates top 15 predictions for each hyponym query term
# and returns results as dictionary
def predict_crim_hypernyms(queries, tokenizer, model, algol):
    """Predict 15 ranked hypernyms for every distinct query term.

    Args:
        queries: iterable of query words (repeats are ignored).
        tokenizer: tokenizer handed on to ``algol``.
        model: trained model exposing ``layers`` and ``get_layer``; it must
            contain Dense layers named 'Phi*', a 'Prediction' layer and a
            nested Model holding the 'TermEmbedding' layer.
        algol: scoring function called as
            ``algol(word, tokenizer, phi, cluster_weight, bias, embeddings, 15)``
            and returning (word, score) pairs.

    Returns:
        dict mapping each query word to its list of predicted words.
    """
    ordered_queries = sorted(list(set(queries)))

    # the Phi projection matrices are the kernels of the Dense layers named 'Phi*'
    phi_kernels = []
    for layer in model.layers:
        if type(layer) == Dense and layer.name.startswith('Phi'):
            phi_kernels.append(layer.get_weights()[0])
    dense = np.asarray(phi_kernels)

    # weights and bias of the affine transform feeding the prediction
    prediction_layer = model.get_layer(name='Prediction')
    cluster_weight = prediction_layer.get_weights()[0]
    bias = prediction_layer.get_weights()[1]

    # the embedding table lives inside the first nested Model
    nested_models = [layer for layer in model.layers if type(layer) == Model]
    embeddings = nested_models[0].get_layer(name='TermEmbedding').get_weights()[0]

    results = {}
    for position, word in enumerate(ordered_queries, 1):
        if position % 100 == 0:
            print ("Done", position)

        predicted_hypers = algol(word, tokenizer, dense, cluster_weight, bias, embeddings, 15)
        results[word] = [h for h, p in predicted_hypers]

    return results


In [14]:
def predict_cluster_hypernyms(queries, tokenizer, cluster_list, knn_model=None):
    """Predict 15 ranked hypernyms per distinct query from a list of cluster models.

    Args:
        queries: iterable of query words (repeats are ignored).
        tokenizer: tokenizer exposing ``word_index``; also handed on to
            ``cluster_get_hypernym``.
        cluster_list: sequence of objects whose ``model`` attribute has the
            layers 'Phi0' and 'Prediction'; the first one must also contain a
            nested Model holding the 'TermEmbedding' layer.
        knn_model: optional classifier exposing ``predict_proba``; when given,
            only clusters with a probability above 0 for the query are used.

    Returns:
        dict mapping each query word to its list of predicted words.
    """
    ordered_queries = sorted(list(set(queries)))

    # embeddings are present in a "shared" model that is used as the first layer of each cluster
    shared_models = [layer for layer in cluster_list[0].model.layers if type(layer) == Model]
    embeddings = shared_models[0].get_layer(name='TermEmbedding').get_weights()[0]

    # collect the Phi matrix and the prediction weight / bias of every cluster
    n_clusters = len(cluster_list)
    dim = embeddings.shape[1]
    dense = np.zeros((n_clusters, dim, dim))
    lr_weights = np.zeros((n_clusters, 1))
    lr_bias = np.zeros((n_clusters, 1))

    for idx, cluster in enumerate(cluster_list):
        dense[idx] = cluster.model.get_layer(name='Phi0').get_weights()[0]
        prediction_layer = cluster.model.get_layer(name='Prediction')
        lr_weights[idx] = prediction_layer.get_weights()[0]
        lr_bias[idx] = prediction_layer.get_weights()[1]

    results = {}
    for position, word in enumerate(ordered_queries, 1):
        if position % 100 == 0:
            print ("Done", position)

        if knn_model:
            # restrict scoring to the clusters the classifier considers possible
            word_vector = embeddings[tokenizer.word_index[word]].reshape(1,-1)
            cluster_probs = knn_model.predict_proba(word_vector)
            cluster_idx = np.where(cluster_probs > 0.)[1]
            phi = dense[cluster_idx]
            weights = lr_weights[cluster_idx]
            biases = lr_bias[cluster_idx]
        else:
            phi, weights, biases = dense, lr_weights, lr_bias

        predicted_hypers = cluster_get_hypernym(word, tokenizer,
                                                phi, weights, biases,
                                                embeddings, 15)
        results[word] = [h for h, p in predicted_hypers]

    return results

In [113]:
# sanity check on a single query
# NOTE(review): `clusters` is not defined in any cell shown here - presumably the list of trained cluster models; confirm
predict_cluster_hypernyms(['cold'], data.tokenizer, clusters)


{'cold': [u'illness',
  u'disease',
  u'weather',
  u'life-threatening',
  u'mild',
  u'cold',
  u'common_cold',
  u'weather_condition',
  u'cold_weather',
  u'severe',
  u'frostbite',
  u'respiratory_infection',
  u'hot_weather',
  u'fever',
  u'contagious']}

In [15]:
# This method retrieves the words which our model considers the most probable hypernyms.
# The problem with this method is that it is excruciatingly slow, so I developed a numpy-based
# algorithm which is significantly faster.  Refer to alt_get_hypernym (presumably now crim_get_hypernym).

def crim_get_top_hypernyms(query, hyper_candidates, model, data, top):
    """Score every vocabulary word against ``query`` with ``model.predict``.

    Args:
        query: query word, looked up in ``data.tokenizer.word_index``.
        hyper_candidates: unused; the candidates are always ``data.vocab``.
        model: object whose ``predict([query_ids, candidate_ids])`` returns one
            row of scores per candidate.
        data: object exposing ``tokenizer`` and ``vocab``.
        top: number of best candidates to return.

    Returns:
        list of (word, score) tuples, best score first.
    """
    candidates = np.asarray(data.tokenizer.texts_to_sequences(data.vocab)).flatten()

    # the query id is repeated so that it is paired with every candidate
    query_id = data.tokenizer.word_index[query]
    repeated_query = np.asarray([query_id] * len(data.vocab))
    predictions = model.predict([repeated_query, candidates])

    best_predictions = np.argsort(predictions.flatten())[::-1][:top]
    return [(data.vocab[idx], predictions[idx][0]) for idx in best_predictions]
    

In [111]:
# slow, model.predict-based ranking for a single query
# NOTE(review): `crim_model` is not defined in any cell shown here - presumably the trained projection model; confirm
crim_get_top_hypernyms('downfall', None, crim_model, data, 15)

[(u'mass_media', 0.9961901),
 (u'judgement', 0.9954072),
 (u'value_system', 0.9939751),
 (u'judgment', 0.9937524),
 (u'demean', 0.9928692),
 (u'good_person', 0.9926629),
 (u'oneself', 0.99262685),
 (u'exercising', 0.992411),
 (u'condone', 0.9912377),
 (u'person', 0.9911773),
 (u'omission', 0.99114054),
 (u'excusing', 0.9909513),
 (u'public_servant', 0.99080867),
 (u'acknowledge', 0.9900271),
 (u'truthful', 0.98968166)]

In [112]:
# numpy-based ranking for the same query, for comparison with the slow variant
# NOTE(review): `crim_model` is not defined in any cell shown here - confirm where it is created
predict_crim_hypernyms(['downfall'], data.tokenizer, crim_model, crim_get_hypernym)

{'downfall': [u'mass_media',
  u'judgement',
  u'value_system',
  u'judgment',
  u'demean',
  u'good_person',
  u'oneself',
  u'exercising',
  u'condone',
  u'person',
  u'omission',
  u'excusing',
  u'public_servant',
  u'acknowledge',
  u'truthful']}

In [None]:
# token id of every vocabulary word, used as the candidate hypernyms
candidates = data.tokenizer.texts_to_sequences(data.vocab)
candidates = np.asarray(candidates).flatten()

# one row of scores per cluster model (the row count was hard-coded to 5)
predictions = np.zeros((len(cluster_list), len(data.vocab)))
query_id = data.tokenizer.word_index['starcraft']
for idx, c in enumerate(cluster_list):
    predictions[idx] = c.model.predict([np.asarray([query_id] * len(data.vocab)), candidates]).flatten()

# every candidate keeps its best score over the clusters
best_projection = np.max(predictions, axis=0)
top_words = np.argsort(best_projection)[::-1][:15]

# list(...) so that the pairs are displayed on Python 3 as well
list(zip(map(lambda x: data.tokenizer.index_word[candidates[x]], top_words),
         best_projection[top_words]))


# Positive down-sampling

In [80]:
from collections import Counter

# keep-probability attached to each hypernym: 1 / sqrt(its frequency in the
# training pairs), rounded to two decimals
hypernym_distrib = Counter(data.train_hyper)

hypernym_prob = {}

by_frequency = sorted(((count, hyper) for (hyper, count) in hypernym_distrib.items()), reverse = True)
for count, hyper in by_frequency:
    hypernym_prob[hyper] = np.round(np.sqrt(1./count), 2)

hypernym_prob

{u'change_of_location': 1.0,
 u'electrical_element': 1.0,
 u'dimensional_analysis': 1.0,
 u'extracurricular': 1.0,
 u'dogness': 0.71,
 u'yellow': 1.0,
 u'narcotic': 1.0,
 u'obstruction': 0.38,
 u'protest': 1.0,
 u'circuitry': 0.45,
 u'lighter-than-air_craft': 1.0,
 u'mansion': 1.0,
 u'captain': 1.0,
 u'hate': 0.71,
 u'u.s._state': 0.38,
 u'looking': 1.0,
 u'orthopaedics': 1.0,
 u'particulate_matter': 1.0,
 u'assessment': 1.0,
 u'electricity': 0.71,
 u'head_of_government': 0.71,
 u'television_program': 0.24,
 u'photographic_printing': 1.0,
 u'bludgeon': 1.0,
 u'lyrist': 0.58,
 u'saccharide': 1.0,
 u'archaeologist': 1.0,
 u'artificial_language': 1.0,
 u'medical_doctor': 0.71,
 u'honorific': 0.41,
 u'political_economy': 0.58,
 u'herbicide': 1.0,
 u'primary_color': 0.58,
 u'merchant': 0.5,
 u'ordnance': 1.0,
 u'digit': 1.0,
 u'hormone': 0.71,
 u'moiety': 1.0,
 u'risk': 0.58,
 u'family_name': 0.38,
 u'social_control': 0.29,
 u'penchant': 1.0,
 u'applied_scientist': 0.71,
 u'retail_outlet': 

In [82]:
#[np.random.random() for _ in range(100)]
# split the training pairs at random: a pair is kept with the probability
# attached to its hypernym, otherwise it is dropped
subsamples = []
dropped = []
for q, h in zip(data.train_query, data.train_hyper):
    if np.random.random() >= hypernym_prob[h]:
        dropped.append((q,h))
    else:
        subsamples.append((q,h))

In [100]:
# how often each hypernym appears among the dropped pairs
# (a generator replaces zip(*dropped)[1], which fails on Python 3 and on an empty list)
downsampled_hyper = Counter(pair[1] for pair in dropped)
# formatted strings print identically under the Python 2 print statement and
# the Python 3 print function
print("%d %d" % (len(subsamples), len(dropped)))
print(min(len(data.train_query), len(data.train_hyper)))
for v, k in  sorted(((value, key) for (key,value) in downsampled_hyper.items()), reverse = True):
    print("%s %s" % (k, v))

6134 5645
11779
person 297
city 53
leader 37
work_of_art 36
picture 33
writer 32
natural_phenomenon 32
animal 32
locale 28
film 28
computer_software 28
constructed_structure 27
technical_specification 26
plant 26
movie 26
state 25
software 25
moving_picture 25
moving-picture_show 25
software_program 24
computer_code 24
sport 23
show 23
motion_picture 23
software_package 22
picture_show 22
movement 22
country 22
computer_programme 22
travel 21
computer_program 21
company 21
transportation 20
transmission_channel 20
town 20
physical_phenomenon 20
musical_work 20
function 20
channel 20
transport 19
public_building 19
phenomenon 19
move 19
coding_system 19
code 19
social_event 18
sickness 18
piece_of_land 18
communication_medium 18
chief 18
traveling 17
specifications 17
disease 17
boss 17
travelling 16
television_program 16
storage_medium 16
politician 16
pic 16
mechanism 16
figure 16
competitor 16
tv_program 15
television_show 15
technology 15
political_leader 15
piece_of_work 15
passeng

sex_organ 2
set_theory 2
serial 2
senior_management 2
seaman 2
sea 2
scientific_instrument 2
scholarly_person 2
scholar 2
satellite 2
sailing_vessel 2
sailing_ship 2
sailing_boat 2
sailboat 2
sail 2
rock_'n'_roll 2
river_transport 2
risk 2
rhetorical_device 2
retail_store 2
remembering 2
religious_ceremony 2
reference 2
reason 2
ratio 2
railroad_track 2
railroad_station 2
quantitative_relation 2
putting_to_death 2
public_university 2
prophet 2
programing_language 2
program_line 2
profits 2
prof 2
primary_color 2
price 2
predisposition 2
precipitation 2
portraying 2
portrayal 2
point_of_reference 2
poet 2
plug-in 2
playwright 2
pike 2
pig 2
pictorial_representation 2
photograph 2
philosophy_of_language 2
pebble 2
payment 2
patrician 2
path 2
parcelling 2
painting 2
organ 2
operator 2
offense 2
offence 2
note 2
net 2
necromancy 2
natural_language 2
native_plant 2
narrative_mode 2
musical_instrument 2
municipal_corporation 2
mountain_range 2
money 2
misconduct 2
microhabitat 2
methodology

# Keras Projection Learning Models

In [None]:
def get_simple_class_model(phi_k=1, train_embeddings=False,
                   embeddings_dim=300, vocab_size=1000,
                   embeddings_matrix=None,
                   phi_init = None,
                   phi_activity_regularisation = None,
                   sigmoid_kernel_regularisation = None,
                   sigmoid_bias_regularisation = None,
                   sigmoid_kernel_constraint = None,
                   dropout_rate = 0.,
                   learning_rate = 0.001
                  ):
    """Build and compile a simple hyponym/hypernym binary classifier.

    The hyponym embedding is passed through one linear projection ('Phi'),
    concatenated with the hypernym embedding and fed to a single sigmoid unit.

    Parameters
    ----------
    phi_k : accepted for signature parity with get_CRIM_model; not used here.
    train_embeddings : whether the 'TermEmbedding' layer is trainable.
    embeddings_dim : embedding size, also the number of units of 'Phi'.
    vocab_size : the Embedding layer is created with ``vocab_size + 1`` rows.
    embeddings_matrix : pre-trained weights injected into 'TermEmbedding';
        when None the layer keeps its default initialisation.
    phi_init : kernel initializer of 'Phi'.
    phi_activity_regularisation : activity regularizer of 'Phi'.
    sigmoid_kernel_regularisation, sigmoid_bias_regularisation,
    sigmoid_kernel_constraint : options forwarded to the 'Prediction' layer.
    dropout_rate : rate used by both Dropout layers.
    learning_rate : Adam learning rate.

    Returns
    -------
    A compiled Keras Model taking ``[hyponym_ids, hypernym_ids]``.
    """
    hypo_input  = Input(shape=(1,), name='Hyponym')
    hyper_input = Input(shape=(1,), name='Hypernym')

    # one Embedding layer shared by both inputs
    embedding_layer = Embedding(vocab_size + 1, embeddings_dim, embeddings_constraint = UnitNorm(axis=1),
                                input_length=1, name='TermEmbedding')

    hypo_embedding = embedding_layer(hypo_input)
    hyper_embedding = embedding_layer(hyper_input)

    # Add Dropout to avoid overfit
    hypo_embedding = Dropout(dropout_rate, name='Dropout_Hypo')(hypo_embedding)

    phi =  Dense(embeddings_dim, activation=None, use_bias=False,
                 activity_regularizer=phi_activity_regularisation,
                 kernel_initializer=phi_init,
                 name='Phi') (hypo_embedding)

    phi = Flatten()(phi)
    hyper_embedding = Flatten()(hyper_embedding)

    # use the Concatenate layer class, which is what the notebook imports
    word_concat = Concatenate(axis=-1, name='Concat')([phi, hyper_embedding])
    word_concat = Dropout(dropout_rate)(word_concat)

    predictions = Dense(1, activation="sigmoid", name='Prediction',
                        use_bias=True,
                        kernel_initializer='glorot_normal',
                        kernel_constraint= sigmoid_kernel_constraint,
                        bias_initializer='glorot_normal',
                        kernel_regularizer=sigmoid_kernel_regularisation,
                        bias_regularizer=sigmoid_bias_regularisation
                       ) (word_concat)

    # instantiate model
    model = Model(inputs=[hypo_input, hyper_input], outputs=predictions)

    # inject pre-trained embedding weights into Embedding layer (if supplied)
    term_embedding = model.get_layer(name='TermEmbedding')
    if embeddings_matrix is not None:
        term_embedding.set_weights([embeddings_matrix])
    term_embedding.trainable = train_embeddings

    adam = Adam(lr = learning_rate, beta_1 = 0.9, beta_2 = 0.9, clipnorm=1.)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [15]:
from tensorflow.keras import backend as K
from tensorflow.keras.constraints import Constraint

class ForceToOne (Constraint):
    """Weight constraint that pins every weight to exactly 1.

    The previous ``w / w`` formulation produced NaN for any weight equal to
    zero; building a tensor of ones with the same shape and dtype avoids that.
    """
    def __call__(self, w):
        return K.ones_like(w)

In [122]:
from tensorflow.keras.layers import Input, Dense, Embedding, Dot, Flatten, Concatenate, Dropout, Lambda
from tensorflow.keras.models import Model
from tensorflow.keras.initializers import RandomNormal, Zeros, Ones
from tensorflow.keras.regularizers import l2, l1, l1_l2
from tensorflow.keras.constraints import UnitNorm, MinMaxNorm
from tensorflow.keras.optimizers import Adam

from tensorflow.keras import backend as K
import tensorflow as tf

def get_embeddings_model(embedding_matrix):
    """Wrap the pre-trained embeddings in a small, reusable two-input model.

    Both inputs (hyponym id, hypernym id) go through the same frozen
    'TermEmbedding' layer; the model returns the two embedded tensors as
    ``[hypo_embedding, hyper_embedding]``.
    """
    n_rows, n_dims = embedding_matrix.shape[0], embedding_matrix.shape[1]

    hypo_input = Input(shape=(1,))
    hyper_input = Input(shape=(1,))

    # a single layer instance, so both inputs share the same weights
    shared_embedding = Embedding(n_rows, n_dims, name='TermEmbedding',
                                 embeddings_constraint = UnitNorm(axis=1))
    embedded_terms = [shared_embedding(hypo_input), shared_embedding(hyper_input)]

    embedding_model = Model(inputs=[hypo_input, hyper_input], outputs=embedded_terms)

    # load the pre-trained vectors and freeze them
    term_layer = embedding_model.get_layer(name='TermEmbedding')
    term_layer.set_weights([embedding_matrix])
    term_layer.trainable = False

    return embedding_model

def custom_loss(hypo_phi_tensor):
    """Return a Keras loss: binary cross-entropy plus a penalty on ``hypo_phi_tensor``.

    The penalty is ``0.01 * mean(hypo_phi_tensor ** 2)`` and is added to the
    per-sample mean binary cross-entropy.
    """
    def inner_product(y_true, y_pred):
        cross_entropy = K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1)
        penalty = 0.01 * K.mean(hypo_phi_tensor ** 2)
        return cross_entropy + penalty

    return inner_product

def get_CRIM_model(phi_k=1, 
                   train_embeddings = False,
                   embeddings_dim = 300,
                   embeddings_layer = None,
                   phi_init = None,
                   phi_activity_regularisation = None,
                   sigmoid_kernel_regularisation = None,
                   sigmoid_bias_regularisation = None,
                   sigmoid_kernel_constraint = None,
                   dropout_rate = 0.,
                   learning_rate = 0.001,
                   normalised = True,
                   max_or_combine = True # if true then max
                  ):
    """Build and compile the CRIM-style projection-learning model.

    The hyponym embedding is projected by ``phi_k`` linear layers
    ('Phi0' .. 'Phi<k-1>'); each projection is compared with the hypernym
    embedding through a dot product and the result feeds a sigmoid unit.

    Parameters
    ----------
    phi_k : number of projection layers; must be >= 1.
    train_embeddings : whether the nested 'TermEmbedding' layer is trainable.
    embeddings_dim : number of units of every projection layer.
    embeddings_layer : callable sub-model (see get_embeddings_model) mapping
        ``[hypo_input, hyper_input]`` to the two embedded tensors and exposing
        a layer named 'TermEmbedding'.
    phi_init : kernel initializer of the projection layers.
    phi_activity_regularisation : activity regularizer of the projections.
    sigmoid_kernel_regularisation, sigmoid_bias_regularisation,
    sigmoid_kernel_constraint : options forwarded to the 'Prediction' layer.
    dropout_rate : rate shared by all Dropout layers.
    learning_rate : Adam learning rate.
    normalised : passed as ``normalize`` to the projection/hypernym Dot layer.
    max_or_combine : only used when ``phi_k > 1``; if True the maximum over
        the projections is taken before the sigmoid, otherwise all dot
        products are fed to it.

    Returns
    -------
    A compiled Keras Model. With ``phi_k == 1`` the loss is the regularised
    loss built by ``custom_loss``; otherwise plain binary cross-entropy.

    Raises
    ------
    ValueError : if ``phi_k`` is smaller than 1.
    """
    if phi_k < 1:
        raise ValueError('phi_k must be at least 1, got %r' % (phi_k,))

    hypo_input  = Input(shape=(1,), name='Hyponym')
    hyper_input = Input(shape=(1,), name='Hypernym')

    hypo_embedding, hyper_embedding = embeddings_layer([hypo_input, hyper_input])

    # Add Dropout to avoid overfit
    hypo_embedding = Dropout(dropout_rate, name='Dropout_Hypo')(hypo_embedding)
    hyper_embedding = Dropout(dropout_rate, name='Dropout_Hyper')(hyper_embedding)

    # one linear projection of the hyponym per phi
    phi_layer = [Dense(embeddings_dim, activation=None, use_bias=False,
                       activity_regularizer=phi_activity_regularisation,
                       kernel_initializer=phi_init,
                       name='Phi%d' % (i)) (hypo_embedding)
                 for i in range(phi_k)]

    if phi_k == 1:
        # flatten tensors
        phi = Flatten(name='Flatten_Phi')(phi_layer[0])
        hyper_embedding = Flatten(name='Flatten_Hyper')(hyper_embedding)
    else:
        # stack the projections; use the Concatenate class this cell imports
        phi = Concatenate(axis=1)(phi_layer)

    phi = Dropout(dropout_rate, name='Dropout_Phi')(phi)

    # this is referred to as "s" in the "CRIM" paper
    phi_hyper = Dot(axes=-1, normalize=normalised, name='DotProduct1')([phi, hyper_embedding])

    if phi_k == 1:
        # the projection/hyponym dot product is only used as a loss penalty
        phi_hypo = Dot(axes=-1, normalize=False, name='DotProduct2')([phi, hypo_embedding])
        loss = custom_loss(phi_hypo)
    else:
        if max_or_combine:
            phi_hyper = Lambda(lambda x: K.max(x, axis=1, keepdims=True))(phi_hyper)
        phi_hyper = Flatten(name='Flatten_PhiHyper')(phi_hyper)
        loss = 'binary_crossentropy'

    predictions = Dense(1, activation="sigmoid", name='Prediction',
                        use_bias=True,
                        kernel_initializer='random_normal',
                        kernel_constraint= sigmoid_kernel_constraint,
                        bias_initializer=Zeros(),
                        kernel_regularizer=sigmoid_kernel_regularisation,
                        bias_regularizer=sigmoid_bias_regularisation
                       ) (phi_hyper)

    # instantiate model
    model = Model(inputs=[hypo_input, hyper_input], outputs=predictions)

    # the Embedding lives inside the nested embeddings sub-model
    embeddings_layer.get_layer(name='TermEmbedding').trainable = train_embeddings

    adam = Adam(lr = learning_rate, beta_1 = 0.9, beta_2 = 0.9, clipnorm=1.)
    model.compile(optimizer=adam, loss=loss, metrics=['accuracy'])

    return model

In [18]:
from tensorflow.keras import models
from tensorflow.keras.utils import get_custom_objects
from tensorflow.python.framework import dtypes
from tensorflow.keras.initializers import Initializer

class CustomInitializer:
    """Callable initializer that delegates to ``CustomIdentity.random_identity``."""
    # NOTE(review): CustomIdentity is not defined in the visible part of the
    # notebook - confirm it exists before using this class.
    def __call__(self, shape, dtype='float32', partition_info=None):
        return CustomIdentity().random_identity(shape, dtype, partition_info)

class RandomIdentity(Initializer):
    """Initializer producing a diagonal matrix of small Gaussian noise.

    The result is ``eye * N(0, 0.01)``: off-diagonal entries are zero and
    diagonal entries are drawn from a normal distribution with mean 0 and
    standard deviation 0.01.
    """
    def __init__(self, dtype=dtypes.float32):
        self.dtype = dtypes.as_dtype(dtype)

    def __call__(self, shape, dtype=None, partition_info=None):
        # fall back to the dtype chosen at construction time
        if dtype is None:
            dtype = self.dtype

        # honour the requested kernel shape (rows x cols) and dtype; for the
        # square kernels used in this notebook this matches the old output
        rows, cols = shape[0], shape[-1]
        rnorm = K.random_normal((rows, cols), mean=0., stddev=0.01, dtype=dtype)
        rident = tf.eye(rows, num_columns=cols, dtype=dtype) * rnorm
        return rident

    def get_config(self):
        return {"dtype": self.dtype.name}
    
class RandomPlusIdentity(Initializer):
    """Initializer producing an identity matrix perturbed by Gaussian noise.

    The result is ``eye + N(0, 0.01)``: every entry receives noise drawn from
    a normal distribution with mean 0 and standard deviation 0.01.
    """
    def __init__(self, dtype=dtypes.float32):
        self.dtype = dtypes.as_dtype(dtype)

    def __call__(self, shape, dtype=None, partition_info=None):
        # fall back to the dtype chosen at construction time
        if dtype is None:
            dtype = self.dtype

        # honour the requested kernel shape (rows x cols) and dtype; for the
        # square kernels used in this notebook this matches the old output
        rows, cols = shape[0], shape[-1]
        rnorm = K.random_normal((rows, cols), mean=0., stddev=0.01, dtype=dtype)
        rident = tf.eye(rows, num_columns=cols, dtype=dtype) + rnorm
        return rident

    def get_config(self):
        return {"dtype": self.dtype.name}
        

# register the custom initializers under their class names with Keras
get_custom_objects().update({
    'RandomIdentity': RandomIdentity,
    'RandomPlusIdentity': RandomPlusIdentity,
})


# the idea is to train an initial model to generate reasonable projection matrices that,
# when applied to a hyponym, gets it close to the hypernym in question.

# after initial training, we will inject the phi and sigmoid layer trained weights into the model;
# the projections will be frozen but instead we will adjust the embeddings.  
# prediction layer weights may also be modified 
def get_CRIM_model_freeze_phi(model,
                              train_phi=False, 
                              train_embeddings = True,
                              train_sigmoid = True,                              
                              sigmoid_kernel_regularisation = None,
                              sigmoid_bias_regularisation = None,
                              sigmoid_kernel_constraint = None,
                              dropout_rate = 0.,
                              learning_rate = 0.0005):
    """Clone a trained CRIM model, copy its weights and re-compile it.

    The clone receives the embeddings, every 'Phi*' projection kernel and the
    'Prediction' weights of ``model``; each group can then be frozen or left
    trainable independently. The passed-in model is not modified.

    Parameters
    ----------
    model : trained model built by get_CRIM_model (embeddings nested in a
        sub-model exposing a 'TermEmbedding' layer).
    train_phi : whether the copied 'Phi*' layers are trainable.
    train_embeddings : whether the copied 'TermEmbedding' layer is trainable.
    train_sigmoid : whether the copied 'Prediction' layer is trainable.
    sigmoid_kernel_regularisation, sigmoid_bias_regularisation,
    sigmoid_kernel_constraint, dropout_rate : accepted but not used by this
        function; the clone keeps the configuration of ``model``.
    learning_rate : Adam learning rate of the new model.

    Returns
    -------
    The cloned model, compiled with binary cross-entropy.
    """
    def _term_embedding(parent):
        # the Embedding layer is "embedded" in a sub-model of the parent model
        sub_models = [l for l in parent.layers if isinstance(l, Model)]
        return sub_models[0].get_layer(name='TermEmbedding')

    # retain original model; create copy of passed model
    new_model = models.clone_model(model)

    # collect the learned weights of the source model
    embeddings = _term_embedding(model).get_weights()[0]
    # a list (not map()) so that indexing also works on Python 3
    phi_kernels = [l.get_weights()[0] for l in model.layers if l.name.startswith('Phi')]
    lr_weights = model.get_layer(name='Prediction').get_weights()

    # inject pre-trained embedding weights into Embedding layer
    new_embedding = _term_embedding(new_model)
    new_embedding.set_weights([embeddings])
    new_embedding.trainable = train_embeddings

    # projections are matched by position in the layer list
    phi_projections = [l for l in new_model.layers if l.name.startswith('Phi')]
    for phi_projection, kernel in zip(phi_projections, phi_kernels):
        phi_projection.set_weights([kernel])
        phi_projection.trainable = train_phi

    prediction_layer = new_model.get_layer(name='Prediction')
    prediction_layer.set_weights(lr_weights)
    prediction_layer.trainable = train_sigmoid

    adam = Adam(lr=learning_rate, beta_1 = 0.9, beta_2 = 0.9, clipnorm=1.)
    new_model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

    return new_model
    
    

### The training algorithm uses mini-batch stochastic gradient descent with negative sampling

In [19]:
def train(model,       # the model which parameters will be learnt
          epochs,      # number of epochs to run          
          batch_size,  # size of mini-batch
          m,           # number of negative samples
          data,        # data required for training                              
          neg_strategy,
          prediction_algol = crim_get_hypernym,
          bag = False
         ):
    """Train ``model`` with mini-batches of positive pairs plus negative samples.

    For every epoch the training pairs are shuffled, split into batches of
    ``batch_size`` positive pairs, and each batch is extended by
    ``extend_batch_with_negatives`` before ``train_on_batch`` is called.
    After each epoch the loss/accuracy on the (all-positive) validation pairs
    and the validation MRR are printed.

    Parameters
    ----------
    model : compiled Keras model taking ``[term_ids, hypernym_ids]``.
    epochs : number of passes over the training pairs.
    batch_size : number of positive pairs per mini-batch.
    m : forwarded to ``extend_batch_with_negatives``.
    data : object exposing ``tokenizer``, ``train_query``, ``train_hyper``,
        ``valid_query`` and ``valid_hyper``.
    neg_strategy : forwarded to ``extend_batch_with_negatives``.
    prediction_algol : forwarded to ``predict_crim_hypernyms`` for the MRR.
    bag : if True, the training indices are resampled with replacement once
        before training starts.

    Returns
    -------
    None; progress is printed.
    """
    tokenizer = data.tokenizer

    # token-id sequences; converted to arrays once, outside the epoch loop
    X_term_train = np.array(tokenizer.texts_to_sequences(data.train_query), dtype='int32')
    X_hyper_train = np.array(tokenizer.texts_to_sequences(data.train_hyper), dtype='int32')

    # the whole validation set is scored after every epoch (no shuffling);
    # every validation pair is a positive example
    X_term_valid = np.array(tokenizer.texts_to_sequences(data.valid_query), dtype='int32')
    X_hyper_valid = np.array(tokenizer.texts_to_sequences(data.valid_hyper), dtype='int32')
    y_valid = [1] * X_term_valid.shape[0]

    samples = np.arange(len(X_term_train))
    if bag:
        # bag samples
        samples = np.random.choice(samples, size=len(samples), replace=True)

    # train algorithm
    for epoch in range(epochs):
        # reset loss
        loss = 0.
        np.random.shuffle(samples)

        shuffled_X_term = X_term_train[samples]
        shuffled_X_hyper = X_hyper_train[samples]

        for b in range(0, len(samples), batch_size):
            # mini-batch of up to batch_size +ve samples
            batch_X_term = shuffled_X_term[b:b + batch_size]
            batch_X_hyper = shuffled_X_hyper[b:b + batch_size]

            # complement +ve samples with negatives
            batch_X_term, batch_X_hyper, batch_y_label =\
            extend_batch_with_negatives(batch_X_term, batch_X_hyper,
                                        neg_strategy,
                                        tokenizer, m
                                       )

            # train on batch; element 0 of the result is the loss
            loss += model.train_on_batch([batch_X_term, batch_X_hyper],
                                          batch_y_label)[0]

        # model trained for full epoch: evaluate on the validation pairs
        test_loss, test_accuracy = model.test_on_batch([X_term_valid, X_hyper_valid],
                                          y_valid)

        # compute MRR on validation set (first score of every score list)
        predictions = predict_crim_hypernyms(data.valid_query, tokenizer, model, prediction_algol)
        _, all_scores = get_evaluation_scores((data.valid_query, data.valid_hyper), predictions)
        mrr = round(sum([score_list[0] for score_list in all_scores]) / len(all_scores), 5)

        print('Epoch:', epoch+1, 'Loss:', loss, 'Test Loss:', test_loss, 'MRR:', mrr, 'Test accuracy:', test_accuracy)    


In [29]:
# training harness

from tensorflow.keras.initializers import RandomNormal, Zeros, Ones
from tensorflow.keras.regularizers import l2, l1, l1_l2

#rand_norm_m0_sd001 = RandomNormal(mean = 0.0, stddev=0.01, seed=42)
#rand_norm = RandomNormal(mean = 0.0, stddev=1., seed=42)

# ---- run settings ------------------------------------------------------
batch_size = 32                 # positive batch size
epochs = 6                      # mini-batch stochastic training epochs
m = 10                          # number of negative samples
phi_k = 1                       # number of projections
train_embeddings = False        # train (True) or freeze
dropout_rate = 0.3              # dropout rate
learning_rate = 0.001
bag  = False
normalised = True
max_or_combine = True

# ---- named choices, resolved through the lookup tables below ------------
negative_option = 'random'              # negative sample strategy
phi_init_option = 'random_identity'     # initialise phi strategy
kernel_constraint_option = 'None'       # constrain LR parameter

# negative sampling options
neg_sampling_options = {
    'synonym': data.synonyms,
    'random': data.random_words,
}

# phi random init options
phi_init_options = {
    'random_plus_identity': RandomPlusIdentity(),
    'random_identity': RandomIdentity(),
}

kernel_constraints = {
    'None': None,
    'ForceToOne': ForceToOne(),
}

np.random.seed(10)

# frozen, shared embeddings sub-model
embeddings_layer = get_embeddings_model(embedding_matrix=data.embedding_matrix)

# create model
crim_model = get_CRIM_model(
    phi_k = phi_k,
    train_embeddings = train_embeddings,
    embeddings_dim = data.embeddings_dim,
    embeddings_layer = embeddings_layer,
    phi_init = phi_init_options[phi_init_option],
    sigmoid_kernel_regularisation = None,
    sigmoid_bias_regularisation = None,
    sigmoid_kernel_constraint = kernel_constraints[kernel_constraint_option],
    dropout_rate = dropout_rate,
    learning_rate = learning_rate,
    normalised = normalised,
    max_or_combine = max_or_combine,
)

crim_model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Hyponym (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
Hypernym (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
model_4 (Model)                 [(None, 1, 200), (No 43904800    Hyponym[0][0]                    
                                                                 Hypernym[0][0]                   
__________________________________________________________________________________________________
Dropout_Hypo (Dropout)          (None, 1, 200)       0           model_4[1][0]                    
__________

In [None]:
# plot model
# crim_model is built from tensorflow.keras layers, so the plotting helper
# must come from tensorflow.keras too (not from the standalone keras package)
from tensorflow.keras.utils import plot_model

plot_model(crim_model, to_file='CRIM_alternate_model.png', show_shapes=True, show_layer_names=True)

In [30]:
import math

predict = True

# log the experiment configuration next to the results
print ('Epochs: ', epochs, 'Batch size: ', batch_size, 'm: ', m, 'pki_k: ', phi_k, 'train_embeddings: ', train_embeddings,
      'Negative sampling: ', negative_option, 'Phi Init: ', phi_init_option, 'Dropout rate: ', dropout_rate, 
      'Kernel constraint: ', kernel_constraint_option, 'Learning rate: ', learning_rate, 
      'Bagging:', bag, 'Normalised: ', normalised, 'max_or_combine: ', max_or_combine)

# first training phase
train(crim_model, epochs, batch_size, m, data, neg_sampling_options[negative_option], crim_max_get_hypernym)

# evaluate on the test split
if predict:
    print ("Generating predictions...")    
    crim_predictions = predict_crim_hypernyms(data.test_query, data.tokenizer, crim_model, crim_max_get_hypernym)

    print ("CRIM evaluation:")
    score_names, all_scores = get_evaluation_scores((data.test_query, data.test_hyper), crim_predictions)
    # report the mean of every metric over all test queries
    for k, score_name in enumerate(score_names):
        mean_score = sum([score_list[k] for score_list in all_scores]) / len(all_scores)
        print (score_name+': '+str(round(mean_score, 5)))



('Epochs: ', 6, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001, 'Bagging:', False, 'Normalised: ', True, 'max_or_combine: ', True)
('Epoch:', 1, 'Loss:', 220.35547143220901, 'Test Loss:', 0.9784799, 'MRR:', 0.0, 'Test accuracy:', 0.0)
('Epoch:', 2, 'Loss:', 169.29875203967094, 'Test Loss:', 1.2827737, 'MRR:', 0.0, 'Test accuracy:', 0.0)
('Epoch:', 3, 'Loss:', 135.76979410648346, 'Test Loss:', 1.579487, 'MRR:', 0.00333, 'Test accuracy:', 0.0)
('Epoch:', 4, 'Loss:', 115.8390389084816, 'Test Loss:', 1.7987796, 'MRR:', 0.02833, 'Test accuracy:', 0.0)
('Epoch:', 5, 'Loss:', 105.12239652872086, 'Test Loss:', 1.8679018, 'MRR:', 0.1, 'Test accuracy:', 0.0)
('Epoch:', 6, 'Loss:', 99.66103854775429, 'Test Loss:', 1.8479438, 'MRR:', 0.13, 'Test accuracy:', 0.0)
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)

In [31]:
# how many queries have their own term among their predicted hypernyms
self_predicted = [query for query, hypernyms in crim_predictions.items() if query in hypernyms]
print (len(self_predicted))

# display the full prediction dictionary
crim_predictions



137


{u'scouter': [u'sports_organization',
  u'sports',
  u'athlete',
  u'sport',
  u'roster',
  u'professional_sports',
  u'basketball',
  u'team_sport',
  u'professional_football',
  u'wheelchair_basketball',
  u'sportsmanship',
  u'athletic_competition',
  u'soccer',
  u'sporting',
  u'football'],
 u'gatekeeper': [u'computer_system',
  u'computer_network',
  u'local_area_network',
  u'service_provider',
  u'personal_computer',
  u'application_software',
  u'telecommunication',
  u'software',
  u'computer',
  u'telecommunications_network',
  u'software_application',
  u'data_communication',
  u'applications_software',
  u'end-user',
  u'services'],
 u'mackerel': [u'edible',
  u'fish',
  u'shellfish',
  u'plant_part',
  u'food_product',
  u'molluscan',
  u'algae',
  u'freshwater_fish',
  u'plant_food',
  u'brine_shrimp',
  u'plant_material',
  u'oysters',
  u'fish_species',
  u'plankton',
  u'bivalve'],
 u'prefix': [u'application_software',
  u'input/output',
  u'software_application',
  u

In [53]:
# Token-id sequences for every training (query, hypernym) pair.
hypo_seq = data.tokenizer.texts_to_sequences(data.train_query)
hyper_seq = data.tokenizer.texts_to_sequences(data.train_hyper)

# First snapshot of crim_model's scores on the training pairs, as a flat array.
train_pred_1 = crim_model.predict([hypo_seq, hyper_seq]).flatten()

In [57]:
# Second snapshot with the identical call as train_pred_1.
# NOTE(review): presumably meant to be run after crim_model has been trained
# further in between - confirm the intended execution order.
train_pred_2 = crim_model.predict([hypo_seq, hyper_seq]).flatten()

In [66]:
# Count for how many training pairs the second snapshot scores higher.
# NOTE(review): Counter is not imported in the visible part of the notebook -
# confirm `from collections import Counter` exists in an earlier cell.
print Counter(train_pred_2 > train_pred_1)


# Not the last expression of the cell, so this tuple is not displayed.
train_pred_2[100], train_pred_1[100]
# Number of training pairs scored.
len(train_pred_1)


Counter({True: 7162, False: 4617})


11779

## Start second phase of training

In [32]:
# run further training if required
m = 10
negative_option = 'random'
epochs = 2
batch_size = 32
dropout_rate = 0.3
learning_rate = 0.0005
# phase two keeps the projections fixed and fine-tunes the embeddings;
# naming the flags here keeps the logged configuration in sync with the model
train_embeddings = True
train_phi = False

predict = True
# set to True to keep training an existing crim_model_2 instead of rebuilding it
resume = False

if not resume:
    print ("Create transfer learning model")
    crim_model_2 = get_CRIM_model_freeze_phi(crim_model,
                                             train_phi = train_phi,
                                             train_embeddings = train_embeddings,
                                             dropout_rate = dropout_rate,
                                             learning_rate = learning_rate)                                             

print ('Epochs: ', epochs, 'Batch size: ', batch_size, 'm: ', m, 'pki_k: ', phi_k, 'train_embeddings: ', train_embeddings,
      'Negative sampling: ', negative_option, 'Phi Init: ', phi_init_option, 'Dropout rate: ', dropout_rate, 
      'Kernel constraint: ', kernel_constraint_option, 'Learning rate: ', learning_rate, 'Bag:', bag)

train(crim_model_2, epochs, batch_size, m, data, neg_sampling_options[negative_option], crim_max_get_hypernym, bag=bag)
if predict:
    print ("Generating predictions...")
    crim_predictions_2 = predict_crim_hypernyms(data.test_query, data.tokenizer, crim_model_2, crim_max_get_hypernym)

    print ("CRIM evaluation:")
    score_names, all_scores = get_evaluation_scores((data.test_query, data.test_hyper), crim_predictions_2)
    # report the mean of every metric over all test queries
    for k in range(len(score_names)):
        print (score_names[k]+': '+str(round(sum([score_list[k] for score_list in all_scores]) / len(all_scores), 5)))



Create transfer learning model
('Epochs: ', 2, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005, 'Bag:', False)
('Epoch:', 1, 'Loss:', 91.90591688454151, 'Test Loss:', 1.619671, 'MRR:', 0.244, 'Test accuracy:', 0.0)
('Epoch:', 2, 'Loss:', 79.96274498105049, 'Test Loss:', 1.3946604, 'MRR:', 0.26389, 'Test accuracy:', 0.03)
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.30296
MAP: 0.14996
P@1: 0.26217
P@5: 0.14275
P@10: 0.13407


## Threw away affine layer weights, locked down on phi weights
('Epochs: ', 2, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 10, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005)

/home/jfarrugia/venv/local/lib/python2.7/site-packages/tensorflow/python/ops/gradients_impl.py:108: UserWarning: Converting sparse IndexedSlices to a dense Tensor of unknown shape. This may consume a large amount of memory.
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "

('Epoch:', 1, 'Loss:', 232.49733012914658, 'Test Loss:', 0.8287846446037292, 'MRR:', 0.09667)
('Epoch:', 2, 'Loss:', 199.24632835388184, 'Test Loss:', 0.9277108907699585, 'MRR:', 0.1882)

MRR: 0.27108
MAP: 0.13106
P@1: 0.23215
P@5: 0.12406
P@10: 0.117



## Third phase of training?

In [50]:
# run further training if required
# which parts of the network are updated in this phase
train_phi = True
train_embeddings = False
predict = True

# optimisation settings
epochs = 5
batch_size = 1
m = 10
negative_option = 'random'
dropout_rate = 0.3
learning_rate = 0.0001

# transfer weights to new model
crim_model_3 = get_CRIM_model_freeze_phi(
    crim_model_2,
    train_phi = train_phi,
    train_embeddings = train_embeddings,
    dropout_rate = dropout_rate,
    learning_rate = learning_rate,
)

crim_model_3.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
Hyponym (InputLayer)            (None, 1)            0                                            
__________________________________________________________________________________________________
Hypernym (InputLayer)           (None, 1)            0                                            
__________________________________________________________________________________________________
model_6 (Model)                 [(None, 1, 200), (No 43904800    Hyponym[0][0]                    
                                                                 Hypernym[0][0]                   
__________________________________________________________________________________________________
Dropout_Hypo (Dropout)          (None, 1, 200)       0           model_6[1][0]                    
__________

In [51]:
# log the experiment configuration next to the results
print ('Epochs: ', epochs, 'Batch size: ', batch_size, 'm: ', m, 'pki_k: ', phi_k, 'train_embeddings: ', train_embeddings,
      'Negative sampling: ', negative_option, 'Phi Init: ', phi_init_option, 'Dropout rate: ', dropout_rate, 
      'Kernel constraint: ', kernel_constraint_option, 'Learning rate: ', learning_rate)

# third training phase
train(crim_model_3, epochs, batch_size, m, data, neg_sampling_options[negative_option], crim_max_get_hypernym)

# evaluate on the test split
if predict:
    print ("Generating predictions...")
    crim_predictions_3 = predict_crim_hypernyms(data.test_query, data.tokenizer, crim_model_3, crim_max_get_hypernym)

    print ("CRIM evaluation:")
    score_names, all_scores = get_evaluation_scores((data.test_query, data.test_hyper), crim_predictions_3)
    # report the mean of every metric over all test queries
    for k, score_name in enumerate(score_names):
        mean_score = sum([score_list[k] for score_list in all_scores]) / len(all_scores)
        print (score_name+': '+str(round(mean_score, 5)))


('Epochs: ', 5, 'Batch size: ', 1, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0001)
('Epoch:', 1, 'Loss:', 2646.805278584361, 'Test Loss:', 1.389255, 'MRR:', 0.16967, 'Test accuracy:', 0.05)
('Epoch:', 2, 'Loss:', 2418.4388537034392, 'Test Loss:', 1.2388388, 'MRR:', 0.20389, 'Test accuracy:', 0.225)
('Epoch:', 3, 'Loss:', 2214.004394464195, 'Test Loss:', 1.1100814, 'MRR:', 0.17433, 'Test accuracy:', 0.33)
('Epoch:', 4, 'Loss:', 2037.8692606203258, 'Test Loss:', 1.0190609, 'MRR:', 0.18, 'Test accuracy:', 0.4)
('Epoch:', 5, 'Loss:', 1882.0565821416676, 'Test Loss:', 0.94320244, 'MRR:', 0.18367, 'Test accuracy:', 0.475)
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 14

In [109]:
# Print crim_model_2's predicted probability for every training
# (query, hypernym) pair, one predict() call per pair.
for q, h in zip(data.train_query, data.train_hyper):
    # look up the tokenizer id of each term
    # NOTE(review): presumably word_index is a plain dict, so a term missing
    # from the tokenizer vocabulary would raise KeyError - confirm.
    hyp_prob = crim_model_2.predict([[data.tokenizer.word_index[q]], [data.tokenizer.word_index[h]] ])
    print q, h, hyp_prob

blackfly homopterous_insect [[0.13822134]]
blackfly insect [[0.31352118]]
turonian technical_specification [[0.23077463]]
turonian geologic_timescale [[0.17301801]]
turonian physical_property [[0.24317496]]
turonian geological_period [[0.22001244]]
turonian magnitude [[0.25076795]]
turonian unit_of_time [[0.19958243]]
turonian geological_time [[0.29483446]]
turonian geologic_time [[0.3147337]]
abhorrence distaste [[0.26146114]]
abhorrence hatred [[0.301238]]
abhorrence hate [[0.23270667]]
abhorrence disgust [[0.25828835]]
tropical_storm atmosphere [[0.30975243]]
tropical_storm windstorm [[0.3356372]]
tropical_storm violent_storm [[0.3436602]]
tropical_storm air_current [[0.27681932]]
tropical_storm atmospheric_state [[0.28246477]]
tropical_storm density [[0.16726443]]
tropical_storm current_of_air [[0.28305537]]
tropical_storm storm_damage [[0.2825659]]
tropical_storm atmospheric_phenomenon [[0.3401734]]
tropical_storm storm [[0.3732807]]
tropical_storm cyclone [[0.32870743]]
tropical_

murder_one motion_picture [[0.3930161]]
murder_one picture_show [[0.33170384]]
emmett_tyrrell newspaper_columnist [[0.2642208]]
emmett_tyrrell writer [[0.41461605]]
emmett_tyrrell person [[0.50902945]]
emmett_tyrrell columnist [[0.25528604]]
emmett_tyrrell editorialist [[0.20438492]]
emmett_tyrrell media_professional [[0.29104128]]
emmett_tyrrell journalist [[0.31737348]]
failure_rate rate [[0.20301421]]
failure_rate ratio [[0.18220463]]
kim_il-sung_university ceremony [[0.19561918]]
kim_il-sung_university school [[0.29930297]]
kim_il-sung_university academy [[0.2289273]]
kim_il-sung_university university [[0.27446017]]
kim_il-sung_university educational_institution [[0.32194445]]
kim_il-sung_university ceremonial_occasion [[0.14651176]]
chess party_game [[0.269398]]
chess parlor_game [[0.19867644]]
chess board_game [[0.31289226]]
chess parlour_game [[0.12833712]]
sachs_harbour city [[0.32362217]]
sachs_harbour hamlet [[0.15060624]]
myrdal economist [[0.18119095]]
myrdal politician [[0

spinal_fusion life_science [[0.18817279]]
spinal_fusion orthopedic_surgery [[0.32129806]]
spinal_fusion md [[0.27630508]]
spinal_fusion orthopaedics [[0.2974583]]
spinal_fusion orthopedist [[0.31243858]]
spinal_fusion doctor [[0.35547385]]
spinal_fusion medical_doctor [[0.33681884]]
spinal_fusion surgical_operation [[0.21601234]]
spinal_fusion medical_procedure [[0.31789145]]
spinal_fusion orthopaedist [[0.2543239]]
spinal_fusion orthopedics [[0.33698484]]
spinal_fusion specialist_degree [[0.20291413]]
spinal_fusion treatment [[0.28592062]]
spinal_fusion dr. [[0.28194568]]
spinal_fusion medical_care [[0.3435534]]
spinal_fusion practice_of_medicine [[0.34872663]]
spinal_fusion surgical_process [[0.20340286]]
spinal_fusion medical_specialist [[0.31877825]]
trust indicator [[0.16326107]]
trust partnership [[0.3667279]]
trust private_corporation [[0.29238474]]
trust corporation [[0.402647]]
trust company [[0.36188436]]
trust syndicate [[0.22269708]]
trust comradeship [[0.14144793]]
trust c

vendue sale [[0.30768088]]
vendue seller [[0.31579652]]
vendue salesman [[0.2374609]]
vendue selling_technique [[0.21348847]]
leaf_curl illness [[0.28790316]]
leaf_curl plant_disease [[0.30424783]]
leaf_curl sickness [[0.23860763]]
leaf_curl disease [[0.32648575]]
leaf_curl phytopathology [[0.17992261]]
leaf_curl plant_pathology [[0.25151938]]
leaf_curl phytopathogen [[0.1292862]]
leaf_curl disorder [[0.231635]]
crisis difficulty [[0.17936124]]
crisis state [[0.3365583]]
curiosa medium [[0.22006471]]
curiosa book [[0.262291]]
curiosa piece_of_work [[0.28577852]]
curiosa communication_medium [[0.3355832]]
rainfall phenomenon [[0.28784811]]
rainfall downfall [[0.09283576]]
rainfall optical_phenomenon [[0.14846677]]
rainfall natural_phenomenon [[0.36858648]]
rainfall atmospheric_precipitation [[0.21055134]]
rainfall weather [[0.36159596]]
rainfall physical_phenomenon [[0.33717924]]
rainfall weather_condition [[0.3548077]]
rainfall precipitation [[0.31402424]]
endemic_goitre disease [[0.32

course mathematical_process [[0.21270032]]
course route [[0.20126326]]
course steering [[0.16490266]]
course piloting [[0.22015883]]
escutcheon plate [[0.1538961]]
escutcheon personal_armor [[0.19113107]]
escutcheon suit_of_armour [[0.16316023]]
escutcheon armour [[0.1722294]]
escutcheon protection [[0.15596355]]
escutcheon protective_cover [[0.16576788]]
escutcheon suit_of_armor [[0.17236999]]
escutcheon protective_covering [[0.17377162]]
escutcheon body_armour [[0.19911815]]
escutcheon body_armor [[0.21284625]]
suspension supporting [[0.25012743]]
suspension mixture [[0.09946257]]
suspension technical_specification [[0.3200718]]
suspension abstract_object [[0.19791186]]
suspension support [[0.2382636]]
suspension thermodynamic_system [[0.1325794]]
suspension mechanism [[0.30134764]]
suspension state [[0.26529709]]
suspension mechanical_assembly [[0.19331318]]
suspension physical_system [[0.23167461]]
suspension inaction [[0.21673264]]
suspension pure_mathematics [[0.14825869]]
suspen

nuclear_war military_action [[0.27704483]]
nuclear_war war [[0.31941605]]
world serial_publication [[0.17171936]]
world cerebration [[0.09349024]]
world curiosity [[0.14345887]]
world wonder [[0.16217478]]
solent recess [[0.11292539]]
solent world_ocean [[0.20022]]
solent strait [[0.18661803]]
solent waterbody [[0.31075904]]
solent sea [[0.2740468]]
solent stream_channel [[0.2263693]]
solent waterway [[0.32879427]]
solent body_of_water [[0.36398074]]
martin_brest film_director [[0.3532245]]
martin_brest television_producer [[0.34505266]]
martin_brest producer [[0.26831603]]
martin_brest photography [[0.23258936]]
martin_brest filmmaker [[0.37060457]]
martin_brest film_maker [[0.3308839]]
martin_brest visual_art [[0.29877332]]
martin_brest movie_maker [[0.24792656]]
particle part_of_speech [[0.19271263]]
particle grammatical_category [[0.22089933]]
verbal_abuse offence [[0.27898175]]
verbal_abuse abuse [[0.29303655]]
verbal_abuse offensive_activity [[0.16063994]]
verbal_abuse ill-usage 

udp_lite rule [[0.19186546]]
udp_lite standard [[0.24690369]]
udp_lite regulation [[0.13867837]]
udp_lite data_file [[0.2836678]]
udp_lite communications_protocol [[0.29385504]]
udp_lite measure [[0.21537827]]
udp_lite text_file [[0.27718303]]
udp_lite guideline [[0.20946288]]
under_the_moon work_of_art [[0.3432211]]
under_the_moon tv_program [[0.3402193]]
under_the_moon television_series [[0.3379205]]
under_the_moon series [[0.20746408]]
under_the_moon television_program [[0.343409]]
under_the_moon broadcast [[0.25707522]]
under_the_moon tv_show [[0.35745886]]
under_the_moon television_show [[0.35489064]]
under_the_moon tv_series [[0.32847422]]
andrew_anderson corporate_executive [[0.26310658]]
andrew_anderson person [[0.5388877]]
andrew_anderson juridical_person [[0.25749913]]
andrew_anderson executive_director [[0.25127816]]
andrew_anderson c-suite [[0.18128443]]
andrew_anderson political_leader [[0.32033718]]
andrew_anderson business_executive [[0.28774515]]
andrew_anderson decisio

fence merchant [[0.16859809]]
fence obstruction [[0.17864501]]
zero grammatical_category [[0.21111646]]
zero decimal_digit [[0.23588908]]
zero imaginary [[0.16378802]]
zero natural_number [[0.20409343]]
zero complex_quantity [[0.1958786]]
zero complex_number [[0.22275358]]
zero numerical_quantity [[0.20195447]]
zero imaginary_number [[0.19536412]]
zero counting_number [[0.27326632]]
zero natural_numbers [[0.21012805]]
zero whole_number [[0.15368357]]
subspecies population [[0.20243232]]
gerry_davis competitor [[0.30742404]]
gerry_davis sport [[0.2965371]]
gerry_davis baseball_player [[0.22036032]]
investment possession [[0.1632455]]
investment financing [[0.31474382]]
investment tegument [[0.09607606]]
investment finance [[0.31894433]]
investment protection [[0.20270817]]
investment connective_tissue [[0.09542301]]
investment assets [[0.3592923]]
birdwatching zoology [[0.23650427]]
birdwatching observation [[0.18865477]]
birdwatching zoological_science [[0.16647753]]
golden_gate_park p

jerkin jacket [[0.27980044]]
jerkin outer_garment [[0.27189332]]
jerkin clothing [[0.37901464]]
jerkin clothes [[0.362608]]
cowpox illness [[0.36879572]]
cowpox lesion [[0.21872726]]
cowpox pox [[0.2523112]]
cowpox sickness [[0.29759386]]
cowpox skin_disorder [[0.24915615]]
cowpox excoriation [[0.177257]]
cowpox infection [[0.30997097]]
cowpox skin_disease [[0.2779071]]
cowpox acne_vulgaris [[0.19827881]]
cowpox bleb [[0.13527697]]
cowpox pustule [[0.16406827]]
cowpox blister [[0.19245882]]
cowpox contagion [[0.17466708]]
cowpox cutaneous_disease [[0.19456004]]
cowpox communicable_disease [[0.25508094]]
cowpox pathological_state [[0.24558325]]
cowpox disease [[0.39467588]]
cowpox scrape [[0.12971234]]
cowpox inflammatory_disease [[0.26005256]]
cowpox biological_group [[0.13982673]]
cowpox disorder [[0.34097084]]
supermajority relative_majority [[0.23100458]]
supermajority pure_mathematics [[0.17447153]]
supermajority area_of_mathematics [[0.18328014]]
interception interference [[0.1473

coalition political_party [[0.28822863]]
coalition cite [[0.18671861]]
coalition covenant [[0.16263789]]
coalition government [[0.36705458]]
coalition political_coalition [[0.21278103]]
coalition planning_board [[0.23866579]]
coalition mention [[0.16334428]]
coalition unification [[0.19226725]]
coalition jointure [[0.10842416]]
coalition annotation [[0.13718162]]
coalition point_of_reference [[0.20603675]]
coalition treaty [[0.2288974]]
coalition credit [[0.21857578]]
coalition political_organisation [[0.27365544]]
hamlet novel_adaptation [[0.16538778]]
hamlet performing_arts [[0.28589216]]
hamlet musical_composition [[0.2841703]]
hamlet music_group [[0.21077]]
hamlet imaginary_being [[0.14717735]]
hamlet dramatic_art [[0.29676005]]
hamlet moving-picture_show [[0.15029663]]
hamlet fine_art [[0.25194183]]
hamlet film [[0.35906196]]
hamlet brand_image [[0.17415963]]
hamlet literary_adaptation [[0.17668301]]
hamlet theatre [[0.3309224]]
hamlet movie [[0.35216522]]
hamlet rock_group [[0.26

mexico narrative [[0.20850582]]
mexico federal_district [[0.19054858]]
mexico knowledge_organization [[0.22442126]]
register written_account [[0.16423315]]
register component [[0.28485626]]
register evidence [[0.16182981]]
register add-in [[0.25989056]]
register written_record [[0.22638516]]
register electronic_component [[0.24676487]]
register plug-in [[0.23808686]]
register technology [[0.3446803]]
register card [[0.21137296]]
register point_of_reference [[0.21903062]]
register pc_board [[0.19149798]]
register write-up [[0.2380838]]
register controller [[0.23146851]]
register electronic_equipment [[0.27458736]]
register indication [[0.18320067]]
register circuit_card [[0.21383028]]
register circuit [[0.1870842]]
register written_document [[0.31821188]]
register computer_chip [[0.19771458]]
register scientific_evidence [[0.15941544]]
flagship_studios software_company [[0.32397184]]
flagship_studios company [[0.38361073]]
flagship_studios video_game_developer [[0.2111015]]
flagship_stu

bobby_williams jock [[0.24615344]]
bobby_williams football_club [[0.22650771]]
bobby_williams sports_organization [[0.28916582]]
bobby_williams person [[0.448712]]
bobby_williams handler [[0.13865156]]
bobby_williams national_sports_team [[0.1646423]]
bobby_williams team [[0.28117952]]
bobby_williams sports_club [[0.26390937]]
bobby_williams footballer [[0.27778465]]
bobby_williams leader [[0.34114453]]
bobby_williams sportsperson [[0.34443736]]
avenue roadway [[0.3267094]]
avenue freeway [[0.34810138]]
avenue thruway [[0.2964258]]
avenue street [[0.33144408]]
avenue way [[0.20074488]]
avenue route [[0.30133197]]
avenue main_road [[0.34419268]]
avenue motorway [[0.3218986]]
avenue fund [[0.2205655]]
avenue state_highway [[0.33481815]]
avenue physical_system [[0.14329892]]
avenue superhighway [[0.29823676]]
avenue controlled-access_highway [[0.2696405]]
avenue expressway [[0.404]]
avenue store [[0.1974264]]
texas map [[0.2547957]]
texas public_presentation [[0.2375652]]
texas musical_co

union_station public_building [[0.39182687]]
union_station place_of_business [[0.2592685]]
union_station outlet [[0.18659094]]
union_station metro_station [[0.32768732]]
union_station tract [[0.19321603]]
union_station services [[0.22401349]]
union_station railway_station [[0.3540012]]
union_station shopping_precinct [[0.26365188]]
union_station shopping_mall [[0.35366255]]
union_station protected_area [[0.20533723]]
union_station center [[0.22923557]]
union_station restaurant [[0.25234416]]
union_station shopping_center [[0.37369466]]
union_station mercantile_establishment [[0.19377935]]
union_station parcel_of_land [[0.36297545]]
union_station mall [[0.35165137]]
union_station train_depot [[0.3152022]]
union_station motion_picture [[0.17169839]]
union_station picture_show [[0.17015071]]
capability susceptibility [[0.13618708]]
capability state [[0.2536326]]
capability technical_specification [[0.34622753]]
transmission gear_mechanism [[0.09886387]]
transmission sending [[0.16829152]]

kidney_transplantation surgery [[0.27371165]]
kidney_transplantation transplantation [[0.26449746]]
kidney_transplantation technology [[0.20416385]]
kidney_transplantation practice_of_medicine [[0.2897065]]
kidney_transplantation transplant [[0.2501327]]
lipstick make-up [[0.22728287]]
lipstick makeup [[0.26135546]]
lipstick cosmetic [[0.21873993]]
lipstick cosmetics [[0.25361314]]
ulnar_artery blood_supply [[0.26818186]]
ulnar_artery bodily_structure [[0.18337967]]
ulnar_artery closed_circulatory_system [[0.14403574]]
ulnar_artery tube [[0.22246057]]
ulnar_artery blood_circulation [[0.2646604]]
ulnar_artery body_structure [[0.242256]]
ulnar_artery anatomical_structure [[0.22468625]]
ulnar_artery cardiovascular_system [[0.27676427]]
ulnar_artery body_part [[0.30982593]]
ulnar_artery vasculature [[0.21737312]]
ulnar_artery artery [[0.24154045]]
ulnar_artery bloodstream [[0.22539619]]
ulnar_artery vascular_system [[0.23087415]]
hill mountain [[0.28240895]]
hill cartography [[0.20753418]]

hero_of_alexandria video_game [[0.24306436]]
hero_of_alexandria computer_software [[0.39423952]]
space_research scientific_research [[0.27662805]]
space_research methodology [[0.23362605]]
space_research scientific_method [[0.23018712]]
protist microorganism [[0.2551857]]
protist micro-organism [[0.25853762]]
rat-catcher working_man [[0.17063113]]
rat-catcher workingman [[0.16721392]]
rat-catcher workman [[0.18311824]]
rat-catcher worker [[0.2381575]]
rat-catcher employ [[0.17370524]]
rat-catcher person [[0.47918156]]
rat-catcher employee [[0.25708276]]
rat-catcher working_person [[0.21398552]]
rat-catcher quotation [[0.17125611]]
rat-catcher grammatical_relation [[0.10579493]]
rat-catcher technical_specification [[0.19960272]]
retainer working_man [[0.1716094]]
retainer workingman [[0.16353782]]
retainer fixed_costs [[0.21022773]]
retainer worker [[0.24133313]]
retainer retail_price [[0.21230423]]
retainer cost [[0.29390776]]
retainer labourer [[0.18192258]]
retainer disbursement [[0.

dulcimer string_instrument [[0.26745212]]
dulcimer musical_instrument [[0.3292014]]
dulcimer plucked_string_instrument [[0.16936626]]
turrican pc_game [[0.3161112]]
turrican computing_system [[0.34789047]]
turrican software_application [[0.4317551]]
turrican electronic_game [[0.2926231]]
turrican application_program [[0.34417218]]
turrican applications_programme [[0.24290591]]
turrican computer [[0.40997306]]
turrican platform [[0.239716]]
turrican computing_machine [[0.28507698]]
turrican computing_platform [[0.33970755]]
turrican electrical_load [[0.16018687]]
turrican computer_code [[0.38128254]]
turrican application_software [[0.40153456]]
turrican computer_game [[0.34272808]]
turrican software_package [[0.43134013]]
turrican video_game_console [[0.30191687]]
turrican game_console [[0.3310398]]
turrican coding_system [[0.29588053]]
turrican videogaming [[0.22853512]]
turrican application [[0.28281224]]
turrican applications_software [[0.39275944]]
turrican games_console [[0.2936405

chairman boss [[0.26769784]]
chairman person [[0.46065092]]
chairman chief [[0.34976387]]
chairman decision_maker [[0.2960842]]
chairman constituent [[0.19258453]]
chairman corporate_title [[0.20533447]]
chairman senior_management [[0.31491318]]
chairman leader [[0.38775486]]
lisa_see person [[0.49372518]]
lisa_see writer [[0.41018194]]
bottled_water drinkable [[0.26789725]]
bottled_water state_of_matter [[0.19796294]]
bottled_water drink [[0.23645698]]
bottled_water fluid [[0.25975218]]
bottled_water drinking_water [[0.24027798]]
bottled_water h2o [[0.20783117]]
bottled_water beverage [[0.2645137]]
bottled_water potable [[0.26003954]]
bottled_water liquid_state [[0.22772823]]
bottled_water liquidity [[0.20301668]]
supervolcano extinct_volcano [[0.20927176]]
supervolcano mountain [[0.23374957]]
supervolcano harm [[0.15507323]]
supervolcano injury [[0.12312455]]
supervolcano toilet [[0.11038677]]
supervolcano mount [[0.18332618]]
supervolcano terrain [[0.24874203]]
supervolcano hurt [[0

cistron biological_sequence [[0.18415879]]
distributer supplier [[0.26410127]]
distributer electrical_device [[0.18453251]]
distributer provider [[0.26027766]]
shepherd working_man [[0.20507309]]
shepherd worker [[0.21289457]]
shepherd herder [[0.14738256]]
shepherd laborer [[0.24967909]]
shepherd clergyman [[0.23265725]]
shepherd hired_man [[0.18151312]]
shepherd workman [[0.19562252]]
shepherd person [[0.4657208]]
shepherd priest [[0.21714234]]
shepherd employee [[0.22645515]]
shepherd working_person [[0.21481124]]
fasces symbolization [[0.22748767]]
fasces representational_process [[0.19342056]]
fasces symbolic_representation [[0.25746062]]
fasces allegory [[0.1849366]]
hit decease [[0.10021689]]
hit technical_specification [[0.22489807]]
hit specifications [[0.18329957]]
hit killer [[0.18936567]]
hit criminal_offence [[0.2142055]]
hit killing [[0.1649277]]
hit kill [[0.13696869]]
hit wrongful_act [[0.20904742]]
hit death [[0.19490677]]
hit cause_of_death [[0.187877]]
hit actus_reus

jammies clothes [[0.32055157]]
marriage point_of_reference [[0.22214937]]
marriage function [[0.20593704]]
marriage tribe [[0.18916261]]
marriage union [[0.17198232]]
marriage agreement [[0.24801162]]
marriage understanding [[0.22026657]]
marriage note [[0.18862261]]
marriage family_unit [[0.2628169]]
marriage written_agreement [[0.2601045]]
marriage kin [[0.16175337]]
marriage reference_point [[0.10987184]]
marriage annotation [[0.15130989]]
kovel locale [[0.2534753]]
kovel city [[0.38330773]]
isaiah saint [[0.18564889]]
isaiah sacred_writing [[0.22735125]]
isaiah prophet [[0.26700392]]
isaiah religious_writing [[0.21617396]]
isaiah scripture [[0.28240237]]
isaiah person [[0.37469247]]
isaiah holy_man [[0.20923822]]
isaiah book [[0.23851356]]
isaiah sacred_scripture [[0.27357876]]
isaiah sacred_text [[0.29042944]]
isaiah religious_person [[0.28413007]]
isaiah religious_text [[0.24648257]]
isaiah holy_person [[0.24088766]]
archduke affix [[0.16456091]]
archduke title [[0.30806863]]
arc

pocketbook soft-cover_book [[0.1002589]]
pocketbook paperback_book [[0.1670013]]
pocketbook storage_medium [[0.24550939]]
pocketbook storage [[0.22185943]]
pocketbook book [[0.17909607]]
pocketbook softback_book [[0.09396712]]
dub sound [[0.22467375]]
dub sense_experience [[0.16804652]]
dub sensation [[0.18144119]]
dub sense_datum [[0.12168393]]
dub sense_impression [[0.19314305]]
dub auditory_sensation [[0.2022694]]
strategy planned_language [[0.20297077]]
strategy software [[0.43304902]]
strategy utility_program [[0.2788899]]
strategy software_program [[0.40028462]]
strategy operating_system [[0.2800746]]
strategy database [[0.3281269]]
strategy programming [[0.2890061]]
strategy service_program [[0.21851456]]
strategy software_package [[0.40280378]]
strategy plan_of_action [[0.21946123]]
strategy system_software [[0.32745823]]
strategy computer_software [[0.43251082]]
strategy computer_programming [[0.28313175]]
strategy database_management_system [[0.31827345]]
strategy computer_pr

monty_python comedian [[0.28666878]]
monty_python musical_group [[0.30623865]]
monty_python musical_organization [[0.191902]]
monty_python music_group [[0.24229734]]
monty_python team [[0.17116006]]
fleur-de-lis heraldry [[0.25050357]]
fleur-de-lis motif [[0.23079626]]
fleur-de-lis figure [[0.26817587]]
fleur-de-lis picture [[0.30240718]]
fleur-de-lis image [[0.29759046]]
fleur-de-lis illustration [[0.28973776]]
fleur-de-lis emblem [[0.2822121]]
fleur-de-lis drawing [[0.2960652]]
fleur-de-lis icon [[0.32270342]]
escheat portion [[0.14076021]]
escheat transferred_property [[0.23668867]]
escheat assets [[0.31101266]]
century area_of_mathematics [[0.19684908]]
century metric [[0.18452246]]
century time_interval [[0.19820388]]
century time_unit [[0.19453181]]
century years [[0.17385864]]
century solar_year [[0.18472226]]
century si_base_unit [[0.1078776]]
century orbital_period [[0.15592049]]
century fundamental_measure [[0.1794457]]
century measure [[0.20089094]]
century period_of_time [[

nancy cartoon_character [[0.19956326]]
nancy funnies [[0.15208523]]
nancy sailboat [[0.13295662]]
nancy tv_program [[0.3487059]]
nancy television_series [[0.34535384]]
nancy movie [[0.34623674]]
nancy social_event [[0.29855183]]
nancy television_program [[0.36841092]]
nancy drama [[0.3030023]]
nancy person [[0.48263213]]
nancy tv_show [[0.32939819]]
nancy studio_album [[0.19173193]]
nancy tv_series [[0.29975545]]
nancy television_show [[0.3456241]]
nancy movie_maker [[0.21202113]]
bartlett plant [[0.2314571]]
bartlett green_goods [[0.12170913]]
bartlett pear [[0.12831642]]
bartlett edible_fruit [[0.12471958]]
bartlett woody_plant [[0.19388634]]
bartlett flowering_tree [[0.13566947]]
bartlett pear_tree [[0.12511656]]
phoebe_buffay fictitious_character [[0.20146029]]
phoebe_buffay fictional_character [[0.293512]]
phoebe_buffay person [[0.48691145]]
st._bernard_software company [[0.36953837]]
st._bernard_software venture [[0.24265444]]
st._bernard_software enterprise [[0.33671588]]
grande

porsche venture [[0.25380516]]
porsche concern [[0.17420928]]
internet computing_device [[0.33749315]]
internet data_processor [[0.26555464]]
internet information_processing_system [[0.31953412]]
internet communications_system [[0.3416672]]
internet software_component [[0.32721487]]
internet software_program [[0.41104263]]
internet telecom_system [[0.24100281]]
internet telecom_equipment [[0.28291425]]
internet computer [[0.3958781]]
internet server_system [[0.30452928]]
internet telecommunication_system [[0.31941417]]
internet software_engineering [[0.2821552]]
internet connexion [[0.12945187]]
internet electronic_computer [[0.26562673]]
internet electronic_network [[0.30534708]]
internet computer_programme [[0.29546592]]
internet code [[0.34093463]]
internet interconnection [[0.28694668]]
internet computer_code [[0.35600588]]
internet network [[0.35498962]]
internet service [[0.28926086]]
internet software_package [[0.41027087]]
internet network_host [[0.22513868]]
internet computer_

archaean era [[0.13459794]]
archaean geological_time [[0.30914637]]
archaean geologic_time [[0.32043752]]
terminology investigating [[0.22667067]]
terminology investigation [[0.22593679]]
terminology research [[0.28972623]]
qir city [[0.2891724]]
clipping decrease [[0.14466281]]
clipping reduction [[0.14407222]]
clipping extract [[0.15975341]]
clipping lessening [[0.08940285]]
ecology environment [[0.24744076]]
java written_language [[0.25049138]]
java software_program [[0.439231]]
java computer_software [[0.45134944]]
java song [[0.14132926]]
java software_framework [[0.2778509]]
java brand [[0.1750156]]
java music_group [[0.20821492]]
java legal_name [[0.18857111]]
java code [[0.33099058]]
java computer_program [[0.41754252]]
java computer_programme [[0.3435668]]
java communication_medium [[0.3363122]]
java software [[0.45357475]]
java computer_code [[0.39124218]]
java software_package [[0.431547]]
java island [[0.11171421]]
java coding_system [[0.30274373]]
java data-storage_medium 

lyceum educational_institution [[0.31701845]]
lyceum public_building [[0.31441155]]
lyceum hall [[0.21311997]]
bastinado righteousness [[0.21233189]]
bastinado torture [[0.2625676]]
bastinado maltreatment [[0.21433085]]
bastinado mistreatment [[0.24705124]]
bastinado corporal_punishment [[0.22630815]]
bastinado physical_punishment [[0.2149258]]
bastinado bludgeon [[0.19199413]]
bastinado penalisation [[0.18009898]]
bastinado penalty [[0.20269333]]
bastinado person [[0.3837561]]
bastinado penalization [[0.14307535]]
bastinado truncheon [[0.17451486]]
bastinado punishment [[0.2867285]]
bastinado torturing [[0.23108114]]
hurricane_rita windstorm [[0.36333033]]
hurricane_rita tropical_storm [[0.38827485]]
hurricane_rita violent_storm [[0.31205535]]
hurricane_rita atmospheric_state [[0.28101304]]
hurricane_rita weather [[0.38324383]]
hurricane_rita atmospheric_condition [[0.26786998]]
hurricane_rita natural_phenomenon [[0.31940955]]
hurricane_rita tropical_cyclone [[0.37779906]]
hurricane_r

palomino horse [[0.26284173]]
palomino hoofed_mammal [[0.12162719]]
como town [[0.40595713]]
como comune [[0.16589291]]
como city [[0.41835055]]
fishmeal feed [[0.24071673]]
fishmeal plant_food [[0.30828774]]
fishmeal organic_fertiliser [[0.247471]]
fishmeal chemical_fertiliser [[0.24427097]]
fishmeal fertilizer [[0.27531296]]
fishmeal organic_food [[0.24827492]]
umayyad_mosque house_of_god [[0.20384169]]
umayyad_mosque constructed_structure [[0.23152795]]
umayyad_mosque mosque [[0.25924677]]
umayyad_mosque public_building [[0.3685305]]
umayyad_mosque house_of_prayer [[0.21757989]]
umayyad_mosque house_of_worship [[0.28116894]]
umayyad_mosque place_of_worship [[0.31590578]]
mark_tully person [[0.52139914]]
revetment barrier [[0.20264573]]
revetment facing [[0.15471178]]
revetment obstruction [[0.16852674]]
revetment siding [[0.19132118]]
revetment protection [[0.17775097]]
revetment protective_covering [[0.15695556]]
revetment obstructor [[0.11556757]]
mycelium plant_structure [[0.2536

dorothea_puente conclusion [[0.19586623]]
alec_stewart sport [[0.38062307]]
alec_stewart competitor [[0.31625083]]
alec_stewart cricketer [[0.26931173]]
alec_stewart athlete [[0.39836884]]
alec_stewart person [[0.4519802]]
alec_stewart captain [[0.19398475]]
alec_stewart sportsperson [[0.36139822]]
alec_stewart jock [[0.25091195]]
insignia symbolization [[0.23455685]]
insignia symbolic_representation [[0.25798172]]
insignia emblem [[0.27340636]]
insignia symbolisation [[0.16602705]]
insignia representational_process [[0.18965809]]
sort planned_language [[0.2346633]]
sort artificial_language [[0.2021241]]
sort processing [[0.18564203]]
sort formal_language [[0.22982669]]
sort data_processing [[0.22304049]]
sort constructed_language [[0.18395191]]
sort programming_language [[0.22598857]]
hotel_europe public_building [[0.38096753]]
hotel_europe hotel [[0.26159436]]
hotel_europe constructed_structure [[0.19727764]]
hotel_europe hotel_industry [[0.22543074]]
sire_records record_label [[0.31

half-life_2 prefix [[0.18089956]]
half-life_2 knowledge_organization [[0.23834813]]
half-life_2 computer_program [[0.41157886]]
half-life_2 computing_machine [[0.2770456]]
half-life_2 software [[0.4440284]]
half-life_2 console [[0.23759584]]
half-life_2 service [[0.19819443]]
half-life_2 electrical_appliance [[0.1589738]]
half-life_2 video_game_console [[0.29274714]]
half-life_2 coding_system [[0.28908128]]
half-life_2 application [[0.26372007]]
half-life_2 book [[0.20489648]]
half-life_2 piece_of_work [[0.21310198]]
half-life_2 honorific [[0.12352133]]
half-life_2 server_system [[0.28124166]]
half-life_2 channel [[0.26037538]]
half-life_2 medium [[0.21250273]]
half-life_2 electrical_load [[0.16605283]]
half-life_2 action_game [[0.2140008]]
half-life_2 word_string [[0.17419362]]
half-life_2 software_application [[0.406553]]
half-life_2 storage_medium [[0.32223868]]
half-life_2 software_program [[0.43127865]]
half-life_2 volume [[0.16628063]]
half-life_2 host [[0.16773854]]
half-life_2 

republic_of_turkey locus [[0.17286234]]
republic_of_turkey devising [[0.19869809]]
republic_of_turkey making [[0.2086363]]
wave hair_styling [[0.1387902]]
wave tone [[0.13548678]]
wave technical_specification [[0.23608585]]
wave oscillation [[0.21715242]]
wave trend [[0.17395386]]
wave curve [[0.21517585]]
wave move [[0.19548173]]
wave hairstyle [[0.1419784]]
wave nonverbal_communication [[0.16664098]]
wave energy_unit [[0.1500338]]
wave shape [[0.24881224]]
wave growth [[0.14221807]]
wave physical_phenomenon [[0.34461764]]
wave emergence [[0.17014976]]
wave natural_phenomenon [[0.33490536]]
wave phenomenon [[0.32931545]]
wave hairdo [[0.12772398]]
wave form [[0.16002865]]
wave vibration [[0.24699223]]
wave outgrowth [[0.1583865]]
wave hair_style [[0.12604065]]
wave curved_shape [[0.14857027]]
wave motion [[0.22373343]]
wave atmospheric_phenomenon [[0.1992439]]
wave work_unit [[0.12676074]]
wave movement [[0.27116886]]
paul_mellars lecturer [[0.2576481]]
paul_mellars academician [[0.19

craig_anderson hockey_player [[0.23983118]]
craig_anderson sportsperson [[0.2885741]]
craig_anderson riding_horse [[0.12414161]]
craig_anderson series [[0.21629387]]
craig_anderson horseriding [[0.1205368]]
craig_anderson television_show [[0.3117486]]
craig_anderson goaltender [[0.16335891]]
craig_anderson statue [[0.17815207]]
craig_anderson contestant [[0.27682057]]
craig_anderson variable [[0.08637477]]
craig_anderson likeness [[0.16578838]]
craig_anderson olympic_sport [[0.2749966]]
craig_anderson sport [[0.33223936]]
craig_anderson type_of_sport [[0.23494995]]
craig_anderson communication_medium [[0.23620178]]
craig_anderson ice_hockey_player [[0.17904922]]
craig_anderson population [[0.1243894]]
craig_anderson visual_arts [[0.22371249]]
craig_anderson task [[0.19501142]]
craig_anderson tv_program [[0.30034193]]
craig_anderson television_series [[0.28705463]]
craig_anderson transmission_channel [[0.16419782]]
craig_anderson writer [[0.31699988]]
craig_anderson television_program [

wang_xin person [[0.510774]]
wang_xin olympic_sports [[0.31118077]]
wang_xin olympic_sport [[0.27736008]]
wang_xin sportswoman [[0.23875307]]
wang_xin sportsperson [[0.33018264]]
casino tabletop_game [[0.13309921]]
casino business_establishment [[0.268838]]
casino card_game [[0.18562105]]
casino gambling_den [[0.17885023]]
casino gambling_house [[0.17528526]]
casino gaming_house [[0.12796265]]
valley_girls episode [[0.18527749]]
derby hat [[0.13829215]]
bookkeeping accounting [[0.2860081]]
bookkeeping accountancy [[0.24223036]]
chai_tea liquid_state [[0.14727096]]
chai_tea drinkable [[0.23021138]]
chai_tea fluid [[0.20311661]]
chai_tea drink [[0.27223927]]
chai_tea beverage [[0.28139558]]
half-life data_processor [[0.24080426]]
half-life information_processing_system [[0.25965506]]
half-life pc_game [[0.26866782]]
half-life computing_system [[0.35923633]]
half-life adp_system [[0.24196467]]
half-life pic [[0.20520025]]
half-life home_appliance [[0.19469197]]
half-life applications_prog

pricewaterhousecoopers firm [[0.32537457]]
pricewaterhousecoopers corporation [[0.4112811]]
pricewaterhousecoopers company [[0.41789588]]
pricewaterhousecoopers partnership [[0.3607353]]
pricewaterhousecoopers venture [[0.26739338]]
filial_piety moral_philosophy [[0.24367754]]
filial_piety philosophy [[0.2780583]]
filial_piety ideal [[0.15766078]]
filial_piety virtue [[0.2585162]]
filial_piety chastity [[0.1987339]]
battersea_arts_centre public_building [[0.32339802]]
battersea_arts_centre theater [[0.27142215]]
battersea_arts_centre constructed_structure [[0.16958079]]
battersea_arts_centre theatre [[0.2810721]]
saqqaq locale [[0.25224382]]
saqqaq city [[0.2514091]]
john_heydon attorney [[0.22558753]]
john_heydon upper_class [[0.16145538]]
john_heydon legal_expert [[0.23916519]]
john_heydon lawyer [[0.2296166]]
john_heydon jurisconsult [[0.18022908]]
verlot locale [[0.26796517]]
verlot unincorporated_area [[0.20504788]]
verlot census_place [[0.15538618]]
proof point_of_reference [[0.2

light electrical_appliance [[0.19916221]]
light signal [[0.2346948]]
light visual_signal [[0.23146304]]
light variable_quantity [[0.12018139]]
light release [[0.14442486]]
light natural_process [[0.21817784]]
light source_of_illumination [[0.19523701]]
highlander flick [[0.21361142]]
highlander picture [[0.24579737]]
highlander description [[0.15909949]]
highlander moving_picture [[0.3070319]]
highlander title [[0.20556399]]
highlander tie-in [[0.21325393]]
highlander movie [[0.37040532]]
highlander pic [[0.20975356]]
highlander film_genre [[0.2543587]]
highlander picture_show [[0.2842973]]
highlander season [[0.1360131]]
highlander show [[0.26731393]]
highlander motion_picture [[0.34516943]]
highlander action_film [[0.22763269]]
highlander verbal_description [[0.15631874]]
highlander film [[0.35395336]]
highlander moving-picture_show [[0.12128355]]
cutlery tableware [[0.27212155]]
cutlery implement [[0.1478115]]
vibe atmosphere [[0.14870241]]
vibe popular_music [[0.36430565]]
vibe dan

extramarital_sex sex_activity [[0.28960195]]
extramarital_sex sexual_love [[0.26417726]]
extramarital_sex bodily_function [[0.2705116]]
extramarital_sex sexual_activity [[0.37402123]]
extramarital_sex vaginal_sex [[0.2923172]]
extramarital_sex sexual_act [[0.35202038]]
extramarital_sex sexual_relations [[0.37244484]]
extramarital_sex sexual_intercourse [[0.37321022]]
extramarital_sex human_sexual_activity [[0.30475014]]
extramarital_sex sexual_practice [[0.34098294]]
extramarital_sex sexual_penetration [[0.27690235]]
extramarital_sex intercourse [[0.31533402]]
extramarital_sex dicking [[0.14359725]]
extramarital_sex sexual_congress [[0.281014]]
extramarital_sex making_love [[0.2786595]]
extramarital_sex love_life [[0.23538956]]
extramarital_sex sexual_relation [[0.31476924]]
extramarital_sex shag [[0.1435306]]
extramarital_sex copulation [[0.24148546]]
extramarital_sex sex_position [[0.1636957]]
extramarital_sex sex_act [[0.3484234]]
extramarital_sex shagging [[0.17452255]]
extramarita

lepton particle [[0.26685274]]
lepton atom [[0.24225605]]
mode data_point [[0.16613741]]
mode phenomenon [[0.18737401]]
mode electrical_device [[0.17001075]]
mode specifications [[0.3127163]]
mode average [[0.13615192]]
mode electronic_equipment [[0.23958875]]
mode electronic_circuit [[0.28104383]]
mode technical_specification [[0.34099147]]
mode natural_phenomenon [[0.20759337]]
mode mean [[0.14314441]]
chief_research_officer corporate_executive [[0.27173626]]
chief_research_officer administrator [[0.33212498]]
chief_research_officer affix [[0.15624353]]
chief_research_officer business_executive [[0.27274507]]
chief_research_officer executive [[0.30196556]]
chief_research_officer title_of_respect [[0.1566117]]
chief_research_officer person [[0.46090594]]
chief_research_officer executive_director [[0.28853253]]
chief_research_officer constituent [[0.17451498]]
chief_research_officer decision_maker [[0.28391308]]
chief_research_officer senior_management [[0.31835723]]
jim_beach trainer 

lawrence unfortunate_person [[0.11088977]]
lawrence acquirer [[0.15446895]]
lawrence poet [[0.25960362]]
spile stopper [[0.17470586]]
spile vertical [[0.15540972]]
spile structural_member [[0.19057995]]
spile support [[0.09377387]]
spile supporting_structure [[0.18362345]]
spile architectural_element [[0.12284581]]
spile obstruction [[0.14260285]]
spile structural_element [[0.14325851]]
dhole native_species [[0.29193446]]
dhole carnivore [[0.28267586]]
dhole canis_familiaris [[0.28303674]]
dhole indigenous_species [[0.29517835]]
dhole dogness [[0.12462164]]
dhole dog [[0.26826128]]
dhole canine [[0.27894905]]
dhole indigenous [[0.18613306]]
dhole animal [[0.41922623]]
dhole stray_dog [[0.1875856]]
dhole wild_dog [[0.28127247]]
dhole carnivory [[0.23356491]]
dhole native [[0.21866935]]
treasure_map map [[0.26576492]]
student_council extracurricular_activity [[0.25185883]]
student_council extracurricular [[0.24611877]]
retail possession [[0.19334048]]
retail selling [[0.3181273]]
retail 

saint_anna person [[0.44572377]]
saint_anna good_person [[0.21704076]]
rotunda public_building [[0.36015272]]
rotunda edifice [[0.22839907]]
rotunda constructed_structure [[0.23673126]]
altivec written_language [[0.24089529]]
altivec floating_point [[0.24244846]]
altivec computer_code [[0.40740368]]
altivec coding_system [[0.33101562]]
altivec program_line [[0.18730514]]
altivec data_type [[0.27422386]]
altivec written_communication [[0.25955454]]
haloalkane ion [[0.17660211]]
haloalkane atom [[0.18659341]]
haloalkane halon [[0.15821227]]
haloalkane crystal [[0.1513205]]
photograph picture [[0.34981784]]
photograph work_of_art [[0.39653802]]
photograph image [[0.3161043]]
photograph illustration [[0.28354785]]
photograph nonachievement [[0.14641373]]
photograph icon [[0.2824542]]
warfare disagreement [[0.25946575]]
warfare military_action [[0.25890833]]
warfare difference_of_opinion [[0.26153222]]
smoothbore engineering_science [[0.21136443]]
smoothbore cylinder [[0.25097856]]
smoothbo

highschool school [[0.30937746]]
highschool middle_school [[0.2810397]]
highschool constructed_structure [[0.13752918]]
highschool secondary_school [[0.2514425]]
highschool junior_high_school [[0.27449]]
highschool university [[0.25927398]]
highschool educational_institution [[0.28066063]]
highschool intermediate_school [[0.22653584]]
highschool junior_high [[0.2808405]]
highschool public_building [[0.21338339]]
highschool academy [[0.24902795]]
highschool group_event [[0.25268477]]
paragonite aggregate [[0.19098194]]
paragonite silicate [[0.25807354]]
paragonite silicate_mineral [[0.22358407]]
short-term_memory remembering [[0.18590383]]
short-term_memory memory [[0.2178657]]
cinnamomum evergreen [[0.19705357]]
cinnamomum plant [[0.4174069]]
cinnamomum angiosperm [[0.25810048]]
cinnamomum evergreen_plant [[0.24350655]]
act_of_congress statute [[0.27404326]]
act_of_congress enactment [[0.27701926]]
act_of_congress music_group [[0.20766994]]
act_of_congress band [[0.15438254]]
act_of_co

flirting body_process [[0.12805432]]
flirting romp [[0.15620527]]
flirting sex_act [[0.31890386]]
flirting bodily_function [[0.26306584]]
flirting love_life [[0.26873028]]
flirting sexual_relation [[0.26129228]]
flirting sexual_activity [[0.30362594]]
flirting lovemaking [[0.284393]]
flirting sex_activity [[0.2636225]]
flirting sexual_act [[0.30597216]]
flirting bodily_process [[0.215956]]
flirting frolic [[0.18389386]]
flirting human_sexual_activity [[0.25250655]]
overall coverall [[0.1113894]]
overall clothing [[0.17036228]]
overall clothes [[0.12955624]]
bear capitalist [[0.11693276]]
bear person [[0.31736097]]
bear animal [[0.361375]]
bear mammal [[0.2555814]]
bear investor [[0.15052094]]
bear carnivory [[0.13753462]]
decentralisation extension [[0.18932773]]
decentralisation spreading [[0.12561822]]
decentralisation expansion [[0.20375152]]
decentralisation increase [[0.22150803]]
decentralisation enlargement [[0.14888896]]
decentralisation spread [[0.1431461]]
decentralisation st

nitrification chemical_action [[0.2801476]]
nitrification natural_action [[0.12653881]]
nitrification natural_process [[0.2552125]]
electron_microscopy enquiry [[0.15683363]]
electron_microscopy microscopy [[0.24167815]]
electron_microscopy research [[0.2794697]]
electron_microscopy microscope [[0.21102329]]
electron_microscopy laboratory_equipment [[0.2609069]]
electron_microscopy investigation [[0.19524193]]
electron_microscopy investigating [[0.2291037]]
electron_microscopy magnifier [[0.13287804]]
electron_microscopy scientific_instrument [[0.19517149]]
sigma_lambda_beta social_club [[0.26704866]]
timeless function [[0.19175631]]
timeless move [[0.16089988]]
timeless work_of_art [[0.40521717]]
timeless single [[0.1544999]]
timeless narration [[0.31355584]]
timeless figure_of_speech [[0.15711716]]
timeless album [[0.26035145]]
timeless story [[0.30301672]]
timeless novel [[0.21129331]]
timeless episode [[0.19250132]]
timeless narrative [[0.3292233]]
timeless record_album [[0.2487512

st._augustine place_of_worship [[0.27435037]]
diversity difference [[0.19805612]]
diversity dissimilarity [[0.14430967]]
mugwump political_organization [[0.29725096]]
mugwump politician [[0.33001748]]
mugwump political_organisation [[0.25667015]]
mugwump political_leader [[0.3434579]]
mugwump party [[0.25027746]]
mugwump political_party [[0.29165202]]
aaron_copland writer [[0.41128507]]
aaron_copland musician [[0.40237075]]
aaron_copland person [[0.45605996]]
aaron_copland educationist [[0.24210112]]
aaron_copland composer [[0.35906306]]
aaron_copland educator [[0.31418562]]
aaron_copland teacher [[0.31441396]]
sino-japanese_war war [[0.26426554]]
sino-japanese_war warring [[0.21738176]]
scallywag small_fry [[0.13808817]]
scallywag juvenile [[0.14132003]]
scallywag nipper [[0.12925716]]
scallywag chip [[0.11100183]]
scallywag tyke [[0.1627538]]
scallywag bad_person [[0.21092269]]
scallywag person [[0.42178607]]
scallywag youngster [[0.22385333]]
scallywag child [[0.21793452]]
scallywag

ball body_part [[0.24123468]]
ball geometric_shape [[0.18137033]]
ball game_equipment [[0.25035137]]
ball outdoor_game [[0.26133963]]
ball baseball [[0.27182478]]
ball baseball_game [[0.22982344]]
ball plaything [[0.16328827]]
balk throwing [[0.21598694]]
balk team_sport [[0.29868728]]
balk competition [[0.23230997]]
balk ball_player [[0.247641]]
balk ballgame [[0.24450268]]
balk baseball [[0.2860888]]
balk ball_game [[0.26725322]]
balk outdoor_game [[0.20308657]]
balk pitch [[0.16762342]]
balk olympic_sport [[0.273318]]
balk sport [[0.35833085]]
balk baseball_game [[0.25313106]]
reactionary conservative [[0.24791592]]
bear_creek map [[0.28718883]]
bear_creek channel [[0.22991145]]
diploma credential [[0.25100458]]
diploma certificate [[0.2777302]]
diploma title [[0.288262]]
diploma certification [[0.27622548]]
diploma evidence [[0.1841753]]
diploma written_document [[0.34158614]]
diploma scientific_evidence [[0.15936904]]
diploma credentials [[0.2437395]]
montlake_bridge motorway [[0.

guts structural_biology [[0.17477098]]
guts anatomy [[0.22144197]]
guts tummy [[0.16142333]]
guts body_part [[0.2490205]]
guts breadbasket [[0.10154213]]
guts fortitude [[0.11637159]]
guts internal_organ [[0.19934407]]
guts natural_phenomenon [[0.1697211]]
narcolepsy illness [[0.37801868]]
narcolepsy sickness [[0.33250934]]
narcolepsy pathological_state [[0.2562094]]
narcolepsy disease [[0.36312813]]
narcolepsy sleep_disorder [[0.2769475]]
narcolepsy sleep_disturbance [[0.2499054]]
narcolepsy disorder [[0.3545897]]
citizen person [[0.4626088]]
citizen national [[0.26345465]]
ley grassland [[0.13283934]]
ley meadow [[0.11158025]]
hermes celestial_body [[0.21221815]]
hermes app [[0.15107669]]
hermes spacefaring [[0.22030967]]
hermes imaginary_being [[0.14218329]]
hermes greek_deity [[0.11487965]]
hermes applications_programme [[0.21856776]]
hermes spacecraft [[0.2267815]]
hermes heavenly_body [[0.16696385]]
hermes deity [[0.17915937]]
hermes godhood [[0.16374819]]
hermes space_flight [[0

hair body_part [[0.30389434]]
hair body_covering [[0.17780083]]
hair bodily_structure [[0.21338508]]
growth evolution [[0.1992704]]
ostracism proscription [[0.2484341]]
ostracism ejection [[0.10586496]]
recommendation text_file [[0.17251928]]
recommendation approval [[0.28446484]]
recommendation advice [[0.22721113]]
recommendation textfile [[0.1458871]]
empire political_organization [[0.3694939]]
empire form_of_government [[0.34003958]]
empire monarchy [[0.2833547]]
empire polity [[0.34257892]]
empire political_system [[0.35935494]]
empire regime [[0.23623496]]
empire social_control [[0.31894243]]
empire corporation [[0.21157125]]
empire political_organisation [[0.31838056]]
empire enterprise [[0.2379262]]
empire religious_ritual [[0.24057075]]
econometrics statistics [[0.25868797]]
econometrics political_economy [[0.21208026]]
econometrics statistical_analysis [[0.23468913]]
econometrics economics [[0.27605557]]
econometrics economic_science [[0.21255134]]
transport travel [[0.319359

emperor sovereign [[0.2958062]]
emperor alcoholic_beverage [[0.17150587]]
emperor person [[0.47043538]]
emperor monarch [[0.22570063]]
documentary picture [[0.26867768]]
documentary library_science [[0.26284787]]
documentary show [[0.306413]]
jeremy_nathans academician [[0.22491258]]
jeremy_nathans faculty_member [[0.27402416]]
jeremy_nathans professor [[0.26490438]]
jeremy_nathans educator [[0.33805898]]
jeremy_nathans population_genetics [[0.15056454]]
jeremy_nathans lector [[0.15074696]]
jeremy_nathans assistant_professor [[0.2419444]]
jeremy_nathans person [[0.47275808]]
jeremy_nathans educationist [[0.22943935]]
jeremy_nathans blood [[0.16726463]]
jeremy_nathans faculty [[0.24952434]]
jeremy_nathans instructor [[0.25056562]]
jeremy_nathans teacher [[0.32092863]]
stakes collective_investment_scheme [[0.21146363]]
stakes investment_funds [[0.28466403]]
stakes assets [[0.3136504]]
stakes investment [[0.3033701]]
stakes investment_fund [[0.28526]]
trust_company partnership [[0.3513042

mario_kart_ds video_game [[0.33516523]]
mario_kart_ds computing_machine [[0.2759918]]
mario_kart_ds computing_platform [[0.32606775]]
mario_kart_ds software [[0.43543056]]
official adjudicator [[0.21795204]]
official person [[0.47348452]]
official umpire [[0.17617905]]
official referee [[0.2102096]]
official football_official [[0.13984326]]
negritude penchant [[0.18314767]]
negritude social_action [[0.23926353]]
negritude proclivity [[0.19460346]]
negritude predisposition [[0.18082124]]
negritude propensity [[0.17153233]]
negritude sociology [[0.2297919]]
negritude front [[0.12553811]]
negritude social_movement [[0.23489143]]
negritude inclination [[0.16408029]]
negritude tendency [[0.1991356]]
negritude movement [[0.21804056]]
walter_boyce person [[0.42883492]]
nursery_rhyme story [[0.23292066]]
nursery_rhyme rhyming [[0.22407012]]
nursery_rhyme narrative_mode [[0.22618173]]
nursery_rhyme narrative [[0.265665]]
nursery_rhyme rhyme [[0.2220684]]
nursery_rhyme tale [[0.16947605]]
nurser

robert_de_niro thespian [[0.32112962]]
robert_de_niro film_director [[0.39441746]]
robert_de_niro filming [[0.3346187]]
robert_de_niro film_producer [[0.38015106]]
robert_de_niro actor [[0.43421277]]
robert_de_niro filmmaker [[0.40226525]]
robert_de_niro person [[0.35858992]]
robert_de_niro film_making [[0.3404514]]
robert_de_niro film_maker [[0.35280105]]
robert_de_niro movie_maker [[0.2275527]]
piastre monetary_unit [[0.2536142]]
piastre subunit [[0.08948779]]
coupling sexual_practice [[0.14042279]]
coupling bodily_process [[0.193746]]
coupling connection [[0.19608809]]
coupling connecter [[0.11518468]]
coupling connective [[0.17693052]]
coupling connector [[0.1750318]]
coupling sexual_act [[0.1686843]]
coupling sexual_activity [[0.15184824]]
coupling mechanical_assembly [[0.22553618]]
coupling mechanism [[0.29655877]]
coupling sex_activity [[0.13320418]]
coupling bodily_function [[0.19018139]]
coupling sexual_relations [[0.12525706]]
coupling connexion [[0.13038546]]
coupling human_

In [107]:
# Score every training (query, hypernym) pair with the trained CRIM model.
hypo_seq = data.tokenizer.texts_to_sequences(data.train_query)
hyper_seq = data.tokenizer.texts_to_sequences(data.train_hyper)

train_pred = crim_model.predict([hypo_seq, hyper_seq]).flatten()
# call-form print: identical output under Python 2, and valid under Python 3
print(train_pred.shape)
# how many of the training pairs receive a score below 0.5
train_pred[train_pred < 0.5].shape

(11779,)


(11779,)

In [31]:
# save models
#crim_model.save('models/crim_phi1_notune.h5')
#crim_model_2.save('models/crim_phi1_tuned.h5')


In [33]:
# Display the predictions held in crim_predictions_2
# (a dict keyed by query term; each value is a list of candidate hypernyms).
crim_predictions_2

{u'scouter': [u'person',
  u'sport',
  u'leader',
  u'athlete',
  u'sportsperson',
  u'writer',
  u'competitor',
  u'olympic_sports',
  u'work_of_art',
  u'social_event',
  u'olympic_sport',
  u'actor',
  u'movie',
  u'television_show',
  u'type_of_sport'],
 u'gatekeeper': [u'person',
  u'software',
  u'computer_software',
  u'company',
  u'computer_system',
  u'computer_network',
  u'software_program',
  u'software_application',
  u'computer',
  u'applications_software',
  u'enterprise',
  u'corporation',
  u'software_package',
  u'technology',
  u'communication_medium'],
 u'mackerel': [u'fish',
  u'edible',
  u'animal',
  u'plant',
  u'plant_material',
  u'plant_part',
  u'shellfish',
  u'algae',
  u'plant_food',
  u'animal_tissue',
  u'seaweed',
  u'brine_shrimp',
  u'food_product',
  u'bivalve',
  u'crustacea'],
 u'prefix': [u'software',
  u'computer_software',
  u'software_package',
  u'computer_program',
  u'software_program',
  u'computer_code',
  u'coding_system',
  u'code',
  

In [53]:
# Compare the 'TermEmbedding' weights held inside crim_model_2 with the
# original embedding matrix by printing the sum of each.

# crim_model_2 contains a nested Model layer; look it up once and reuse it.
embedding_submodel = [l for l in crim_model_2.layers if type(l) == Model][0]

print(np.sum(embedding_submodel.get_layer(name='TermEmbedding').get_weights()[0]))
print(np.sum(data.embedding_matrix))

embedding_submodel


-30727.87
-32148.61


<tensorflow.python.keras.engine.training.Model at 0x7f5876281c10>

# Ensemble model running on bagged examples

In [82]:
# train 5 models on bagged data
cluster_list = []
for i in range(5):
    # single formatted string: prints "Training <i> model" under Python 2 and 3 alike
    print("Training %d model" % i)
    bagged_model = get_CRIM_model(phi_k = phi_k,
                                  train_embeddings = train_embeddings,
                                  embeddings_dim = data.embeddings_dim,
                                  embeddings_layer = embeddings_layer,
                                  phi_init = phi_init_options[phi_init_option],
                                  sigmoid_kernel_constraint = kernel_constraints[kernel_constraint_option],
                                  dropout_rate = dropout_rate,
                                  learning_rate = learning_rate)
    # register the model before training, so it is kept even if training is interrupted
    cluster_list.append(bagged_model)
    train(bagged_model, epochs, batch_size, m, data, neg_sampling_options[negative_option], crim_max_get_hypernym, bag=True)

Training 0 model
('Epoch:', 1, 'Loss:', 222.6102659702301, 'Test Loss:', 0.9702204, 'MRR:', 0.0, 'Test accuracy:', 0.0)
('Epoch:', 2, 'Loss:', 170.66988277435303, 'Test Loss:', 1.2822708, 'MRR:', 0.0, 'Test accuracy:', 0.0)
('Epoch:', 3, 'Loss:', 136.55550736188889, 'Test Loss:', 1.5928928, 'MRR:', 0.0, 'Test accuracy:', 0.0)
('Epoch:', 4, 'Loss:', 116.25431981682777, 'Test Loss:', 1.8125272, 'MRR:', 0.02, 'Test accuracy:', 0.0)
('Epoch:', 5, 'Loss:', 105.13798907399178, 'Test Loss:', 1.8739722, 'MRR:', 0.08667, 'Test accuracy:', 0.0)
('Epoch:', 6, 'Loss:', 99.2387764453888, 'Test Loss:', 1.862023, 'MRR:', 0.09533, 'Test accuracy:', 0.0)
('Epoch:', 7, 'Loss:', 96.11313870549202, 'Test Loss:', 1.7641302, 'MRR:', 0.1, 'Test accuracy:', 0.0)
('Epoch:', 8, 'Loss:', 93.698451384902, 'Test Loss:', 1.6952475, 'MRR:', 0.11222, 'Test accuracy:', 0.0)
('Epoch:', 9, 'Loss:', 91.52098897099495, 'Test Loss:', 1.6259766, 'MRR:', 0.11217, 'Test accuracy:', 0.0)
('Epoch:', 10, 'Loss:', 89.198438540101

In [84]:
# Predict hypernyms for the test queries using all models in cluster_list together.
print("Generating predictions...")
cluster_crim_predictions = predict_cluster_hypernyms(data.test_query, data.tokenizer, cluster_list)

# Print every metric averaged over the entries of all_scores, rounded to 5 decimals.
print("CRIM evaluation:")
gold_pairs = (data.test_query, data.test_hyper)
score_names, all_scores = get_evaluation_scores(gold_pairs, cluster_crim_predictions)
for k, score_name in enumerate(score_names):
    metric_total = sum([score_list[k] for score_list in all_scores])
    print(score_name + ': ' + str(round(metric_total / len(all_scores), 5)))


Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.19501
MAP: 0.09175
P@1: 0.15877
P@5: 0.08695
P@10: 0.08223


In [85]:
# Evaluate each model in cluster_list on its own, for comparison with the combined run.
for c in cluster_list:
    print("Generating predictions...")
    crim_predictions = predict_crim_hypernyms(data.test_query, data.tokenizer, c, crim_max_get_hypernym)

    # Print every metric averaged over the entries of all_scores, rounded to 5 decimals.
    print("CRIM evaluation:")
    gold_pairs = (data.test_query, data.test_hyper)
    score_names, all_scores = get_evaluation_scores(gold_pairs, crim_predictions)
    for k, score_name in enumerate(score_names):
        metric_total = sum([score_list[k] for score_list in all_scores])
        print(score_name + ': ' + str(round(metric_total / len(all_scores), 5)))

Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.17378
MAP: 0.0815
P@1: 0.13742
P@5: 0.07843
P@10: 0.07263
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.16705
MAP: 0.0779
P@1: 0.13276
P@5: 0.07427
P@10: 0.07028
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.18839
MAP: 0.08784
P@1: 0.15077
P@5: 0.08357
P@10: 0.07837
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Do

## Evaluation code

In [None]:
# Score one candidate (term, hypernym) pair: is 'writer' a hypernym of 'ebert'?
word_index = data.tokenizer.word_index
i, j = word_index['ebert'], word_index['writer']
# each input is a batch holding a single token id
crim_model_2.predict([[i], [j]])


### Find candidate hypernyms

In [69]:
class ToyClass:
    """Scratch class showing how keyword arguments can be read from **kwargs.

    'p1' is mandatory (a KeyError is raised when it is missing); 'p2' is
    optional and only printed when supplied.
    """
    def __init__(self, **kwargs):
        # call-form print: same output under Python 2, and valid under Python 3
        print(kwargs['p1'])
        if 'p2' in kwargs:
            print(kwargs['p2'])

ToyClass(p1='hello',p2='goodbye' )


hello
goodbye


<__main__.ToyClass instance at 0x7fc745037c68>

# Try to refine initial training with clustering

In [131]:
class CrimCluster:
    """A single cluster of the clustered CRIM ensemble.

    Wraps one Keras model together with the per-epoch statistics gathered
    while that model is being trained.

    Keyword arguments:
        model: ready-built (e.g. already trained) model to wrap.  When present
            it is used as-is and every other keyword argument is ignored.
        phi_init, embeddings_layer, embeddings_dim, sigmoid_kernel_constraint,
        dropout_rate, learning_rate: required when ``model`` is absent; they
            are forwarded to ``get_CRIM_model`` to build a fresh model.  A
            missing one raises KeyError.

    Attributes:
        model: the wrapped model.
        epoch_count: index of the epoch this cluster is currently in.
        epoch_total_loss: loss accumulated (by the caller) during the current epoch.
        avg_loss: list with one mean-loss entry per ``calc_avg_loss`` call.
        training_samples: dict mapping epoch index -> number of samples counted.
        mrr: list callers can use to record evaluation MRR values.
    """

    def __init__(self, **kwargs):
        if 'model' in kwargs:
            # we're passing a ready-trained model here
            self.model = kwargs['model']
        else:
            self.model = self._init_model(phi_init = kwargs['phi_init'],
                                          embeddings_layer = kwargs['embeddings_layer'],
                                          embeddings_dim = kwargs['embeddings_dim'],
                                          sigmoid_kernel_constraint = kwargs['sigmoid_kernel_constraint'],
                                          dropout_rate  = kwargs['dropout_rate'],
                                          learning_rate = kwargs['learning_rate'])
        # initialise training statistics
        self.epoch_count = 0
        self.epoch_total_loss = 0.
        self.avg_loss = []
        self.training_samples = {}
        self.mrr = []

    def _init_model(self, phi_init, sigmoid_kernel_constraint,
                    embeddings_layer, embeddings_dim,
                    dropout_rate, learning_rate):
        """Build a new model via ``get_CRIM_model``.

        ``phi_k=1``, ``train_embeddings=False`` and ``normalised=False`` are
        fixed here; everything else comes from the arguments.
        """
        return get_CRIM_model(phi_k = 1,
                              train_embeddings = False,
                              embeddings_dim = embeddings_dim,
                              embeddings_layer = embeddings_layer,
                              phi_init = phi_init,
                              sigmoid_kernel_constraint = sigmoid_kernel_constraint,
                              dropout_rate = dropout_rate,
                              learning_rate = learning_rate,
                              normalised = False
                             )

    def calc_avg_loss(self):
        """Append the mean per-sample loss of the current epoch to ``avg_loss``.

        A cluster that was not assigned any sample during its current epoch
        has no entry in ``training_samples``; 0.0 is recorded in that case
        instead of failing with KeyError / dividing by zero.
        """
        samples_seen = self.training_samples.get(self.epoch_count, 0)
        if samples_seen:
            self.avg_loss.append(self.epoch_total_loss / samples_seen)
        else:
            self.avg_loss.append(0.)

    def increment_epoch(self):
        """Advance this cluster to its next epoch."""
        self.epoch_count += 1

    def increment_training_samples(self):
        """Count one more training sample for the current epoch."""
        self.training_samples[self.epoch_count] = self.training_samples.get(self.epoch_count, 0) + 1

In [144]:
# rolled-back some changes:
# removed cluster max;
# removed conditions to control creation of new cluster.  Cluster can be appended immediately


def yamane_train(
    epochs,      # number of epochs to run
    m,           # number of negative samples
    data,        # class instance containing all the data required for training/testing        
    embedding_layer,     # shared embeddings layer
    threshold    = 0.15,     # threshold; similarity below this score will trigger new cluster
    negative_option = 'random', # pass dictionary of random terms 
    phi_init_option = None,     # phi dense layer initialisation strategy
    sigmoid_constraint_option = 'None',
    dropout_rate = 0.,
    learning_rate = 0.0001,
    cluster_max = 30,
    init_model = None
): 
    """Train a growing list of CrimCluster models, one training pair at a time.

    In every epoch the training pairs are visited in shuffled order.  Each
    pair is scored by all existing clusters and handed to the best-scoring
    one, unless that best score is below ``threshold``: then a new cluster is
    created for the pair, or - once ``cluster_max`` clusters exist - the pair
    is skipped and marked with cluster id -1.  The selected cluster is updated
    on the pair extended by ``extend_batch_with_negatives``.  After each epoch
    the clusters are evaluated on ``data.valid_query`` / ``data.valid_hyper``
    and the resulting MRR is appended to ``clusters[0].mrr``.

    Parameters:
        epochs: number of passes over the training pairs.
        m: number of negative samples, passed to ``extend_batch_with_negatives``.
        data: object providing ``tokenizer``, ``train_query``, ``train_hyper``,
            ``valid_query``, ``valid_hyper``, ``synonyms``, ``random_words``
            and ``embeddings_dim``.
        embedding_layer: embeddings layer handed to every cluster.
        threshold: best similarity below this value triggers a new cluster.
        negative_option: 'synonym' or 'random'; selects ``data.synonyms`` or
            ``data.random_words`` as the negative sampling source.
        phi_init_option: 'random_identity' or 'random_plus_identity'.
            NOTE: the default ``None`` is not a valid key and raises KeyError
            when the first cluster is built, so callers must pass a value.
        sigmoid_constraint_option: 'ForceToOne' or 'None'.
        dropout_rate, learning_rate: forwarded to every new cluster.
        cluster_max: maximum number of clusters that may exist.
        init_model: currently unused.

    Returns:
        (clusters, sample_clusters): the list of CrimCluster instances and an
        int32 array holding, per training pair, the index of the cluster it
        was last assigned to (-1 when it was skipped).

    Side effect: re-seeds numpy's global random generator with 42.
    """
    phi_init_options = {'random_identity': RandomIdentity(),
                        # NOTE(review): stored as the class itself, unlike the
                        # instantiated RandomIdentity() above - confirm this is intended.
                        'random_plus_identity': RandomPlusIdentity}
    
    neg_sampling_options = {'synonym':data.synonyms, 'random':data.random_words}
    
    sigmoid_constraint_options = {'ForceToOne': ForceToOne(), 'None': None}
    
    sigmoid_kernel_constraint = sigmoid_constraint_options[sigmoid_constraint_option]
    
    neg_strategy = neg_sampling_options[negative_option]

    def new_cluster():
        # single place where clusters are built: own model, shared embedding layer
        return CrimCluster(phi_init = phi_init_options[phi_init_option],
                           embeddings_layer = embedding_layer,
                           embeddings_dim = data.embeddings_dim,
                           sigmoid_kernel_constraint = sigmoid_kernel_constraint,
                           dropout_rate = dropout_rate,
                           learning_rate = learning_rate)
            
    # create the training sequences for query and hypernym terms
    term_train_seq = data.tokenizer.texts_to_sequences(data.train_query)
    hyper_train_seq = data.tokenizer.texts_to_sequences(data.train_hyper)
    
    term_train_seq, hyper_train_seq = [np.asarray(x, dtype='int32') for x in [term_train_seq, hyper_train_seq]]
            
    # stores which cluster each training sequence pertains to;
    # np.zeros already places every sample in cluster 0
    sample_clusters = np.zeros(len(term_train_seq), dtype='int32')
    
    print ("m: ", m, "lambda: ", threshold, "max epoch per cluster: ", epochs, 
           "Negative sampling: ", negative_option, "Phi Init: ", phi_init_option,
           "sigmoid_kernel_constraint: ", sigmoid_constraint_option, 
           "dropout: ", dropout_rate, "learning_rate: ", learning_rate, 
           "cluster_max: ", cluster_max          
          )
    
    print ("Sample clusters size: ", len(sample_clusters))

    # list containing 1 model per cluster; training starts with a single cluster
    clusters = [new_cluster()]
                    
    # get training set indices
    indices = np.arange(len(term_train_seq))  
    
    # seed random generator (done after the first cluster has been built)
    np.random.seed(42)
                    
    for epoch in range(epochs):
        # reset loss for each cluster
        for c in clusters:
            c.epoch_total_loss = 0.
        
        # shuffle indices every epoch
        np.random.shuffle(indices)
        
        # train by stochastic gradient descent, one sample at a time
        for idx, i in enumerate(indices):                        
            if (idx + 1) % 1000 == 0:
                print ("Processed ", idx+1, "samples...")
            
            # calculate similarity on all clusters and pick the best one
            sim = [c.model.predict([term_train_seq[i], hyper_train_seq[i]]) for c in clusters]
            max_sim = np.argmax(sim)            
                        
            if sim[max_sim] < threshold:
                if len(clusters) >= cluster_max:
                    # cluster ceiling reached so skip training example
                    sample_clusters[i] = -1
                    continue                                            
                
                # no cluster fits well enough: the sample seeds a new cluster
                clusters.append(new_cluster())
                z_i = len(clusters) - 1
            else:            
                z_i = max_sim

            # record the cluster this sample was allocated to
            sample_clusters[i] = z_i
            
            # extend sample with negative samples
            batch_X_term, batch_X_hyper, batch_y_label =\
                extend_batch_with_negatives(term_train_seq[i], 
                                            hyper_train_seq[i],
                                            neg_strategy,
                                            data.tokenizer, m
                                           )  

            # update parameters of the chosen cluster; element 0 of the
            # train_on_batch result is accumulated as the loss
            cluster = clusters[z_i]
            cluster.epoch_total_loss += cluster.model.train_on_batch(
                [batch_X_term, batch_X_hyper], batch_y_label)[0]            
            cluster.increment_training_samples()
        
        ####################### END OF EPOCH #######################                
        
        # instead of test loss, measure MRR as a more indicative validation metric
        print ("Running evaluation on trial data set...")
        predictions = predict_cluster_hypernyms(data.valid_query, data.tokenizer, clusters)
        _, all_scores = get_evaluation_scores((data.valid_query, data.valid_hyper), predictions)
        mrr = round(sum([score_list[0] for score_list in all_scores]) / len(all_scores), 5)
        clusters[0].mrr.append(mrr)
        
        # calculate mean loss and increase epoch_count for clusters
        for cluster in clusters:    
            cluster.calc_avg_loss()
            cluster.increment_epoch()
                
        print('Epoch:', max([c.epoch_count for c in clusters]), 'Cluster #:', len(clusters) ,
              'Loss:', np.mean([c.avg_loss[-1] for c in clusters]),
              'Test MRR:', mrr)
    return clusters, sample_clusters

In [146]:
import datetime

# Build the embedding layer once; it is shared among all clusters.
# Alternative source: the 'TermEmbedding' weights held inside crim_model_2.
#embeddings_matrix = [l for l in crim_model_2.layers if type(l) == Model][0].get_layer(name='TermEmbedding').get_weights()[0]
embeddings_matrix = data.embedding_matrix
embedding_layer = get_embeddings_model(embedding_matrix=embeddings_matrix)
epochs, m = 20, 5

# hyper-parameters for this clustering run
yamane_options = dict(
    threshold = 0.2,
    negative_option = 'random',
    phi_init_option = 'random_identity',
    sigmoid_constraint_option = 'ForceToOne',
    dropout_rate = 0.1,
    learning_rate = 0.001,
    cluster_max = 40,
    #init_model = crim_model_2
)

print("Training started at: %s" % (datetime.datetime.now()))
clusters, sample_clusters = yamane_train(epochs, m, data, embedding_layer, **yamane_options)
print("Training concluded at: %s" % (datetime.datetime.now()))

Training started at: 2019-01-04 02:17:48.946289
('m: ', 5, 'lambda: ', 0.2, 'max epoch per cluster: ', 20, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'sigmoid_kernel_constraint: ', 'ForceToOne', 'dropout: ', 0.1, 'learning_rate: ', 0.001, 'cluster_max: ', 40)
('Sample clusters size: ', 11779)
('Processed ', 1000, 'samples...')
('Processed ', 2000, 'samples...')
('Processed ', 3000, 'samples...')
('Processed ', 4000, 'samples...')
('Processed ', 5000, 'samples...')
('Processed ', 6000, 'samples...')
('Processed ', 7000, 'samples...')
('Processed ', 8000, 'samples...')
('Processed ', 9000, 'samples...')
('Processed ', 10000, 'samples...')
('Processed ', 11000, 'samples...')
Running evaluation on trial data set...


MemoryError: 

In [147]:
from collections import Counter
# Tally how many entries of sample_clusters carry each distinct value.
# NOTE(review): presumably one cluster index per training sample -- confirm against yamane_train.
#sum(Counter(sample_clusters).values()[:])
Counter(sample_clusters)

Counter({0: 9376, 1: 2290, 2: 113})

In [148]:
# Per-cluster summary: the training_samples bookkeeping followed by the avg_loss history.
# NOTE(review): training_samples looks like a mapping of epoch index -> sample count -- confirm.
for c in clusters:
    # Call-form print with explicit formatting: emits the same text as the
    # Python 2 statement `print a, b`, and is also valid syntax on Python 3.
    print ("%s %s" % (c.training_samples, c.avg_loss))

{0: 9918, 1: 9377, 2: 9378, 3: 9385, 4: 9376} [0.3383950079052894, 0.1707735969544977, 0.12343727941536435, 0.0986675734687661, 0.08062875606970399]
{0: 1861, 1: 2402, 2: 2401, 3: 2368, 4: 2290} [0.596995979147147, 0.44526770622853334, 0.3491070275394706, 0.28401292040094583, 0.23139021659359402]
{0: 26, 1: 113} [0.6919059547094198, 0.6692267740722251]


In [135]:
# Inspect the weights of the layer named 'Prediction' in the first cluster's model.
clusters[0].model.get_layer(name='Prediction').get_weights()

[array([[1.]], dtype=float32), array([0.68318665], dtype=float32)]

In [None]:
print ("Generating predictions...")

# Run the multi-cluster predictor over the test queries.
# Single-model alternative kept for reference:
#cluster_predictions = predict_crim_hypernyms(data.test_query, data.tokenizer, clusters[0].model, crim_get_hypernym)
cluster_predictions = predict_cluster_hypernyms(data.test_query, data.tokenizer, clusters)

print ("CRIM evaluation:")
test_pairs = (data.test_query, data.test_hyper)
score_names, all_scores = get_evaluation_scores(test_pairs, cluster_predictions)

# Report every metric as its mean over all_scores, rounded to 5 decimals.
for k, score_name in enumerate(score_names):
    mean_score = sum([score_list[k] for score_list in all_scores]) / len(all_scores)
    print (score_name + ': ' + str(round(mean_score, 5)))

In [142]:
# Spot-check the predictions stored under the key 'pumpkin'.
cluster_predictions['pumpkin']

[u'amusement_ride',
 u'business_establishment',
 u'liquor_licence',
 u'commercial_enterprise',
 u'europages',
 u'toy_industry',
 u'manufacture',
 u'definition',
 u'distributor',
 u'person',
 u'purpose',
 u'main_function',
 u'licensable',
 u'society',
 u'interprovincial']

In [44]:
# Evaluate as if the first cluster's model were the only one available.
first_cluster_model = clusters[0].model
cluster0_predictions = predict_crim_hypernyms(
    data.test_query,
    data.tokenizer,
    first_cluster_model,
    crim_max_get_hypernym,
)

print ("CRIM evaluation:")
score_names, all_scores = get_evaluation_scores(
    (data.test_query, data.test_hyper),
    cluster0_predictions,
)

# One line per metric: the mean over all_scores, rounded to 5 decimals.
for k, metric_name in enumerate(score_names):
    metric_total = sum([score_list[k] for score_list in all_scores])
    print (metric_name + ': ' + str(round(metric_total / len(all_scores), 5)))

('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.09468
MAP: 0.04557
P@1: 0.06338
P@5: 0.04542
P@10: 0.04246


In [99]:
# Diagnostics cell. Every print uses the call form, which emits the same text as
# the Python 2 print statement here and is also valid syntax on Python 3.

# Tally of the values stored in sample_clusters.
print (Counter(sample_clusters))

# Per-cluster epoch counter and training_samples bookkeeping.
for c in clusters:
    #print c.model.get_layer(name ='Prediction').get_weights()
    print ("%s %s" % (c.epoch_count, c.training_samples))

#clusters[9].model.get_layer(name='Prediction').get_weights()
#predict_crim_hypernyms(['rod_laver'], data.tokenizer, crim_model, crim_max_get_hypernym)

#crim_model_2.predict([[data.tokenizer.word_index['rod_laver']], 
#                    [data.tokenizer.word_index['athlete']]])


# Compare the summed 'TermEmbedding' weights from three places: the nested Model
# inside the first cluster's model, the raw data.embedding_matrix, and the
# embedding_layer object (displayed as the cell's value).
# NOTE(review): equal sums suggest the embeddings were not modified by training -- confirm.
print (np.sum([l for l in clusters[0].model.layers if type(l) == Model][0].get_layer(name='TermEmbedding').get_weights()[0]))
print (np.sum(data.embedding_matrix))

np.sum(embedding_layer.get_layer(name='TermEmbedding').get_weights()[0])

Counter({-1: 10930, 0: 849})
5 {0: 11779, 1: 7330, 2: 2422, 3: 1208, 4: 849}
-32148.61
-32148.61


-32148.61

In [100]:
from collections import Counter
# Tally of the values stored in sample_clusters (call-form print: same text on
# Python 2, valid syntax on Python 3).
print (Counter(sample_clusters))


#sample_clusters
#print np.where(sample_clusters == 1)[0]

#data.train_query[66], data.train_hyper[66]

# Score every (query, hypernym) training pair with the first cluster's model.
# NOTE: this emits one line per training pair, so the cell output is very long.
# The model and the tokenizer's word_index do not change inside the loop, so
# they are looked up once instead of once per pair.
cluster0_model = clusters[0].model
tokenizer_word_index = data.tokenizer.word_index
for q, h in zip(data.train_query, data.train_hyper):
    hyp_prob = cluster0_model.predict([[tokenizer_word_index[q]], [tokenizer_word_index[h]]])
    # Explicit formatting reproduces the space-separated text of `print q, h, hyp_prob`.
    print ("%s %s %s" % (q, h, hyp_prob))

Counter({-1: 10930, 0: 849})
blackfly homopterous_insect [[0.0920177]]
blackfly insect [[0.15019463]]
turonian technical_specification [[0.09434162]]
turonian geologic_timescale [[0.09426979]]
turonian physical_property [[0.10644199]]
turonian geological_period [[0.10768096]]
turonian magnitude [[0.10547616]]
turonian unit_of_time [[0.09206219]]
turonian geological_time [[0.12664953]]
turonian geologic_time [[0.13282974]]
abhorrence distaste [[0.10247174]]
abhorrence hatred [[0.11265936]]
abhorrence hate [[0.11295412]]
abhorrence disgust [[0.10763728]]
tropical_storm atmosphere [[0.11236698]]
tropical_storm windstorm [[0.1406302]]
tropical_storm violent_storm [[0.13833816]]
tropical_storm air_current [[0.10461441]]
tropical_storm atmospheric_state [[0.11180094]]
tropical_storm density [[0.08922068]]
tropical_storm current_of_air [[0.10645618]]
tropical_storm storm_damage [[0.14270832]]
tropical_storm atmospheric_phenomenon [[0.11525891]]
tropical_storm storm [[0.16198856]]
tropical_sto

deck platform [[0.10865111]]
deck shape [[0.08933691]]
deck surface [[0.09336391]]
deck venture [[0.09007652]]
deck floor [[0.09791429]]
deck upper_deck [[0.10380888]]
jet_stream phenomenon [[0.09883424]]
jet_stream air_current [[0.10835018]]
jet_stream weather [[0.15238938]]
jet_stream atmospheric_phenomenon [[0.11710775]]
jet_stream current_of_air [[0.10401927]]
jet_stream physical_phenomenon [[0.09822613]]
jet_stream weather_condition [[0.13978507]]
jet_stream airstream [[0.11032039]]
jet_stream wind [[0.13711978]]
muhammad_ali_jinnah person [[0.18955883]]
muhammad_ali_jinnah national_leader [[0.10378031]]
muhammad_ali_jinnah statesman [[0.10555199]]
muhammad_ali_jinnah politician [[0.11716806]]
muhammad_ali_jinnah owner [[0.12218209]]
muhammad_ali_jinnah political_leader [[0.11928935]]
muhammad_ali_jinnah leader [[0.10662608]]
habit function [[0.10377374]]
habit affair [[0.08908623]]
habit suit_of_clothes [[0.09431229]]
habit exercise [[0.12535225]]
habit suit [[0.10368294]]
bennin

certified_check cheque [[0.11816584]]
certified_check check [[0.10436872]]
certified_check order_of_payment [[0.11613838]]
pearl stone [[0.08660061]]
pearl ball [[0.081631]]
pearl substantial_form [[0.08347006]]
pearl abstract_object [[0.09152839]]
pearl precious_stone [[0.08760844]]
pearl jewel [[0.08992292]]
pearl antique_white [[0.07260676]]
pearl rock [[0.08500468]]
pearl pebble [[0.07896512]]
pearl orb [[0.08921589]]
pearl white [[0.08915613]]
playwriting writer [[0.12098771]]
sajid_mahmood person [[0.1928514]]
sajid_mahmood sportswoman [[0.10695394]]
sajid_mahmood competitor [[0.10709269]]
sajid_mahmood cricketer [[0.10207116]]
sajid_mahmood sportsperson [[0.12088095]]
smew sea_duck [[0.09961044]]
smew waterfowl [[0.12519911]]
smew waterbird [[0.10835594]]
smew animal [[0.17484443]]
smew duck [[0.10444371]]
biosatellite satellite [[0.0984017]]
biosatellite computer_code [[0.10752177]]
biosatellite transportation [[0.1060435]]
biosatellite electrical_appliance [[0.09764454]]
biosa

photojournalism research [[0.11636108]]
photojournalism investigation [[0.11053377]]
photojournalism piece_of_work [[0.10384881]]
photojournalism mass_media [[0.15154734]]
photojournalism narrative [[0.1136984]]
photojournalism investigating [[0.10886014]]
photojournalism print_media [[0.15385951]]
photojournalism news_media [[0.14177766]]
photojournalism communication_medium [[0.1395691]]
photojournalism book [[0.10881636]]
photojournalism mass_medium [[0.13135728]]
spaceport installation [[0.11669578]]
spaceport facility [[0.12684835]]
el_nino dramatics [[0.09181613]]
el_nino classical [[0.08272935]]
el_nino dramatic_play [[0.0946926]]
till equipment [[0.10136623]]
till parcel [[0.10107803]]
horizontal_bar durable_good [[0.09782653]]
horizontal_bar durable_goods [[0.08973133]]
horizontal_bar gymnastic_apparatus [[0.0890138]]
horizontal_bar equipment [[0.11624561]]
horizontal_bar exerciser [[0.10171664]]
horizontal_bar sports_equipment [[0.10870934]]
parameter computer_code [[0.159922

ghrelin hormone [[0.12013252]]
ghrelin humor [[0.08344194]]
ghrelin biomolecule [[0.11739194]]
ghrelin biopolymer [[0.1030577]]
ghrelin chemical_group [[0.09615467]]
ghrelin chemical_series [[0.08851977]]
ghrelin signaling_molecule [[0.11140775]]
ghrelin chemical_bond [[0.09791756]]
ghrelin internal_secretion [[0.09590401]]
ghrelin gi_hormones [[0.09301982]]
ghrelin body_fluid [[0.12444063]]
ghrelin gastrointestinal_hormone [[0.09798939]]
ghrelin moiety [[0.09555206]]
ghrelin bond [[0.09365266]]
benefaction donation [[0.11414466]]
benefaction kindness [[0.10304992]]
benefaction contribution [[0.10920805]]
benefaction gift [[0.11521389]]
abdication resignation [[0.11033426]]
abdication group_event [[0.10427248]]
abdication written_document [[0.13660353]]
tebuconazole fungicide [[0.12353768]]
heritage transferred_property [[0.11727265]]
delay code [[0.12401038]]
delay time_interval [[0.10680801]]
delay interval [[0.09456098]]
delay break [[0.08761637]]
delay computer_code [[0.11435198]]


tony_trujillo skateboarder [[0.09387194]]
tony_trujillo person [[0.17939694]]
racoon aboriginal [[0.09502466]]
racoon mammal [[0.12781675]]
racoon carnivore [[0.12045991]]
racoon procyonid [[0.10004082]]
racoon animal [[0.1640342]]
thiocyanate crystal [[0.09045114]]
thiocyanate salt [[0.09492788]]
curling sport [[0.10798278]]
curling olympic_sports [[0.10016756]]
curling group_event [[0.10113172]]
curling team_sport [[0.1101157]]
curling type_of_sport [[0.11065906]]
curling winter_sport [[0.1025673]]
curling competition [[0.08865754]]
camelpox disorder [[0.11232433]]
camelpox illness [[0.1387236]]
camelpox animal_disease [[0.12623546]]
camelpox disease [[0.13547933]]
camelpox sickness [[0.10951239]]
psychopathology medical_research [[0.10706355]]
psychopathology psychology [[0.10934672]]
psychopathology learned_profession [[0.10172553]]
psychopathology enquiry [[0.10103688]]
psychopathology psychological_science [[0.10723983]]
psychopathology inquiry [[0.11005857]]
psychopathology bios

red_planet rocky_planet [[0.09607542]]
red_planet film [[0.09915281]]
red_planet picture_show [[0.0931554]]
natural_gas phenomenon [[0.08733913]]
natural_gas gas [[0.11479092]]
natural_gas chemical_phenomenon [[0.07090824]]
natural_gas energy_carrier [[0.1108515]]
natural_gas gaseous_state [[0.10163061]]
natural_gas existence [[0.09254783]]
natural_gas natural_phenomenon [[0.09583106]]
compact_muon_solenoid particle_detector [[0.10738967]]
compact_muon_solenoid research [[0.10806318]]
compact_muon_solenoid scientific_research [[0.10767823]]
pyrrhus king [[0.10362898]]
pyrrhus warrior [[0.09189907]]
pyrrhus chief_of_state [[0.10937724]]
pyrrhus male_monarch [[0.0831792]]
pyrrhus rank [[0.10044571]]
pyrrhus soldier [[0.10216377]]
pyrrhus state [[0.12779482]]
pyrrhus person [[0.18492329]]
pyrrhus military_man [[0.09960351]]
pyrrhus monarch [[0.09623776]]
pyrrhus potentate [[0.09525178]]
program engineering_science [[0.10832261]]
program picture [[0.09503089]]
program medium [[0.09727607]]

dirt granular_material [[0.09267138]]
dirt sanitary_condition [[0.10294682]]
dirt poo [[0.08801007]]
dirt report [[0.09941184]]
dirt defaecation [[0.08932479]]
dirt uncleanness [[0.08754276]]
dirt shit [[0.08435664]]
dirt dirtiness [[0.09349685]]
dirt waste_matter [[0.10667423]]
dirt refuse [[0.10446075]]
dirt trash [[0.09919309]]
dirt crap [[0.08226912]]
dirt hearsay [[0.08885055]]
dirt excrement [[0.10227676]]
dirt waste_material [[0.1183642]]
dirt dog_shit [[0.08346136]]
dirt excretion [[0.10298291]]
dirt excreta [[0.11260875]]
dirt waste_product [[0.10741663]]
dirt waste [[0.10878126]]
gary_fong photographer [[0.09401401]]
gary_fong lensman [[0.0851263]]
gary_fong person [[0.13022295]]
aramaic ordinary_language [[0.11154579]]
aramaic semitic_people [[0.08776526]]
aramaic afro-asiatic [[0.09406052]]
aramaic semite [[0.08846196]]
aramaic semitic [[0.09547697]]
aramaic family_relationship [[0.12870334]]
aramaic philosophy_of_language [[0.1049303]]
aramaic afro-asiatic_languages [[0.09

pollock seafood [[0.106272]]
pollock sea_creature [[0.10015247]]
pollock terminologist [[0.08450422]]
pollock animal [[0.12732953]]
pollock fish [[0.11439142]]
pollock marine_creature [[0.08683232]]
pollock saltwater_fish [[0.10798793]]
teleconference conference [[0.11080927]]
teleconference auditory_communication [[0.09396614]]
teleconference discussion [[0.10832908]]
teleconference spoken_communication [[0.10520779]]
teleconference oral_communication [[0.1150851]]
teleconference group_discussion [[0.10627348]]
teleconference speech_communication [[0.1022086]]
frank_henry specifications [[0.09354166]]
frank_henry olympiad [[0.08851649]]
frank_henry athletic_competition [[0.10577342]]
frank_henry type_of_sport [[0.10106811]]
frank_henry jock [[0.08823381]]
frank_henry competitor [[0.09841437]]
frank_henry contest [[0.10203364]]
frank_henry sportswoman [[0.09188335]]
frank_henry orbit_period [[0.08814041]]
frank_henry person [[0.1921734]]
frank_henry contestant [[0.10720634]]
frank_henr

richard_starkey rocker [[0.09553962]]
richard_starkey rock_star [[0.11235934]]
richard_starkey percussionist [[0.10267711]]
richard_starkey musician [[0.11820004]]
richard_starkey actor [[0.11426945]]
richard_starkey person [[0.18301015]]
richard_starkey drummer [[0.10472318]]
richard_starkey vocalist [[0.10584129]]
ibm firm [[0.12281992]]
ibm engineering_science [[0.11410889]]
ibm corporation [[0.13330135]]
ibm company [[0.13482712]]
ibm business_organization [[0.12986901]]
ibm engineering [[0.1272037]]
ibm business_firm [[0.12546866]]
ibm corp [[0.12473885]]
ibm database_management_system [[0.13606142]]
ibm venture [[0.1065578]]
ibm enterprise [[0.12828329]]
ibm business_organisation [[0.11148325]]
petrology morphology [[0.08886549]]
petrology methodology [[0.11212134]]
petrology geophysics [[0.10884377]]
petrology geophysical_science [[0.09395869]]
petrology geology [[0.11102736]]
petrology geomorphology [[0.10378854]]
ibc manufacturer [[0.11582114]]
ibc company [[0.12153536]]
ibc e

lucrezia_borgia moving-picture_show [[0.08619235]]
nu_metal musical_style [[0.12799199]]
nu_metal popular_music_genre [[0.10187449]]
nu_metal rock_'n'_roll [[0.11899879]]
nu_metal heavy_metal_music [[0.10360441]]
nu_metal music_genre [[0.127511]]
nu_metal economic_science [[0.08949153]]
nu_metal heavy_metal [[0.09935395]]
nu_metal rock_music [[0.12757808]]
turnpike infrastructure [[0.12207948]]
turnpike constructed_structure [[0.08796953]]
turnpike route [[0.12963006]]
turnpike main_road [[0.13414533]]
turnpike way [[0.09302821]]
turnpike obstructor [[0.08557599]]
parveen_babi person [[0.18764861]]
parveen_babi actor [[0.11906444]]
backstay structural_member [[0.10092824]]
backstay support [[0.08658586]]
backstay reinforcement [[0.08471644]]
backstay stay [[0.09278591]]
backstay strengthener [[0.07416286]]
backstay brace [[0.08181296]]
euphemism saying [[0.11681654]]
euphemism humour [[0.09712334]]
euphemism image [[0.08906081]]
euphemism literary_technique [[0.09882212]]
euphemism wit

ideal_gas gaseous_state [[0.09828892]]
ideal_gas state [[0.08999889]]
ideal_gas gas [[0.0941017]]
ideal_gas state_of_matter [[0.09829807]]
pressburg capital [[0.10577575]]
pressburg city [[0.12066234]]
pressburg country [[0.12163568]]
pressburg provincial_capital [[0.10073674]]
pressburg centre [[0.09451304]]
boson subatomic_particle [[0.1028221]]
boson particle [[0.11005718]]
saltire picture [[0.09780452]]
saltire figure [[0.09581445]]
saltire fault [[0.08653078]]
saltire image [[0.09423076]]
saltire cross [[0.09269949]]
saltire icon [[0.10069848]]
brookline town [[0.13668087]]
brookline township [[0.14443357]]
brookline new_england_town [[0.11079603]]
child_care aid [[0.10425344]]
child_care assistance [[0.12325382]]
child_care assist [[0.11917335]]
child_care help [[0.10934113]]
child_care service [[0.12018178]]
van camper_van [[0.09475379]]
van motorhome [[0.11212872]]
van move [[0.09768021]]
van wheeled_vehicle [[0.10138508]]
van traveling [[0.12004261]]
van technology [[0.0930497

turbofan mechanism [[0.10996531]]
turbofan motor [[0.12485927]]
turbofan mode_of_transport [[0.11840222]]
turbofan turbine [[0.13809761]]
turbofan airplane [[0.13096385]]
turbofan aircraft_engine [[0.15026349]]
turbofan means_of_transportation [[0.12022955]]
turbofan jet_aircraft [[0.14292283]]
turbofan jet [[0.12062462]]
turbofan turbomachine [[0.1165213]]
turbofan means_of_transport [[0.11428084]]
turbofan turbomachinery [[0.13990332]]
turbofan gas_turbine [[0.13964403]]
turbofan rotary_engine [[0.12197393]]
turbofan conveyance [[0.10218824]]
turbofan reciprocating_engine [[0.13183762]]
turbofan aeroplane [[0.11734412]]
turbofan aero_engine [[0.12759311]]
turbofan jet_engine [[0.13931324]]
turbofan engine-generator [[0.11333058]]
mollie_katzen chef [[0.10114404]]
mollie_katzen person [[0.17376676]]
mollie_katzen cook [[0.0953403]]
mollie_katzen head_cook [[0.10026099]]
mollie_katzen head_chef [[0.10054479]]
ryan_hall athlete [[0.11606072]]
ryan_hall black_belt [[0.0991755]]
ryan_hall

hill mount [[0.09094127]]
hill mapmaking [[0.08877301]]
megathrust_earthquake natural_disaster [[0.15604027]]
megathrust_earthquake observable [[0.09196885]]
megathrust_earthquake disaster [[0.14603472]]
megathrust_earthquake peril [[0.10296291]]
megathrust_earthquake seism [[0.10557094]]
megathrust_earthquake phenomenon [[0.09413858]]
megathrust_earthquake danger [[0.10475029]]
megathrust_earthquake jeopardy [[0.10492796]]
megathrust_earthquake misfortune [[0.09390798]]
megathrust_earthquake hazard [[0.12169822]]
megathrust_earthquake risk [[0.10878845]]
megathrust_earthquake endangerment [[0.10496396]]
megathrust_earthquake seismic_activity [[0.1331577]]
megathrust_earthquake quake [[0.1315944]]
megathrust_earthquake trouble [[0.08776736]]
megathrust_earthquake natural_phenomenon [[0.11588265]]
megathrust_earthquake earthquake [[0.14045738]]
megathrust_earthquake geological_phenomenon [[0.10263396]]
megathrust_earthquake catastrophe [[0.1334524]]
myers-briggs_type_indicator write-up 

retainer workman [[0.09942414]]
retainer disbursal [[0.10749057]]
retainer charge [[0.12098243]]
retainer outgo [[0.10565141]]
retainer expenditure [[0.11369431]]
retainer working_person [[0.11268647]]
retainer expense [[0.12311187]]
proton_pump protein [[0.11601194]]
proton_pump integral_membrane_protein [[0.09153596]]
proton_pump biomolecule [[0.11520427]]
circuit function [[0.10318709]]
circuit track [[0.10330497]]
circuit route [[0.08970115]]
circuit move [[0.09460602]]
circuit bound [[0.09119975]]
circuit judiciary [[0.10264739]]
circuit electrical_device [[0.11210937]]
circuit membership [[0.10429998]]
circuit circuitry [[0.11655042]]
circuit electronic_equipment [[0.12607442]]
circuit devising [[0.0984721]]
circuit path [[0.0841399]]
circuit race_track [[0.10090793]]
circuit racecourse [[0.08933809]]
circuit electronic_circuit [[0.12476609]]
circuit judicial [[0.11029081]]
circuit travel [[0.10223571]]
circuit racetrack [[0.10279772]]
circuit judicature [[0.09684331]]
circuit ma

dentist medical_practitioner [[0.1527476]]
dentist person [[0.1744171]]
dentist medical_man [[0.11190493]]
dentist dr. [[0.11342799]]
dentist medical_specialist [[0.13437825]]
vindication clearing [[0.09697254]]
vindication justification [[0.11451902]]
vindication research [[0.10209771]]
vindication healing_knowledge [[0.09533442]]
vindication liberation [[0.09823177]]
dulcimer stringed_instrument [[0.10687044]]
dulcimer string_instrument [[0.10978172]]
dulcimer musical_instrument [[0.11758775]]
dulcimer plucked_string_instrument [[0.09191445]]
turrican pc_game [[0.11061816]]
turrican computing_system [[0.15394516]]
turrican software_application [[0.16877307]]
turrican electronic_game [[0.10820179]]
turrican application_program [[0.14177941]]
turrican applications_programme [[0.1071725]]
turrican computer [[0.16527344]]
turrican platform [[0.113704]]
turrican computing_machine [[0.12185878]]
turrican computing_platform [[0.14521582]]
turrican electrical_load [[0.0971724]]
turrican comp

chairman corporate_title [[0.11182955]]
chairman senior_management [[0.12067224]]
chairman leader [[0.11238482]]
lisa_see person [[0.1900355]]
lisa_see writer [[0.10435129]]
bottled_water drinkable [[0.10744393]]
bottled_water state_of_matter [[0.09627885]]
bottled_water drink [[0.10465283]]
bottled_water fluid [[0.09953552]]
bottled_water drinking_water [[0.11312993]]
bottled_water h2o [[0.08657026]]
bottled_water beverage [[0.1111513]]
bottled_water potable [[0.11479337]]
bottled_water liquid_state [[0.09259563]]
bottled_water liquidity [[0.10872301]]
supervolcano extinct_volcano [[0.1071804]]
supervolcano mountain [[0.10730918]]
supervolcano harm [[0.09464914]]
supervolcano injury [[0.08347984]]
supervolcano toilet [[0.08937708]]
supervolcano mount [[0.09881508]]
supervolcano terrain [[0.11350908]]
supervolcano hurt [[0.08887544]]
supervolcano trauma [[0.09267465]]
supervolcano lithostratigraphic_unit [[0.09917571]]
supervolcano volcano [[0.13994263]]
supervolcano separation_process

distributer supplier [[0.12969385]]
distributer electrical_device [[0.10332777]]
distributer provider [[0.13730527]]
shepherd working_man [[0.10428642]]
shepherd worker [[0.11827075]]
shepherd herder [[0.09050581]]
shepherd laborer [[0.10781062]]
shepherd clergyman [[0.10648982]]
shepherd hired_man [[0.09775499]]
shepherd workman [[0.09485693]]
shepherd person [[0.18851879]]
shepherd priest [[0.10709467]]
shepherd employee [[0.12572317]]
shepherd working_person [[0.11631531]]
fasces symbolization [[0.10094187]]
fasces representational_process [[0.10033592]]
fasces symbolic_representation [[0.09283135]]
fasces allegory [[0.08611856]]
hit decease [[0.08362256]]
hit technical_specification [[0.10677692]]
hit specifications [[0.10366155]]
hit killer [[0.10571887]]
hit criminal_offence [[0.10867801]]
hit killing [[0.0920431]]
hit kill [[0.08821075]]
hit wrongful_act [[0.10614403]]
hit death [[0.10278608]]
hit cause_of_death [[0.10341515]]
hit actus_reus [[0.10646117]]
hit success [[0.098936

jammies nightclothes [[0.09388584]]
jammies clothes [[0.11896429]]
marriage point_of_reference [[0.09994517]]
marriage function [[0.09842305]]
marriage tribe [[0.12055577]]
marriage union [[0.10155606]]
marriage agreement [[0.118283]]
marriage understanding [[0.09957089]]
marriage note [[0.10820943]]
marriage family_unit [[0.1268895]]
marriage written_agreement [[0.13689291]]
marriage kin [[0.09814962]]
marriage reference_point [[0.09203976]]
marriage annotation [[0.08827488]]
kovel locale [[0.09412429]]
kovel city [[0.11494844]]
isaiah saint [[0.09225901]]
isaiah sacred_writing [[0.0945776]]
isaiah prophet [[0.09771561]]
isaiah religious_writing [[0.09443072]]
isaiah scripture [[0.09822404]]
isaiah person [[0.18093356]]
isaiah holy_man [[0.09612713]]
isaiah book [[0.09367383]]
isaiah sacred_scripture [[0.09769373]]
isaiah sacred_text [[0.09609882]]
isaiah religious_person [[0.11119072]]
isaiah religious_text [[0.100286]]
isaiah holy_person [[0.10353533]]
archduke affix [[0.10276263]]


bolster pillow [[0.08159775]]
gypsum research_laboratory [[0.09518236]]
gypsum stone [[0.08259281]]
gypsum science_laboratory [[0.09992053]]
gypsum research_lab [[0.09397455]]
gypsum rock [[0.09121277]]
gypsum geographic_point [[0.08784997]]
gypsum lab [[0.09439708]]
gypsum research_centre [[0.0899026]]
gypsum research_center [[0.09081882]]
gypsum research_institute [[0.09376774]]
gypsum laboratory [[0.09905792]]
gypsum calcium_sulfate [[0.10290516]]
gypsum calcium_sulphate [[0.08471254]]
gypsum sulfate_minerals [[0.08936276]]
raise slope [[0.07897431]]
raise increase [[0.11280438]]
raise incline [[0.08036806]]
raise increment [[0.10306958]]
raise gamble [[0.10460139]]
pocketbook soft-cover_book [[0.08453067]]
pocketbook paperback_book [[0.1037664]]
pocketbook storage_medium [[0.10483105]]
pocketbook storage [[0.10424611]]
pocketbook book [[0.09930115]]
pocketbook softback_book [[0.08408798]]
dub sound [[0.11119709]]
dub sense_experience [[0.09565876]]
dub sensation [[0.10352135]]
dub 

fibrillin glycoprotein [[0.10591049]]
budget money [[0.13388416]]
budget fund [[0.13931234]]
budget cost [[0.12818418]]
budget financial_plan [[0.13386971]]
budget expense [[0.13108519]]
budget monetary_fund [[0.1080171]]
kathryn_werdegar justice [[0.11049403]]
kathryn_werdegar person [[0.18906865]]
kathryn_werdegar judge [[0.13034596]]
kathryn_werdegar jurist [[0.10563572]]
kathryn_werdegar legal_expert [[0.11175556]]
confederacy union [[0.10745671]]
confederacy circle [[0.0843091]]
confederacy agreement [[0.12132968]]
confederacy understanding [[0.10061547]]
colugo animal [[0.1711116]]
gamboge saffron_yellow [[0.07381439]]
gamboge plant_substance [[0.09387659]]
gamboge yellow [[0.09358503]]
gamboge plant_product [[0.12606843]]
gamboge secretion [[0.09398559]]
gamboge orange_yellow [[0.08202388]]
gamboge plant_material [[0.13560946]]
gamboge saffron [[0.08294898]]
gamboge natural_resin [[0.08696778]]
skeet_shooting shoot [[0.0949428]]
skeet_shooting sport [[0.1176703]]
skeet_shooting 

timeshift computer_software [[0.16258733]]
coliseum popular_music [[0.09951603]]
coliseum theatre [[0.11833354]]
coliseum facility [[0.11188668]]
coliseum musical_style [[0.0998296]]
coliseum popular_music_genre [[0.08504067]]
coliseum sports_stadium [[0.1306575]]
coliseum public_building [[0.14196843]]
coliseum train_depot [[0.11825657]]
coliseum rock-and-roll [[0.09550857]]
coliseum rock_music [[0.10080653]]
coliseum railroad_terminal [[0.11791729]]
coliseum railway_station [[0.13256016]]
coliseum train_station [[0.13687174]]
coliseum arena [[0.11052454]]
coliseum work_of_art [[0.10645209]]
coliseum terminus [[0.09817689]]
coliseum constructed_structure [[0.08671807]]
coliseum rock_'n'_roll [[0.09946363]]
coliseum music_genre [[0.08965358]]
coliseum terminal [[0.09112163]]
coliseum station [[0.10974561]]
coliseum stadium [[0.13654397]]
coliseum rock [[0.08954461]]
toll_road motorway [[0.15232287]]
toll_road way [[0.08964105]]
toll_road route [[0.13416737]]
toll_road transport_infrast

hany_farid professor [[0.10003064]]
hany_farid faculty [[0.11104113]]
hany_farid lector [[0.09744775]]
hany_farid assistant_professor [[0.09941141]]
hany_farid person [[0.17480838]]
hany_farid educationist [[0.0979744]]
hany_farid educator [[0.11905191]]
hany_farid prof [[0.09347355]]
hany_farid instructor [[0.11330594]]
hany_farid teacher [[0.11691657]]
pakistan cartography [[0.08828162]]
pakistan locale [[0.10101193]]
pakistan country [[0.16193946]]
pakistan mapmaking [[0.0906263]]
pakistan locus [[0.08546237]]
pakistan asian_country [[0.12751742]]
tacitus person [[0.18950297]]
tacitus historiographer [[0.09521715]]
tacitus historian [[0.10396704]]
tacitus intellectual [[0.11622144]]
tacitus scholarly_person [[0.09588545]]
puffing breathing [[0.12349604]]
puffing sickness [[0.13256982]]
puffing reaction_mixture [[0.09201182]]
puffing breathe_out [[0.11295971]]
puffing cigarette [[0.10679694]]
puffing illness [[0.1499561]]
puffing chemical_action [[0.10522594]]
puffing smoking [[0.114

pacaya rupture [[0.09819844]]
developmental_disability malady [[0.09829015]]
developmental_disability illness [[0.12858829]]
developmental_disability sickness [[0.11062063]]
developmental_disability pathological_state [[0.09518398]]
developmental_disability disease [[0.12111093]]
developmental_disability disorder [[0.11059546]]
intersection freeway [[0.16898555]]
intersection junction [[0.1166803]]
intersection constructed_structure [[0.09118094]]
intersection thoroughfare [[0.13930616]]
intersection route [[0.13419005]]
intersection motorway [[0.15142386]]
intersection throughway [[0.12634623]]
intersection state_highway [[0.15487356]]
intersection physical_system [[0.09432799]]
intersection superhighway [[0.13381103]]
intersection turnpike [[0.14219981]]
intersection controlled-access_highway [[0.12875535]]
intersection expressway [[0.17006843]]
richard_rathbun person [[0.18820193]]
loon transmission [[0.10348789]]
loon person [[0.14487714]]
loon binary_relation [[0.08622766]]
loon s

young anthropologist [[0.10662551]]
howard_davis competitor [[0.10045194]]
howard_davis person [[0.19560412]]
howard_davis boxer [[0.10110381]]
howard_davis contestant [[0.11063644]]
howard_davis combatant [[0.10684676]]
howard_davis sport [[0.10429404]]
howard_davis fighter [[0.09503275]]
howard_davis sportsperson [[0.11160967]]
harasser disagreeable_person [[0.09454089]]
harasser attacker [[0.10140661]]
harasser person [[0.1747763]]
harasser wrongdoer [[0.12590751]]
harasser persona_non_grata [[0.10234474]]
harasser aggressor [[0.10663559]]
harasser bad_person [[0.11632285]]
anthony_caro person [[0.18391576]]
elbridge_gerry politician [[0.1254658]]
elbridge_gerry diplomatist [[0.09652554]]
elbridge_gerry governor [[0.12718686]]
elbridge_gerry boss [[0.11409903]]
elbridge_gerry person [[0.18485336]]
elbridge_gerry chief [[0.11979219]]
elbridge_gerry political_leader [[0.12816621]]
elbridge_gerry jurist [[0.1152085]]
elbridge_gerry leader [[0.11591438]]
parceling distribution [[0.09808

beijing means_of_transport [[0.11648841]]
beijing country [[0.15685137]]
beijing piece_of_land [[0.11987847]]
beijing traveling [[0.11822931]]
beijing capital [[0.14069203]]
beijing provincial_capital [[0.12784652]]
beijing piece_of_ground [[0.09188513]]
beijing land_site [[0.09823352]]
irish lexicology [[0.08965325]]
irish lexical [[0.0874416]]
irish philology [[0.08795235]]
irish forename [[0.11327123]]
irish given_name [[0.10342082]]
rejection observable [[0.08992317]]
bell code [[0.11947864]]
bell pc_game [[0.09286025]]
bell spysat [[0.08687727]]
bell move [[0.09973194]]
bell measurement [[0.09779285]]
bell group_event [[0.10593697]]
bell bus_stop [[0.11310098]]
bell automobile [[0.11184371]]
bell edifice [[0.08649641]]
bell road_vehicle [[0.0996882]]
bell locale [[0.09259062]]
bell juridical_person [[0.12748039]]
bell transport [[0.09725438]]
bell air_travel [[0.10746574]]
bell movement [[0.09292567]]
bell transportation [[0.11818915]]
bell computer_software [[0.11950246]]
bell au

dragnet literary_composition [[0.09422895]]
dragnet mesh [[0.08453594]]
dragnet television_show [[0.1720394]]
dragnet moving-picture_show [[0.08186138]]
dragnet mystery_story [[0.1051986]]
dragnet crime_novel [[0.1104831]]
dragnet tv_program [[0.16439414]]
dragnet network [[0.10515659]]
dragnet television_series [[0.16267529]]
dragnet meshwork [[0.08055587]]
dragnet computer_network [[0.10581347]]
dragnet detective_novel [[0.10632275]]
dragnet crime_thriller [[0.11865373]]
dragnet television_program [[0.16606246]]
dragnet drama [[0.12832637]]
dragnet mystery_novel [[0.11745755]]
dragnet tv_show [[0.16708063]]
dragnet technics [[0.08709566]]
dragnet policier [[0.10527503]]
dragnet tv_series [[0.15871501]]
dragnet video [[0.12965357]]
dragnet motion_picture [[0.13049372]]
dragnet murder_mystery [[0.12301814]]
dragnet picture_show [[0.11830468]]
isle_of_arran locale [[0.0976534]]
isle_of_arran cartography [[0.086169]]
caldera separation_process [[0.0925631]]
caldera volcanic_crater [[0.13

amsterdam_internet_exchange profits [[0.10872033]]
amsterdam_internet_exchange earnings [[0.11360609]]
tricolour picture [[0.09253112]]
tricolour figure [[0.09420774]]
tricolour map [[0.08913142]]
tricolour fault [[0.09013086]]
tricolour image [[0.08623127]]
tricolour portraiture [[0.09360479]]
tricolour flag [[0.09589723]]
tricolour drawing [[0.10383467]]
tricolour mistake [[0.10678693]]
whoredom anti-social_behaviour [[0.10708836]]
whoredom criminal_offense [[0.12933405]]
whoredom crime [[0.12258936]]
whoredom wrong [[0.10665248]]
whoredom law-breaking [[0.10263199]]
whoredom wrongful_conduct [[0.11678281]]
whoredom criminal_offence [[0.1285709]]
suite picture [[0.09230793]]
suite flat [[0.08663695]]
suite data_storage_device [[0.1194016]]
suite transmission_channel [[0.10345125]]
suite housing [[0.10052213]]
suite pic [[0.10252933]]
suite social_event [[0.09423786]]
suite living_accommodations [[0.10319349]]
suite business_process [[0.12775114]]
suite data-storage_medium [[0.0896754

index data_structure [[0.12935099]]
index computer_file [[0.15599227]]
index written_language [[0.10562168]]
index database [[0.15690339]]
index list [[0.12898757]]
index social_norm [[0.09169813]]
index arrangement [[0.10468615]]
index ordered_series [[0.09625608]]
index data_file [[0.15041468]]
index write-up [[0.1145566]]
index database_management_system [[0.14403404]]
index technical_standard [[0.11543071]]
index mathematical_notation [[0.11468945]]
index text_file [[0.14795326]]
index written_document [[0.12110583]]
index standard [[0.12286798]]
index written_communication [[0.12227753]]
sonogram picture [[0.09751908]]
sonogram drawing [[0.08772395]]
sonogram figure [[0.08656325]]
sonogram fault [[0.09000532]]
craig_anderson hockey_player [[0.1092892]]
craig_anderson sportsperson [[0.10906562]]
craig_anderson riding_horse [[0.08730619]]
craig_anderson series [[0.09442833]]
craig_anderson horseriding [[0.08509398]]
craig_anderson television_show [[0.10724878]]
craig_anderson goalte

pubmed bibliographic_database [[0.1304825]]
pubmed electrical_load [[0.09425341]]
pubmed database [[0.15791076]]
pubmed electronic_database [[0.14522891]]
pubmed on-line_database [[0.13409059]]
pubmed household_appliance [[0.09383263]]
pubmed data-storage_medium [[0.08825667]]
pubmed online_database [[0.14153334]]
pubmed web_site [[0.13120905]]
pubmed database_management_system [[0.14414023]]
pubmed computer_database [[0.14049427]]
green stone [[0.09498556]]
green precious_stone [[0.10321547]]
green jade [[0.09335933]]
green spectral_color [[0.08632105]]
green silicate_mineral [[0.09285692]]
green crystal [[0.09823249]]
green primary_color [[0.09425427]]
green primary_colour [[0.09306404]]
green spectral_colour [[0.08384684]]
green gem [[0.091517]]
slut album [[0.10100792]]
slut debauchee [[0.08948138]]
slut libertine [[0.09558075]]
slut musical_work [[0.1134703]]
slut sloven [[0.09034722]]
slut record_album [[0.09372599]]
slut pig [[0.10155229]]
slut slob [[0.10001022]]
slut studio_al

bonemeal organic_fertilizer [[0.12544301]]
bonemeal fertilizer [[0.12213799]]
bonemeal organic_food [[0.12760127]]
tutankhamun person [[0.14771026]]
tutankhamun work_of_art [[0.11164726]]
tutankhamun pharaoh_of_egypt [[0.08060164]]
tutankhamun pharaoh [[0.08789913]]
tutankhamun expression [[0.09714471]]
tang_xianzu writer [[0.10716309]]
tang_xianzu playwrighting [[0.09911919]]
tang_xianzu playwright [[0.10582403]]
tang_xianzu dramatist [[0.0998143]]
tang_xianzu person [[0.1926771]]
hypertext_markup_language computer_code [[0.15737405]]
hypertext_markup_language code [[0.13006386]]
hypertext_markup_language computer [[0.16391592]]
hypertext_markup_language language [[0.1118825]]
hypertext_markup_language computer_language [[0.14319329]]
hypertext_markup_language markup_language [[0.13124517]]
hypertext_markup_language string [[0.09220724]]
aryabhata satellite [[0.10340907]]
aryabhata astronomical_object [[0.0921431]]
aryabhata scholar [[0.10006481]]
aryabhata person [[0.15709352]]
aryab

simon_walker person [[0.1788455]]
simon_walker writer [[0.09346692]]
terrier canid [[0.10759345]]
terrier carnivore [[0.10496231]]
terrier domestic_dog [[0.11857381]]
terrier canis_familiaris [[0.11323028]]
terrier dog_type [[0.09529556]]
terrier domestic_animal [[0.12819083]]
terrier dogness [[0.09199597]]
terrier dog [[0.13247998]]
terrier canine [[0.12164046]]
terrier domesticated_animal [[0.1236769]]
terrier animal [[0.14704795]]
terrier carnivory [[0.09436958]]
psychedelic psychoactive_drug [[0.10449593]]
psychedelic drug [[0.10064192]]
ramesh_powar person [[0.18200491]]
blithe_spirit drama [[0.10888209]]
blithe_spirit movie [[0.10614004]]
blithe_spirit work_of_art [[0.10906138]]
blithe_spirit social_event [[0.11921697]]
blithe_spirit show [[0.10704955]]
kingbird tyrant_flycatcher [[0.1008881]]
kingbird bird [[0.13348067]]
kingbird animal [[0.17124023]]
kingbird flycatcher [[0.10199694]]
light electromagnetic_wave [[0.1032239]]
light life [[0.10198276]]
light phenomenon [[0.090894

executive_vice_president vice_president [[0.12706307]]
executive_vice_president business_executive [[0.12673713]]
executive_vice_president executive [[0.13475432]]
executive_vice_president boss [[0.12031994]]
executive_vice_president vp [[0.11827312]]
executive_vice_president chief [[0.12272722]]
executive_vice_president executive_director [[0.12951314]]
executive_vice_president v.p. [[0.11496507]]
executive_vice_president decision_maker [[0.13077551]]
executive_vice_president honorific [[0.09853847]]
executive_vice_president corporate_title [[0.1156047]]
executive_vice_president senior_management [[0.13096257]]
executive_vice_president leader [[0.11316613]]
extramarital_sex love [[0.1069094]]
extramarital_sex lovemaking [[0.12847732]]
extramarital_sex sex_activity [[0.13162635]]
extramarital_sex sexual_love [[0.11942287]]
extramarital_sex bodily_function [[0.11886226]]
extramarital_sex sexual_activity [[0.16876373]]
extramarital_sex vaginal_sex [[0.14413673]]
extramarital_sex sexual_a

chief_research_officer business_executive [[0.1173093]]
chief_research_officer executive [[0.12115851]]
chief_research_officer title_of_respect [[0.09343784]]
chief_research_officer person [[0.16261524]]
chief_research_officer executive_director [[0.12002059]]
chief_research_officer constituent [[0.09194791]]
chief_research_officer decision_maker [[0.12281077]]
chief_research_officer senior_management [[0.11951386]]
jim_beach trainer [[0.11000124]]
jim_beach person [[0.17613895]]
jim_beach manager [[0.11966562]]
jim_beach coach [[0.10159029]]
jim_beach leader [[0.09603967]]
tubing equipment [[0.12216777]]
tubing shape [[0.09807319]]
tubing plane_figure [[0.08283919]]
tubing cylinder [[0.10648458]]
tubing passage [[0.07951425]]
tellico_reservoir reservoir [[0.11704793]]
tellico_reservoir artificial_lake [[0.11917622]]
tellico_reservoir lake [[0.12583567]]
tellico_reservoir fluid [[0.08392876]]
tellico_reservoir waterbody [[0.12974504]]
tellico_reservoir man-made_lake [[0.12309517]]
tell

retail specifications [[0.1263055]]
retail marketing [[0.14576964]]
retail worker [[0.11241458]]
retail sales [[0.1580135]]
retail salesclerk [[0.09935205]]
retail salesperson [[0.12137929]]
retail employee [[0.12295102]]
retail vender [[0.10525717]]
retail seller [[0.13335295]]
retail salesman [[0.10517281]]
retail selling_technique [[0.11395296]]
tau_neutrino molecular_entity [[0.09299665]]
tau_neutrino subatomic_particle [[0.1064442]]
tau_neutrino particle [[0.11192258]]
tau_neutrino elementary_particle [[0.10498907]]
proprioception perception [[0.10700299]]
proprioception stimulus [[0.09308]]
proprioception perceptible [[0.085987]]
proprioception perceptual_experience [[0.09884913]]
proprioception sensation [[0.10479285]]
proprioception percept [[0.09102399]]
proprioception sense [[0.09896462]]
proprioception sensory_faculty [[0.09103864]]
proprioception sensibility [[0.08891571]]
proprioception sensitiveness [[0.09209462]]
proprioception sensitivity [[0.1057072]]
proprioception fi

smoothbore equipment [[0.11238868]]
smoothbore technology [[0.11053061]]
smoothbore tube [[0.08330435]]
smoothbore gun [[0.09125582]]
smoothbore firearm [[0.11054012]]
smoothbore shape [[0.08692347]]
smoothbore engineering_physics [[0.09375039]]
smoothbore geometric_shape [[0.0871905]]
smoothbore projectile [[0.09281897]]
smoothbore tubing [[0.09110014]]
smoothbore weapon_system [[0.11278142]]
smoothbore ranged_weapon [[0.10146026]]
quietness calmness [[0.09628437]]
quietness quiet [[0.09713566]]
quietness silence [[0.09961891]]
mathworks corporation [[0.11511936]]
mathworks company [[0.1132565]]
mathworks business_organization [[0.11600398]]
mathworks business_firm [[0.11487216]]
mathworks corp [[0.11229203]]
mathworks venture [[0.09276233]]
mathworks enterprise [[0.11666192]]
mathworks business_organisation [[0.10914949]]
bomberman software [[0.17548311]]
bomberman software_program [[0.17048979]]
bomberman pc_game [[0.11397969]]
bomberman computer_software [[0.16687833]]
bomberman co

paragonite silicate_mineral [[0.09205054]]
short-term_memory remembering [[0.09160537]]
short-term_memory memory [[0.09786639]]
cinnamomum evergreen [[0.1052529]]
cinnamomum plant [[0.15678655]]
cinnamomum angiosperm [[0.11620497]]
cinnamomum evergreen_plant [[0.1071206]]
act_of_congress statute [[0.1453033]]
act_of_congress enactment [[0.13168709]]
act_of_congress music_group [[0.09730989]]
act_of_congress band [[0.08426476]]
act_of_congress musical_group [[0.09737886]]
act_of_congress legislative_act [[0.12196976]]
pipe engineering_science [[0.10558619]]
pipe cylinder [[0.11329167]]
pipe figure [[0.08685303]]
pipe shape [[0.09355747]]
pipe round_shape [[0.08700241]]
pipe intersection [[0.0943526]]
pipe technology [[0.11789021]]
pipe plane_section [[0.08771044]]
pipe form [[0.09936097]]
pipe tube [[0.10260773]]
pipe equipment [[0.1288424]]
pipe way [[0.08862057]]
pipe tubing [[0.10553036]]
joyce person [[0.19265951]]
joyce linguistics [[0.09279636]]
joyce personal_name [[0.10537247]]


beam side [[0.08033698]]
beam equipment [[0.11976367]]
beam scalar [[0.09899085]]
beam emission [[0.1061914]]
beam ionizing_radiation [[0.1153495]]
beam representational_process [[0.09426061]]
beam physical_phenomenon [[0.10983011]]
beam durables [[0.10014622]]
beam signal [[0.10460337]]
bean plant [[0.1232684]]
bean vegetable [[0.10870887]]
bean legume [[0.10959147]]
bean tv [[0.09147852]]
bean veggie [[0.09678046]]
bean veg [[0.09694068]]
bean seed [[0.11124425]]
bean video [[0.10474001]]
bean leguminous_plant [[0.08666153]]
bean applied_science [[0.09453208]]
bean telecasting [[0.09456684]]
bean climbing_plant [[0.0851705]]
bean television [[0.09440716]]
suer complainant [[0.12384722]]
magistrate legal_expert [[0.11280519]]
magistrate justice [[0.12018347]]
magistrate adjudicator [[0.11118385]]
magistrate jurisconsult [[0.10417473]]
magistrate person [[0.19052589]]
magistrate judge [[0.13874021]]
magistrate jurist [[0.11267206]]
typhoon_muifa atmosphere [[0.1090777]]
typhoon_muifa w

electron_microscopy investigating [[0.10118102]]
electron_microscopy magnifier [[0.08300454]]
electron_microscopy scientific_instrument [[0.09488936]]
sigma_lambda_beta social_club [[0.11826797]]
timeless function [[0.10072897]]
timeless move [[0.09061375]]
timeless work_of_art [[0.12224538]]
timeless single [[0.10076093]]
timeless narration [[0.11593557]]
timeless figure_of_speech [[0.09521525]]
timeless album [[0.1132]]
timeless story [[0.11121792]]
timeless novel [[0.09843633]]
timeless episode [[0.10893259]]
timeless narrative [[0.11381176]]
timeless record_album [[0.10681001]]
timeless fiction [[0.11903838]]
timeless motion [[0.09761058]]
timeless opus [[0.10413211]]
timeless piece_of_work [[0.10660171]]
timeless studio_album [[0.10095713]]
timeless rhetorical_device [[0.10211902]]
timeless movement [[0.09225638]]
card_index file [[0.1206519]]
card_index file_cabinet [[0.11907104]]
card_index filing_cabinet [[0.11026862]]
card_index office_furniture [[0.10819496]]
stucco plaster [

monoclonal protein [[0.13225412]]
monoclonal antibody [[0.12976125]]
monoclonal biomolecule [[0.12177581]]
monoclonal macromolecule [[0.11527807]]
st._augustine diocese [[0.10818475]]
st._augustine intellectual [[0.10702531]]
st._augustine church_fathers [[0.09756552]]
st._augustine house_of_prayer [[0.09718867]]
st._augustine church [[0.11223069]]
st._augustine deity [[0.10193853]]
st._augustine godhood [[0.10146248]]
st._augustine town [[0.10508278]]
st._augustine clergyman [[0.10901434]]
st._augustine church_father [[0.09762099]]
st._augustine holy_man [[0.10154662]]
st._augustine philosopher [[0.09884545]]
st._augustine holy_order [[0.09975123]]
st._augustine holy_person [[0.10498815]]
st._augustine saint [[0.1007782]]
st._augustine capital [[0.09455038]]
st._augustine theologist [[0.09335592]]
st._augustine place_of_worship [[0.11770064]]
diversity difference [[0.10256517]]
diversity dissimilarity [[0.08387584]]
mugwump political_organization [[0.10932316]]
mugwump politician [[0.

terrorism offense [[0.11612722]]
terrorism misconduct [[0.11676658]]
terrorism wrongfulness [[0.11496755]]
terrorism wrongful_act [[0.11943957]]
terrorism criminal_offense [[0.1312186]]
terrorism crime [[0.1267016]]
terrorism unlawfulness [[0.11733725]]
terrorism injustice [[0.12173632]]
terrorism wrongdoing [[0.12242003]]
terrorism wrongful_conduct [[0.11880982]]
terrorism criminal_offence [[0.13188173]]
st._edward city [[0.12090562]]
disciple person [[0.1924898]]
disciple follower [[0.10083467]]
ball field_game [[0.09283853]]
ball social_function [[0.1060703]]
ball form [[0.11243486]]
ball dance [[0.0959966]]
ball shape [[0.08644399]]
ball round_shape [[0.08220718]]
ball spheroid [[0.08021978]]
ball genital [[0.10241229]]
ball olympic_sport [[0.10381713]]
ball type_of_sport [[0.11459965]]
ball ellipsoid [[0.08232829]]
ball propulsion [[0.08940604]]
ball sex_organ [[0.09836092]]
ball pellet [[0.08439991]]
ball sexual_organ [[0.10324975]]
ball body_part [[0.11580658]]
ball geometric_sh

hermes spacefaring [[0.09418854]]
hermes imaginary_being [[0.10666751]]
hermes greek_deity [[0.0837125]]
hermes applications_programme [[0.09681037]]
hermes spacecraft [[0.09590679]]
hermes heavenly_body [[0.08691106]]
hermes deity [[0.09485193]]
hermes godhood [[0.09698103]]
hermes space_flight [[0.09263504]]
hermes turbine [[0.09172239]]
hermes cartoon_character [[0.10245124]]
hermes aircraft_engine [[0.09616449]]
hermes orbiter [[0.09495706]]
hermes astronomical_object [[0.09184566]]
hermes application_software [[0.10612185]]
hermes video_game_console [[0.09091603]]
hermes space_travel [[0.10032665]]
hermes computing_platform [[0.10249997]]
hermes air_transportation [[0.10561773]]
hermes games_console [[0.09572623]]
hermes air_travel [[0.11049117]]
hermes manufacture [[0.10334908]]
hermes satellite [[0.09048877]]
hermes electronic_game [[0.09446153]]
hermes artificial_satellite [[0.08525581]]
hermes fictional_character [[0.10396955]]
hermes aircraft [[0.09652032]]
hermes language [[

truffle plant [[0.13750483]]
truffle plant_structure [[0.11653318]]
truffle green_goods [[0.10045905]]
truffle goody [[0.09232794]]
truffle tracheophyte [[0.0924999]]
truffle delicacy [[0.09316944]]
truffle candy [[0.10560989]]
truffle sweets [[0.1001826]]
truffle plant_organ [[0.0865583]]
truffle treat [[0.11408426]]
truffle dainty [[0.08805463]]
truffle plant_part [[0.13074304]]
truffle confection [[0.09830377]]
truffle sweet [[0.09791212]]
don_baldwin competitor [[0.09957205]]
don_baldwin athlete [[0.11448663]]
don_baldwin person [[0.18453124]]
don_baldwin skater [[0.08980215]]
don_baldwin sport [[0.09521502]]
don_baldwin sportsperson [[0.10538402]]
great-grandfather forbear [[0.09141254]]
great-grandfather antecedent [[0.0974097]]
great-grandfather ancestor [[0.09809014]]
great-grandfather paternity [[0.10362198]]
great-grandfather parent [[0.12245066]]
great-grandfather forebear [[0.09409192]]
great-grandfather relative [[0.09792958]]
great-grandfather person [[0.18954957]]
great-

angular_momentum natural_action [[0.07577547]]
angular_momentum momentum [[0.08982228]]
angular_momentum natural_process [[0.0953118]]
greccio comune [[0.0935168]]
greccio city [[0.11087576]]
shortsightedness softness [[0.08127858]]
shortsightedness visual_impairment [[0.11596639]]
shortsightedness sickness [[0.11776762]]
shortsightedness impairment [[0.1200942]]
macroeconomics economics [[0.11263908]]
ridge convex_shape [[0.07493343]]
ridge elevation [[0.10225245]]
ridge natural_elevation [[0.08492781]]
ridge structural_member [[0.08886942]]
ridge support [[0.0792508]]
ridge beam [[0.08626557]]
ridge shape [[0.07777116]]
ridge structural_element [[0.079677]]
ridge appendage [[0.07959195]]
standerton town [[0.10592004]]
standerton city [[0.11831442]]
snowplow travel [[0.12163011]]
snowplow move [[0.09083003]]
snowplow transport [[0.12399141]]
snowplow road_vehicle [[0.13514033]]
snowplow vehicle [[0.1343127]]
grapevine album [[0.09132314]]
grapevine vinifera [[0.10166208]]
grapevine pl

masquerade_ball show [[0.09789938]]
masquerade_ball masquerade [[0.09638412]]
masquerade_ball masque [[0.09531285]]
masquerade_ball mask [[0.09467316]]
masquerade_ball social_occasion [[0.11746582]]
masquerade_ball social_event [[0.12385956]]
masquerade_ball party [[0.11854265]]
masquerade_ball function [[0.09493471]]
masquerade_ball occasion [[0.12039457]]
masquerade_ball group_event [[0.11171931]]
masquerade_ball masquerade_party [[0.09744855]]
masquerade_ball social_gathering [[0.11705574]]
mechanics performance [[0.10647947]]
mechanics physics [[0.1010772]]
bearing mechanical_assembly [[0.08699782]]
bearing direction [[0.09221358]]
bearing component [[0.10748389]]
bearing support [[0.09948286]]
bearing mechanism [[0.10359461]]
carborundum abrasive [[0.09172519]]
carborundum abrasive_material [[0.08950742]]
épée sword [[0.08091982]]
épée weapon [[0.10465256]]
rubble scrap [[0.09006798]]
crusade disagreement [[0.1208376]]
crusade task [[0.11283879]]
crusade military_campaign [[0.1042

praetor magistrate [[0.12316488]]
praetor judge [[0.13197124]]
praetor person [[0.193164]]
endocarditis carditis [[0.11675758]]
endocarditis inflammation [[0.1339646]]
endocarditis disorder [[0.1260834]]
endocarditis disease [[0.15686767]]
endocarditis sickness [[0.12046529]]
michael_owen footballer [[0.10681432]]
michael_owen person [[0.19513299]]


# Some readings

* ('Epochs: ', 30, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 24, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout 0.5: ', True)

CRIM evaluation:<br>
MRR: 0.13002<br>
P@1: 0.08806<br>
P@5: 0.06368<br>
P@10: 0.06131<br>

* ('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.5, 'Kernel constraint: ', 'None')

CRIM evaluation:<br>
MRR: 0.16015<br>
P@1: 0.12675<br>
P@5: 0.07046<br>
P@10: 0.06632<br>

* ('Epochs: ', 15, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.5)

CRIM evaluation:<br>
MRR: 0.15522<br>
P@1: 0.11741<br>
P@5: 0.07256<br>
P@10: 0.06844<br>

* ('Epochs: ', 5, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', True, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout (0.5): ', True)

CRIM evaluation:<br>
MRR: 0.22385<br>
P@1: 0.1968<br>
P@5: 0.08815<br>
P@10: 0.08352<br>

Even though these results look superior at first glance, in reality, tuning the embeddings reduces the model to a Most Frequent Hypernym predictor.


* ('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 5, 'pki_k: ', 24, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout: ', True)

Reducing the number of random samples has a negative impact.

CRIM evaluation:<br>
MRR: 0.06439<br>
P@1: 0.04536<br>
P@5: 0.02777<br>
P@10: 0.02703<br>

* ('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 24, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.5, 'Kernel constraint: ', 'ForceToOne')

CRIM evaluation:<br>
MRR: 0.12434<br>
P@1: 0.09006<br>
P@5: 0.05667<br>
P@10: 0.0549<br>

* ('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 5, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.5, 'Kernel constraint: ', 'None')

CRIM evaluation:<br>
MRR: 0.1346<br>
P@1: 0.0974<br>
P@5: 0.0627<br>
P@10: 0.05998<br>

* ('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 10, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.5, 'Kernel constraint: ', 'None')

CRIM evaluation:<br>
MRR: 0.12567<br>
P@1: 0.08539<br>
P@5: 0.06105<br>
P@10: 0.05916<br>


* ('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 5, 'pki_k: ', 12, 'train_embeddings: ', False, 'Negative sampling: ', 'synonym', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.5, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 155.5154161900282, 'Test Loss:', 485.83162450790405)
('Epoch:', 2, 'Loss:', 88.82768769562244, 'Test Loss:', 348.13482135534286)
('Epoch:', 3, 'Loss:', 66.63960940390825, 'Test Loss:', 258.7310974597931)
('Epoch:', 4, 'Loss:', 56.400326274335384, 'Test Loss:', 216.1373891234398)
('Epoch:', 5, 'Loss:', 51.70219925045967, 'Test Loss:', 207.98559176921844)
('Epoch:', 6, 'Loss:', 48.9724283516407, 'Test Loss:', 204.41960680484772)
('Epoch:', 7, 'Loss:', 46.962790466845036, 'Test Loss:', 195.1675413697958)
('Epoch:', 8, 'Loss:', 45.41264865081757, 'Test Loss:', 198.74169850349426)
('Epoch:', 9, 'Loss:', 45.02060864120722, 'Test Loss:', 191.37398713827133)
('Epoch:', 10, 'Loss:', 44.76244197413325, 'Test Loss:', 188.13010711967945)

CRIM evaluation:<br>
MRR: 0.01452<br>
P@1: 0.00801<br>
P@5: 0.00653<br>
P@10: 0.00623<br>

* ('Epochs: ', 15, 'Batch size: ', 32, 'm: ', 15, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.5, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 173.66530799865723, 'Test Loss:', 422.2526289820671)
('Epoch:', 2, 'Loss:', 89.43182794749737, 'Test Loss:', 888.1642265319824)
('Epoch:', 3, 'Loss:', 82.11764796078205, 'Test Loss:', 897.4773137569427)
('Epoch:', 4, 'Loss:', 77.61969149112701, 'Test Loss:', 871.3285944461823)
('Epoch:', 5, 'Loss:', 73.61261868476868, 'Test Loss:', 838.6195100545883)
('Epoch:', 6, 'Loss:', 70.76602215319872, 'Test Loss:', 790.632804274559)
('Epoch:', 7, 'Loss:', 68.09195621311665, 'Test Loss:', 758.9123626947403)
('Epoch:', 8, 'Loss:', 66.28909918665886, 'Test Loss:', 722.3105019330978)
('Epoch:', 9, 'Loss:', 64.0174068659544, 'Test Loss:', 701.5663638114929)
('Epoch:', 10, 'Loss:', 62.1955054551363, 'Test Loss:', 685.4047366380692)
('Epoch:', 11, 'Loss:', 61.18628938496113, 'Test Loss:', 666.7885119915009)
('Epoch:', 12, 'Loss:', 60.10102154314518, 'Test Loss:', 654.0900322198868)
('Epoch:', 13, 'Loss:', 59.095360577106476, 'Test Loss:', 653.2110993862152)
('Epoch:', 14, 'Loss:', 58.58073855936527, 'Test Loss:', 634.9157860279083)
('Epoch:', 15, 'Loss:', 57.885950952768326, 'Test Loss:', 631.2514699697495)

CRIM evaluation:
MRR: 0.15946
P@1: 0.11674
P@5: 0.07472
P@10: 0.06976

-----------------------------------------------
Tried a new technique involving a mix of ensemble and transfer learning.<br>
* Developed two models: the first is the standard CRIM model I've always used, with the embeddings frozen and populated with the word2vec vectors provided by Gabriel;
* This first model was trained for an adequate number of epochs (i.e. until optimal fitting; early stopping implemented manually);
* After the first cycle of training was over, I extracted the Phi and LR weights;
* These weights were subsequently injected into a new model.  This time the embeddings layer was set to trainable and the Phi dense layer was frozen;
* The model was thereby encouraged to use the same projection weights and fine-tune the embeddings to learn a better hypernym generation model.

#### 1 Projection; First Cycle 
('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 177.9488000869751, 'Test Loss:', 178.53641974925995)
('Epoch:', 2, 'Loss:', 103.15769128501415, 'Test Loss:', 102.5895478874445)
('Epoch:', 3, 'Loss:', 90.33858793973923, 'Test Loss:', 90.0393942296505)
('Epoch:', 4, 'Loss:', 80.81938941776752, 'Test Loss:', 79.73569859564304)
('Epoch:', 5, 'Loss:', 74.30441601574421, 'Test Loss:', 73.00559163093567)
('Epoch:', 6, 'Loss:', 68.31351159512997, 'Test Loss:', 67.29328979551792)
('Epoch:', 7, 'Loss:', 64.69703111797571, 'Test Loss:', 64.18054696917534)
('Epoch:', 8, 'Loss:', 61.36914176493883, 'Test Loss:', 60.54891411960125)
('Epoch:', 9, 'Loss:', 57.571076557040215, 'Test Loss:', 58.882518880069256)
('Epoch:', 10, 'Loss:', 54.78656556457281, 'Test Loss:', 57.673407919704914)
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.14914
P@1: 0.10874
P@5: 0.0687
P@10: 0.06505


#### 1 Projection; Second Cycle
('Epochs: ', 5, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', True, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 29.900435734540224, 'Test Loss:', 50.436300061643124)
('Epoch:', 2, 'Loss:', 8.763934534537839, 'Test Loss:', 46.93847519904375)
('Epoch:', 3, 'Loss:', 4.247505006627762, 'Test Loss:', 51.40926795452833)
('Epoch:', 4, 'Loss:', 2.347970368108463, 'Test Loss:', 56.144402757287025)
('Epoch:', 5, 'Loss:', 1.38899088197104, 'Test Loss:', 63.76580411195755)
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.28634
P@1: 0.24483
P@5: 0.1263
P@10: 0.11961

---------------------------------------------------------------------------------------
#### 24 Projections; First Cycle
('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 24, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 109.97628237307072, 'Test Loss:', 110.91941311955452)
('Epoch:', 2, 'Loss:', 61.31632810086012, 'Test Loss:', 65.23697778582573)
('Epoch:', 3, 'Loss:', 48.39542027562857, 'Test Loss:', 59.35354737192392)
('Epoch:', 4, 'Loss:', 41.11544480547309, 'Test Loss:', 60.305753372609615)
('Epoch:', 5, 'Loss:', 36.67992676049471, 'Test Loss:', 64.0475360751152)
('Epoch:', 6, 'Loss:', 34.28844119235873, 'Test Loss:', 63.37563705444336)
('Epoch:', 7, 'Loss:', 32.068154136650264, 'Test Loss:', 64.99528855085373)
('Epoch:', 8, 'Loss:', 31.266997564584017, 'Test Loss:', 64.47643724828959)
('Epoch:', 9, 'Loss:', 30.232711946591735, 'Test Loss:', 63.88381798565388)
('Epoch:', 10, 'Loss:', 29.51552465558052, 'Test Loss:', 65.2800731509924)
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.10304
P@1: 0.07138
P@5: 0.04817
P@10: 0.04453

#### 24 Projections; Second Cycle
('Epochs: ', 5, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 24, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None')

('Epoch:', 1, 'Loss:', 13.566975106863538, 'Test Loss:', 59.5253451615572)
('Epoch:', 2, 'Loss:', 2.032865798302737, 'Test Loss:', 60.16112444549799)
('Epoch:', 3, 'Loss:', 0.46449146703412225, 'Test Loss:', 64.23307839781046)
('Epoch:', 4, 'Loss:', 0.1069298386116202, 'Test Loss:', 68.53682653605938)
('Epoch:', 5, 'Loss:', 0.027284826716368116, 'Test Loss:', 73.16503396630287)


CRIM evaluation:
MRR: 0.30386
P@1: 0.24817
P@5: 0.14445
P@10: 0.13732

---------------------------------------------------------------------------------------------------
### Experiment with single projection but changing: i) Random initialiser (random + identity); ii) 2nd phase keeps training Phi; iii) Change Learning Rate of 2nd phase.

#### 1 Projection; First Cycle
('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 176.2122738659382, 'Test Loss:', 177.55351921916008)
('Epoch:', 2, 'Loss:', 104.77461278438568, 'Test Loss:', 105.96388858556747)
('Epoch:', 3, 'Loss:', 93.10278041660786, 'Test Loss:', 95.61527897417545)
('Epoch:', 4, 'Loss:', 83.65695556998253, 'Test Loss:', 86.35148683190346)
('Epoch:', 5, 'Loss:', 76.63541767001152, 'Test Loss:', 80.32908068597317)
('Epoch:', 6, 'Loss:', 71.39560843259096, 'Test Loss:', 74.56421269476414)
('Epoch:', 7, 'Loss:', 67.28997546434402, 'Test Loss:', 70.91447427868843)
('Epoch:', 8, 'Loss:', 63.82748632133007, 'Test Loss:', 67.26722575724125)
('Epoch:', 9, 'Loss:', 60.12399164587259, 'Test Loss:', 65.32279951870441)
('Epoch:', 10, 'Loss:', 57.98733665794134, 'Test Loss:', 63.860010385513306)

CRIM evaluation:
MRR: 0.15094
P@1: 0.11474
P@5: 0.0674
P@10: 0.065

#### 1 Projection; Second Cycle
('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 37.44226049259305, 'Test Loss:', 62.1908989623189)
('Epoch:', 2, 'Loss:', 14.947778060100973, 'Test Loss:', 74.91842666268349)
('Epoch:', 3, 'Loss:', 7.418942770687863, 'Test Loss:', 98.69380746781826)
('Epoch:', 4, 'Loss:', 4.618407022906467, 'Test Loss:', 120.58737960457802)
('Epoch:', 5, 'Loss:', 2.871887466167209, 'Test Loss:', 142.8387492597103)
('Epoch:', 6, 'Loss:', 1.9543396250010119, 'Test Loss:', 163.086307823658)
('Epoch:', 7, 'Loss:', 1.3212263471978076, 'Test Loss:', 183.57101076841354)
('Epoch:', 8, 'Loss:', 0.9992299735413326, 'Test Loss:', 209.59179404377937)
('Epoch:', 9, 'Loss:', 0.8104324492987871, 'Test Loss:', 228.58731454610825)
('Epoch:', 10, 'Loss:', 0.5762173827174593, 'Test Loss:', 254.7513089776039)

CRIM evaluation:
MRR: 0.27074
P@1: 0.22148
P@5: 0.13155
P@10: 0.12557


* Overfitting clearly manifests: "person" and "work_of_art" feature heavily as predicted hypernyms, even for query terms to which they are unrelated.

### Experiment with dual projections.  Extended epochs of first cycle to 25.  Reduced learning rate of second cycle to reduce overfitting, and trained it for 5 epochs

#### First Cycle
('Epochs: ', 25, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 2, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.35, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 160.74599489569664, 'Test Loss:', 160.929179251194)
('Epoch:', 2, 'Loss:', 96.68699912726879, 'Test Loss:', 96.50307583808899)
('Epoch:', 3, 'Loss:', 82.84739479422569, 'Test Loss:', 82.26891721785069)
('Epoch:', 4, 'Loss:', 73.6079184487462, 'Test Loss:', 72.73883473873138)
('Epoch:', 5, 'Loss:', 67.65975984185934, 'Test Loss:', 67.05146090686321)
('Epoch:', 6, 'Loss:', 63.158750265836716, 'Test Loss:', 62.18984428048134)
('Epoch:', 7, 'Loss:', 59.12502943724394, 'Test Loss:', 59.458139173686504)
('Epoch:', 8, 'Loss:', 55.11561170220375, 'Test Loss:', 56.17893383651972)
('Epoch:', 9, 'Loss:', 52.53363721072674, 'Test Loss:', 54.554423585534096)
('Epoch:', 10, 'Loss:', 50.518984742462635, 'Test Loss:', 53.33502481132746)
('Epoch:', 11, 'Loss:', 48.320613726973534, 'Test Loss:', 51.69379674643278)
('Epoch:', 12, 'Loss:', 46.352201879024506, 'Test Loss:', 51.4660424888134)
('Epoch:', 13, 'Loss:', 45.39920901507139, 'Test Loss:', 51.60422394424677)
('Epoch:', 14, 'Loss:', 44.41912394762039, 'Test Loss:', 50.1107277572155)
('Epoch:', 15, 'Loss:', 43.92356888204813, 'Test Loss:', 50.16625649482012)
('Epoch:', 16, 'Loss:', 42.618239261209965, 'Test Loss:', 49.68374668061733)
('Epoch:', 17, 'Loss:', 41.93088800087571, 'Test Loss:', 49.52678156644106)
('Epoch:', 18, 'Loss:', 42.28301604837179, 'Test Loss:', 49.41834541410208)
('Epoch:', 19, 'Loss:', 41.13799152523279, 'Test Loss:', 48.8290878534317)
('Epoch:', 20, 'Loss:', 40.83357220888138, 'Test Loss:', 48.22099205851555)
('Epoch:', 21, 'Loss:', 40.20593152567744, 'Test Loss:', 48.0087883323431)
('Epoch:', 22, 'Loss:', 40.49513000249863, 'Test Loss:', 47.994462229311466)
('Epoch:', 23, 'Loss:', 39.6955735757947, 'Test Loss:', 48.454381965100765)
('Epoch:', 24, 'Loss:', 39.38141195476055, 'Test Loss:', 48.52395910024643)
('Epoch:', 25, 'Loss:', 39.834498304873705, 'Test Loss:', 48.30932606011629)

CRIM evaluation:
MRR: 0.11931
P@1: 0.07872
P@5: 0.05491
P@10: 0.05259

#### Second cycle
('Epochs: ', 5, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 2, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.35, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 30.962760228663683, 'Test Loss:', 46.86121924221516)
('Epoch:', 2, 'Loss:', 18.49303602334112, 'Test Loss:', 43.47840115427971)
('Epoch:', 3, 'Loss:', 12.296762353973463, 'Test Loss:', 41.97538521140814)
('Epoch:', 4, 'Loss:', 8.417490374995396, 'Test Loss:', 40.455049715936184)
('Epoch:', 5, 'Loss:', 6.027016263862606, 'Test Loss:', 40.4208921790123)

CRIM evaluation:
MRR: 0.30662
P@1: 0.2515
P@5: 0.14699
P@10: 0.13961

-------------------------------------------------------------------------------------------------
### Experiment with two Phi layers. 
#### I insert dropout after the hyponym and hypernym embeddings and after the second phi.  Training goes on for 25 epochs.  The solution converges much more slowly than if we were using a single affine layer.

('Epochs: ', 25, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.35, 'Kernel constraint: ', 'None')
('Epoch:', 1, 'Loss:', 132.44914108514786, 'Test Loss:', 129.82295709848404)
('Epoch:', 2, 'Loss:', 95.7211739718914, 'Test Loss:', 92.90788823366165)
('Epoch:', 3, 'Loss:', 93.52208907902241, 'Test Loss:', 90.88466887176037)
('Epoch:', 4, 'Loss:', 91.18207442760468, 'Test Loss:', 89.7815543860197)
('Epoch:', 5, 'Loss:', 88.62042617797852, 'Test Loss:', 87.20721289515495)
('Epoch:', 6, 'Loss:', 85.08720774948597, 'Test Loss:', 83.62825165688992)
('Epoch:', 7, 'Loss:', 82.6170591711998, 'Test Loss:', 79.72680546343327)
('Epoch:', 8, 'Loss:', 78.23334312438965, 'Test Loss:', 74.8099833726883)
('Epoch:', 9, 'Loss:', 76.347172498703, 'Test Loss:', 72.44513177871704)
('Epoch:', 10, 'Loss:', 74.18829296529293, 'Test Loss:', 69.96501626074314)
('Epoch:', 11, 'Loss:', 72.2932140827179, 'Test Loss:', 67.46124893426895)
('Epoch:', 12, 'Loss:', 71.2011769413948, 'Test Loss:', 65.32345585525036)
('Epoch:', 13, 'Loss:', 70.07241632044315, 'Test Loss:', 63.981889829039574)
('Epoch:', 14, 'Loss:', 68.04309992492199, 'Test Loss:', 62.387570425868034)
('Epoch:', 15, 'Loss:', 67.2650830000639, 'Test Loss:', 61.06334821879864)
('Epoch:', 16, 'Loss:', 67.25092969834805, 'Test Loss:', 60.93150553107262)
('Epoch:', 17, 'Loss:', 66.12663190811872, 'Test Loss:', 60.26040391623974)
('Epoch:', 18, 'Loss:', 64.85526475310326, 'Test Loss:', 59.32122567296028)
('Epoch:', 19, 'Loss:', 64.72251350432634, 'Test Loss:', 58.52266174554825)
('Epoch:', 20, 'Loss:', 63.96191988885403, 'Test Loss:', 57.419067934155464)
('Epoch:', 21, 'Loss:', 62.82784986868501, 'Test Loss:', 56.976065531373024)
('Epoch:', 22, 'Loss:', 62.8604651093483, 'Test Loss:', 57.06210967898369)
('Epoch:', 23, 'Loss:', 62.14057156443596, 'Test Loss:', 56.878740444779396)
('Epoch:', 24, 'Loss:', 61.323521822690964, 'Test Loss:', 56.8377401381731)
('Epoch:', 25, 'Loss:', 61.14446556568146, 'Test Loss:', 56.42692677676678)

* Having more than one linear layer does not increase the hypothesis space of the model.
* Attempting a model with 2 hidden layers, each with a non-linear activation function, prevented the model from learning anything.


-----------------------------------------------------------------------------------------------------
### Testing different batch sizes
('Epochs: ', 8, 'Batch size: ', 1, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001, 'Bagging:', False)
('Epoch:', 1, 'Loss:', 3066.596051596105, 'Test Loss:', 1.196591, 'MRR:', 0.06733, 'Test accuracy:', 0.365)
('Epoch:', 2, 'Loss:', 2044.704427516088, 'Test Loss:', 1.1396255, 'MRR:', 0.07917, 'Test accuracy:', 0.53)
('Epoch:', 3, 'Loss:', 1773.883358555846, 'Test Loss:', 1.118648, 'MRR:', 0.08222, 'Test accuracy:', 0.59)
('Epoch:', 4, 'Loss:', 1685.0154238501564, 'Test Loss:', 1.2257615, 'MRR:', 0.08352, 'Test accuracy:', 0.57)
('Epoch:', 5, 'Loss:', 1624.6034373817965, 'Test Loss:', 1.2055718, 'MRR:', 0.10333, 'Test accuracy:', 0.57)
('Epoch:', 6, 'Loss:', 1582.9557659688871, 'Test Loss:', 1.2653193, 'MRR:', 0.08917, 'Test accuracy:', 0.61)
('Epoch:', 7, 'Loss:', 1559.463746579131, 'Test Loss:', 1.146983, 'MRR:', 0.08167, 'Test accuracy:', 0.64)
('Epoch:', 8, 'Loss:', 1533.567981993081, 'Test Loss:', 1.1859033, 'MRR:', 0.06533, 'Test accuracy:', 0.625)
Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.10216
MAP: 0.04669
P@1: 0.06738
P@5: 0.04474
P@10: 0.04341

* Perhaps results will improve if the learning rate is reduced?


# Modifying CRIM to work with multiple phi but send only highest similarity to prediction layer

('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 10, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 192.27924165129662, 'Test Loss:', 185.294663220644)
('Epoch:', 2, 'Loss:', 111.60149729251862, 'Test Loss:', 107.93660750985146)
('Epoch:', 3, 'Loss:', 96.55376133322716, 'Test Loss:', 95.19647890329361)
('Epoch:', 4, 'Loss:', 82.45174576342106, 'Test Loss:', 79.94356182217598)
('Epoch:', 5, 'Loss:', 73.00302350521088, 'Test Loss:', 69.28130987286568)
('Epoch:', 6, 'Loss:', 66.95412650704384, 'Test Loss:', 62.31023855507374)
('Epoch:', 7, 'Loss:', 61.48678085207939, 'Test Loss:', 57.86816193163395)
('Epoch:', 8, 'Loss:', 58.002911515533924, 'Test Loss:', 54.93470601737499)
('Epoch:', 9, 'Loss:', 54.53545202314854, 'Test Loss:', 52.65876457095146)
('Epoch:', 10, 'Loss:', 51.42167618870735, 'Test Loss:', 51.305756598711014)

CRIM evaluation:
MRR: 0.14499
MAP: 0.06773
P@1: 0.10207
P@5: 0.06518
P@10: 0.06227

--------------------------------------------------------------------
('Epochs: ', 13, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 20, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 177.05030685663223, 'Test Loss:', 1.9216485023498535, 'MRR:', 0.0)
('Epoch:', 2, 'Loss:', 105.13821250200272, 'Test Loss:', 2.0207560062408447, 'MRR:', 0.0)
('Epoch:', 3, 'Loss:', 96.8861652314663, 'Test Loss:', 1.8185327053070068, 'MRR:', 0.00667)
('Epoch:', 4, 'Loss:', 89.28570778667927, 'Test Loss:', 1.6428419351577759, 'MRR:', 0.06)
('Epoch:', 5, 'Loss:', 82.289546713233, 'Test Loss:', 1.4797453880310059, 'MRR:', 0.074)
('Epoch:', 6, 'Loss:', 76.45523698627949, 'Test Loss:', 1.336097002029419, 'MRR:', 0.10286)
('Epoch:', 7, 'Loss:', 71.0555507093668, 'Test Loss:', 1.2085713148117065, 'MRR:', 0.09667)
('Epoch:', 8, 'Loss:', 65.79279552400112, 'Test Loss:', 1.1283133029937744, 'MRR:', 0.122)
('Epoch:', 9, 'Loss:', 61.6914467215538, 'Test Loss:', 1.0555990934371948, 'MRR:', 0.13286)
('Epoch:', 10, 'Loss:', 57.9002415984869, 'Test Loss:', 0.9768383502960205, 'MRR:', 0.12952)
('Epoch:', 11, 'Loss:', 54.238177113235, 'Test Loss:', 0.9210295081138611, 'MRR:', 0.117)
('Epoch:', 12, 'Loss:', 51.53789134323597, 'Test Loss:', 0.8832364082336426, 'MRR:', 0.11722)
('Epoch:', 13, 'Loss:', 48.59779189527035, 'Test Loss:', 0.8497580885887146, 'MRR:', 0.11)

CRIM evaluation:
MRR: 0.14087
MAP: 0.06261
P@1: 0.11007
P@5: 0.05931
P@10: 0.05632

-------------------------------------------------------------------------------------------------------
('Epochs: ', 9, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 20, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 218.10481524467468, 'Test Loss:', 1.2180428504943848, 'MRR:', 0.0)
('Epoch:', 2, 'Loss:', 123.68451875448227, 'Test Loss:', 2.3275296688079834, 'MRR:', 0.0)
('Epoch:', 3, 'Loss:', 98.06867711246014, 'Test Loss:', 1.8287553787231445, 'MRR:', 0.06)
('Epoch:', 4, 'Loss:', 82.21024569869041, 'Test Loss:', 1.4681575298309326, 'MRR:', 0.11667)
('Epoch:', 5, 'Loss:', 71.17102481424809, 'Test Loss:', 1.2787588834762573, 'MRR:', 0.099)
('Epoch:', 6, 'Loss:', 64.6261182948947, 'Test Loss:', 1.1748896837234497, 'MRR:', 0.11852)
('Epoch:', 7, 'Loss:', 59.59312414377928, 'Test Loss:', 1.0992566347122192, 'MRR:', 0.09789)
('Epoch:', 8, 'Loss:', 56.39183371514082, 'Test Loss:', 1.0781883001327515, 'MRR:', 0.12233)
('Epoch:', 9, 'Loss:', 52.29561498016119, 'Test Loss:', 1.0466694831848145, 'MRR:', 0.10833)

CRIM evaluation:
MRR: 0.16512
MAP: 0.08061
P@1: 0.12408
P@5: 0.0774
P@10: 0.07424

* After embeddings tuning
('Epochs: ', 5, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 20, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005)

('Epoch:', 1, 'Loss:', 29.043478064239025, 'Test Loss:', 0.7365559339523315, 'MRR:', 0.22952)
('Epoch:', 2, 'Loss:', 11.500816403888166, 'Test Loss:', 0.5949599742889404, 'MRR:', 0.22786)
('Epoch:', 3, 'Loss:', 5.905871827970259, 'Test Loss:', 0.5205121040344238, 'MRR:', 0.22167)
('Epoch:', 4, 'Loss:', 3.320489032255864, 'Test Loss:', 0.4699397385120392, 'MRR:', 0.22119)
('Epoch:', 5, 'Loss:', 2.18644647533074, 'Test Loss:', 0.4323105216026306, 'MRR:', 0.23333)

CRIM evaluation:
MRR: 0.27001
MAP: 0.12721
P@1: 0.23215
P@5: 0.119
P@10: 0.11395

* Changed 2nd phase parameters
('Epochs: ', 7, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 20, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 22.901207104325294, 'Test Loss:', 0.6052702069282532, 'MRR:', 0.22352)
('Epoch:', 2, 'Loss:', 5.384110522107221, 'Test Loss:', 0.4855782985687256, 'MRR:', 0.22119)
('Epoch:', 3, 'Loss:', 2.196897844092746, 'Test Loss:', 0.4189225435256958, 'MRR:', 0.20286)
('Epoch:', 4, 'Loss:', 1.1273050970230543, 'Test Loss:', 0.37064453959465027, 'MRR:', 0.19952)
('Epoch:', 5, 'Loss:', 0.5745529867244841, 'Test Loss:', 0.3379283845424652, 'MRR:', 0.18952)
('Epoch:', 6, 'Loss:', 0.3683943189241745, 'Test Loss:', 0.3203800916671753, 'MRR:', 0.19952)
('Epoch:', 7, 'Loss:', 0.21765478086467738, 'Test Loss:', 0.3079485595226288, 'MRR:', 0.19952)
CRIM evaluation:
MRR: 0.24346
MAP: 0.11254
P@1: 0.1988
P@5: 0.10505
P@10: 0.10311


---------------------------------------------------------------------------------------------------
* In this section, I normalise the dot product of the hyponym projection and hypernym embedding.
* Performance improves significantly, even with 1 cluster:

('Epochs: ', 13, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 216.5801951289177, 'Test Loss:', 1.01119065284729, 'MRR:', 0.0)
('Epoch:', 2, 'Loss:', 166.6781222820282, 'Test Loss:', 1.3387771844863892, 'MRR:', 0.0)
('Epoch:', 3, 'Loss:', 134.3004250228405, 'Test Loss:', 1.669001579284668, 'MRR:', 0.0)
('Epoch:', 4, 'Loss:', 115.40679916739464, 'Test Loss:', 1.9242345094680786, 'MRR:', 0.02)
('Epoch:', 5, 'Loss:', 105.27983355522156, 'Test Loss:', 1.9711346626281738, 'MRR:', 0.04667)
('Epoch:', 6, 'Loss:', 99.74403658509254, 'Test Loss:', 1.8888331651687622, 'MRR:', 0.10082)
('Epoch:', 7, 'Loss:', 96.84595969319344, 'Test Loss:', 1.785994291305542, 'MRR:', 0.08582)
('Epoch:', 8, 'Loss:', 94.51537863910198, 'Test Loss:', 1.7007386684417725, 'MRR:', 0.09222)
('Epoch:', 9, 'Loss:', 92.27100689709187, 'Test Loss:', 1.6312320232391357, 'MRR:', 0.09552)
('Epoch:', 10, 'Loss:', 90.04288397729397, 'Test Loss:', 1.5578685998916626, 'MRR:', 0.10834)
('Epoch:', 11, 'Loss:', 88.05075532197952, 'Test Loss:', 1.4859631061553955, 'MRR:', 0.11169)
('Epoch:', 12, 'Loss:', 85.85826447606087, 'Test Loss:', 1.4254134893417358, 'MRR:', 0.108)
('Epoch:', 13, 'Loss:', 84.02545890212059, 'Test Loss:', 1.3709036111831665, 'MRR:', 0.10817)

CRIM evaluation:
MRR: 0.17462
MAP: 0.08124
P@1: 0.13809
P@5: 0.07732
P@10: 0.07283

* We tune embeddings for only 2 epochs
('Epochs: ', 2, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005)

('Epoch:', 1, 'Loss:', 65.86931462585926, 'Test Loss:', 1.1016316413879395, 'MRR:', 0.27186)
('Epoch:', 2, 'Loss:', 45.293831795454025, 'Test Loss:', 0.8902890682220459, 'MRR:', 0.28783)

CRIM evaluation:
MRR: 0.29305
MAP: 0.14925
P@1: 0.24883
P@5: 0.14403
P@10: 0.13475

* Tuning for a further two epochs improves the validation MRR, but the test metrics deteriorate.

('Epochs: ', 2, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005)
('Epoch:', 1, 'Loss:', 32.49462116137147, 'Test Loss:', 0.7281622290611267, 'MRR:', 0.284)
('Epoch:', 2, 'Loss:', 23.98638728633523, 'Test Loss:', 0.6015497446060181, 'MRR:', 0.30733)

CRIM evaluation:
MRR: 0.25017
MAP: 0.13469
P@1: 0.1968
P@5: 0.13179
P@10: 0.12452

* Performance decreases the more epochs we add despite the fact that both training loss and test loss keep decreasing.  Validation MRR is a more important training metric to keep track of.

('Epochs: ', 2, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005)
('Epoch:', 1, 'Loss:', 18.0652209892869, 'Test Loss:', 0.5042541027069092, 'MRR:', 0.274)
('Epoch:', 2, 'Loss:', 13.913509592413902, 'Test Loss:', 0.4291400909423828, 'MRR:', 0.27622)

MRR: 0.22766
MAP: 0.12481
P@1: 0.17078
P@5: 0.12266
P@10: 0.11679

* Fine-tuning for just 1 epoch:

('Epochs: ', 1, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005)
('Epoch:', 1, 'Loss:', 65.84822230041027, 'Test Loss:', 1.1002403497695923, 'MRR:', 0.25405)

MRR: 0.29843
MAP: 0.14871
P@1: 0.25751
P@5: 0.14107
P@10: 0.13354

* Now we keep embeddings frozen and we train the phi layer for a few more epochs

('Epochs: ', 5, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 57.45014962553978, 'Test Loss:', 0.9213581681251526, 'MRR:', 0.28186)
('Epoch:', 2, 'Loss:', 54.33015509694815, 'Test Loss:', 0.8853888511657715, 'MRR:', 0.28352)
('Epoch:', 3, 'Loss:', 51.75078434497118, 'Test Loss:', 0.8243922591209412, 'MRR:', 0.2855)
('Epoch:', 4, 'Loss:', 49.316737711429596, 'Test Loss:', 0.7884426712989807, 'MRR:', 0.28686)
('Epoch:', 5, 'Loss:', 47.065613731741905, 'Test Loss:', 0.7453060746192932, 'MRR:', 0.30852)

MRR: 0.30175
MAP: 0.15109
P@1: 0.25751
P@5: 0.14383
P@10: 0.13604

* And a further 5 epochs
('Epochs: ', 5, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 1, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 44.93913523107767, 'Test Loss:', 0.7189047932624817, 'MRR:', 0.28852)
('Epoch:', 2, 'Loss:', 42.82094585895538, 'Test Loss:', 0.7057857513427734, 'MRR:', 0.28786)
('Epoch:', 3, 'Loss:', 41.27894340455532, 'Test Loss:', 0.6507096886634827, 'MRR:', 0.27452)
('Epoch:', 4, 'Loss:', 39.19503653794527, 'Test Loss:', 0.613694965839386, 'MRR:', 0.27952)
('Epoch:', 5, 'Loss:', 37.61957763880491, 'Test Loss:', 0.623578667640686, 'MRR:', 0.27952)

CRIM evaluation:
MRR: 0.30639
MAP: 0.15227
P@1: 0.26217
P@5: 0.14563
P@10: 0.13702

----------------------------------------------------------------------------------------------------
* In this experiment, we train a 10 projection model in three phases.

('Epochs: ', 10, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 10, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 223.62342816591263, 'Test Loss:', 0.9674524664878845, 'MRR:', 0.0)
('Epoch:', 2, 'Loss:', 166.71091252565384, 'Test Loss:', 1.291346788406372, 'MRR:', 0.005)
('Epoch:', 3, 'Loss:', 130.67713418602943, 'Test Loss:', 1.6250698566436768, 'MRR:', 0.0)
('Epoch:', 4, 'Loss:', 110.79544314742088, 'Test Loss:', 1.903112769126892, 'MRR:', 0.03167)
('Epoch:', 5, 'Loss:', 101.37133565545082, 'Test Loss:', 2.0412981510162354, 'MRR:', 0.06067)
('Epoch:', 6, 'Loss:', 97.36877712607384, 'Test Loss:', 1.931593656539917, 'MRR:', 0.06357)
('Epoch:', 7, 'Loss:', 94.5494154393673, 'Test Loss:', 1.8447002172470093, 'MRR:', 0.074)
('Epoch:', 8, 'Loss:', 91.85252003371716, 'Test Loss:', 1.7642910480499268, 'MRR:', 0.103)
('Epoch:', 9, 'Loss:', 89.23252408206463, 'Test Loss:', 1.6848324537277222, 'MRR:', 0.13833)
('Epoch:', 10, 'Loss:', 86.65304459631443, 'Test Loss:', 1.6097363233566284, 'MRR:', 0.144)

CRIM evaluation:
MRR: 0.17335
MAP: 0.07732
P@1: 0.14076
P@5: 0.07268
P@10: 0.06865

* Second phase:
('Epochs: ', 2, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 10, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.0005)
('Epoch:', 1, 'Loss:', 71.49462106823921, 'Test Loss:', 1.3946821689605713, 'MRR:', 0.19686)
('Epoch:', 2, 'Loss:', 57.22928865253925, 'Test Loss:', 1.2148321866989136, 'MRR:', 0.22533)

CRIM evaluation:
MRR: 0.28018
MAP: 0.14123
P@1: 0.23749
P@5: 0.1342
P@10: 0.12747


* In third phase, I attempted to train embeddings and phi together
('Epochs: ', 2, 'Batch size: ', 32, 'm: ', 10, 'pki_k: ', 10, 'train_embeddings: ', False, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'Dropout rate: ', 0.3, 'Kernel constraint: ', 'None', 'Learning rate: ', 0.001)
('Epoch:', 1, 'Loss:', 51.64020486921072, 'Test Loss:', 0.8209819197654724, 'MRR:', 0.25533)
('Epoch:', 2, 'Loss:', 37.853846058249474, 'Test Loss:', 0.6199454665184021, 'MRR:', 0.28622)

CRIM evaluation:
MRR: 0.24635
MAP: 0.12464
P@1: 0.19813
P@5: 0.11927
P@10: 0.1132

Results were actually worse

# Hard Clustering

## Common Routines

In [None]:
# light-weight data class pairing one cluster's training pairs with its own CRIM model
class ClusterHybrid:
    """One hard cluster: a dedicated CRIM model plus the training pairs assigned to it.

    Parameters
    ----------
    cluster : int
        Id of the cluster this instance represents.
    cluster_predictions : array-like
        Cluster id assigned to every training pair; entry ``x`` refers to
        ``orig_data.train_query[x]`` / ``orig_data.train_hyper[x]``.
    orig_data :
        Object exposing ``embeddings_dim``, ``tokenizer``, ``train_query``,
        ``train_hyper``, ``valid_query`` and ``valid_hyper``.
    phi_init :
        Initialiser handed to ``get_CRIM_model`` for the projection layer.
    sigmoid_kernel_constraint :
        Kernel constraint handed to ``get_CRIM_model`` (may be ``None``).
    embeddings_layer :
        Embeddings layer/model shared by the cluster models.
    dropout_rate, learning_rate : float
        Forwarded unchanged to ``get_CRIM_model``.
    """

    def __init__(self, cluster, cluster_predictions,
                 orig_data,
                 phi_init, sigmoid_kernel_constraint,
                 embeddings_layer, dropout_rate, learning_rate
                 ):

        # create model (uses the constructor argument, not a same-named global)
        self.model = self._init_model(phi_init=phi_init,
                                      embeddings_layer=embeddings_layer,
                                      embeddings_dim=orig_data.embeddings_dim,
                                      sigmoid_kernel_constraint=sigmoid_kernel_constraint,
                                      dropout_rate=dropout_rate, learning_rate=learning_rate)

        self.cluster_id = cluster
        self.tokenizer = orig_data.tokenizer

        # positions of the training pairs that were assigned to this cluster
        member_rows = np.where(cluster_predictions == cluster)[0]

        # real lists (not lazy map objects) so they can be measured and re-read
        self.train_query = [orig_data.train_query[row] for row in member_rows]
        self.train_hyper = [orig_data.train_hyper[row] for row in member_rows]

        # every cluster validates against the complete validation set
        self.valid_query = orig_data.valid_query
        self.valid_hyper = orig_data.valid_hyper

        # words -> tokenizer ids -> int32 arrays
        to_sequences = orig_data.tokenizer.texts_to_sequences
        self.train_query_seq, self.train_hyper_seq, self.valid_query_seq, self.valid_hyper_seq = [
            np.asarray(to_sequences(texts), dtype='int32')
            for texts in (self.train_query, self.train_hyper, self.valid_query, self.valid_hyper)
        ]

        # running totals updated through update_loss / update_test_loss
        self.loss = 0.
        self.test_loss = 0.

    def _init_model(self, phi_init, sigmoid_kernel_constraint,
                    embeddings_layer, embeddings_dim,
                    dropout_rate, learning_rate, phi_k=1):
        """Build the CRIM model for this cluster with frozen embeddings.

        ``phi_k`` is the number of projection matrices.  It defaults to 1;
        NOTE(review): the default follows the notebook's ``phi_k = 1``
        configuration for clustered runs - confirm this is the intended value.
        ``phi_init`` is forwarded as given instead of being overridden.
        """
        return get_CRIM_model(phi_k=phi_k,
                              train_embeddings=False,
                              embeddings_dim=embeddings_dim,
                              embeddings_layer=embeddings_layer,
                              phi_init=phi_init,
                              sigmoid_kernel_constraint=sigmoid_kernel_constraint,
                              dropout_rate=dropout_rate,
                              learning_rate=learning_rate)

    def update_loss(self, new_loss):
        """Add ``new_loss`` to the accumulated training loss."""
        self.loss += new_loss

    def update_test_loss(self, new_loss):
        """Add ``new_loss`` to the accumulated test loss."""
        self.test_loss += new_loss

## Hard Clustering Specific

In [None]:
from sklearn.cluster import KMeans

# number of clusters to split the training pairs into
cluster_k = 5

In [None]:
# Offsets: for every training pair, the hypernym embedding minus the query-term
# embedding.  Rows of both matrices are aligned on the training-pair index.
pair_count = len(data.train_query)
word_index = data.tokenizer.word_index

training_query_vector = np.zeros((pair_count, data.embeddings_dim))
training_hyper_vector = np.zeros((len(data.train_hyper), data.embeddings_dim))

for i in range(pair_count):
    # word -> tokenizer id -> row of the embedding matrix
    training_query_vector[i] = data.embedding_matrix[word_index[data.train_query[i]]]
    training_hyper_vector[i] = data.embedding_matrix[word_index[data.train_hyper[i]]]

train_offsets = training_hyper_vector - training_query_vector

In [None]:
# Partition the offset vectors into cluster_k groups with K-Means.
# random_state is pinned so repeated runs use the same seed.
km = KMeans(n_clusters = cluster_k, n_jobs=-1, random_state=42)
# fit the model; as the last expression of the cell, the per-pair cluster ids are shown as output
km.fit_predict(train_offsets)

In [None]:
from collections import Counter
# how many training pairs were assigned to each cluster id
Counter(km.predict(train_offsets))

In [None]:
# Show the (query, hypernym) training pairs that K-Means assigned to cluster id 4.
# NOTE(review): on Python 3 `map` is lazy, so the cell would display a map object
# rather than the pairs - confirm the interpreter version before relying on this output.
map(lambda x: (data.train_query[x], data.train_hyper[x]), np.where(km.predict(train_offsets) == 4)[0])

In [None]:
# implement training algorithm modification to deal with clusters
def train_on_clusters(models,      # the clustered models on which parameters will be learnt
                      epochs,      # number of epochs to run          
                      batch_size,  # size of mini-batch 
                      m,           # number of negative samples
                      #data,        # data required for training                              
                      neg_strategy
                     ):                                   
    
    # train algorithm
    for epoch in range(epochs):
        # train each model on their corresponding data
        for model in models:            
            print ("Doing model: ", model.cluster_id, "; epoch: ", epoch)
            # reset loss
            model.loss = 0.
            model.test_loss = 0.
            
            samples = np.arange(len(model.train_query_seq))
            validation_samples = np.arange(len(model.valid_query_seq))
                                        
            np.random.shuffle(samples)                    
                        
            for b in range(0, len(samples), batch_size):    
                if ((b + 1) % 500) == 0:
                    print ('Model: ', model.cluster_id, '; processed ', idx+1, 'samples.')
                                    
                batch_X_term, batch_X_hyper, batch_y_label =\
                    extend_batch_with_negatives(model.train_query_seq[b:b + batch_size], 
                                                model.train_hyper_seq[b:b + batch_size],
                                                neg_strategy,
                                                model.tokenizer, m
                                               )  

                #print model.cluster_id, len(batch_X_term)
                model.update_loss(model.model.train_on_batch([batch_X_term, batch_X_hyper], batch_y_label)[0])                                
                
        # validate on entire validation set after training each model
            
        test_query, test_hyper, test_y_label =\
            extend_batch_with_negatives(model.valid_query_seq, 
                                        model.valid_hyper_seq,
                                        neg_strategy,
                                        model.tokenizer, m
                                       )  
        #batch_label = [1.] * batch_query.shape[0]
        for q, h, l in zip(test_query, test_hyper, test_y_label):                                    
            test_losses = list(map(lambda c: c.model.test_on_batch([q, h], [l])[0], models))
            best_cluster = np.argmin(test_losses)
            models[best_cluster].update_test_loss(test_losses[best_cluster])
                            
        print('Epoch:', epoch+1,\
              'Loss:', np.mean([model.loss for model in models]),\
              'Test Loss:', np.mean([model.test_loss for model in models]))    

In [None]:
# initialise clusters: one shared embeddings layer, one ClusterHybrid per cluster id
embeddings_layer = get_embeddings_model(dim=data.embeddings_dim, embedding_matrix=data.embedding_matrix)

# cluster id assigned by K-Means to every training offset
cluster_predictions = km.predict(train_offsets)

cluster_list = [
    ClusterHybrid(cluster=c,
                  cluster_predictions=cluster_predictions,
                  orig_data=data,
                  phi_init=random_identity,
                  sigmoid_kernel_constraint=None,
                  embeddings_layer=embeddings_layer,
                  dropout_rate=0.3,
                  learning_rate=0.001)
    for c in range(cluster_k)
]

In [None]:
# negative sampling options: option name -> source of negative samples held by `data`
neg_sampling_options = {'synonym':data.synonyms,
                        'random':data.random_words
                       }

# phi random init options: option name -> initialiser
phi_init_options = {'random_plus_identity': random_plus_identity,
                    'random_identity': random_identity,
                    'random_normal': random_normal}

# kernel constraint options: option name -> constraint (or None)
kernel_constraints = {'None': None, 'ForceToOne': ForceToOne()}

# positive batch size
batch_size = 32

# number of training epochs
epochs = 10


# number of negative samples
m = 1
# number of projections
phi_k = 1
# train (True) or freeze
train_embeddings = False
# negative sample strategy (key into neg_sampling_options)
negative_option = 'random'
# initialise phi strategy (key into phi_init_options)
phi_init_option = 'random_identity'
# constrain LR parameter (key into kernel_constraints)
kernel_constraint_option = 'None'
# dropout rate
dropout_rate = 0.3
# optimiser learning rate
learning_rate = 0.001

# NOTE(review): only epochs, batch_size, m and negative_option reach the call below;
# phi_k, train_embeddings, phi_init_option, kernel_constraint_option, dropout_rate and
# learning_rate are not used in this cell - presumably the cluster models were already
# configured when cluster_list was built; confirm the two stay in sync.
print ("Start training")
train_on_clusters(cluster_list, epochs, batch_size, m, neg_sampling_options[negative_option])

In [None]:
# Predict hypernyms with the clustered models, then report each metric
# averaged over all entries returned by the scorer.
print ("Generating predictions...")
cluster_predictions = predict_cluster_hypernyms(data, cluster_list)

print ("CRIM evaluation:")
score_names, all_scores = get_evaluation_scores((data.test_query, data.test_hyper), cluster_predictions)
for k, score_name in enumerate(score_names):
    # k-th metric of every score list
    metric_values = [score_list[k] for score_list in all_scores]
    print (score_name+': '+str(round(sum(metric_values) / len(all_scores), 5)))


In [None]:
#cluster_list[0].model.get_layer(name='Phi0').get_weights()[0]

# Per-cluster summary: index, accumulated training loss, accumulated test loss and
# the first weight array of the 'Prediction' layer.
# The loop variable is deliberately not called `m`, so the global negative-sample
# count `m` is not overwritten by running this cell.
for idx, cluster in enumerate(cluster_list):
    print (idx, cluster.loss, cluster.test_loss, cluster.model.get_layer(name='Prediction').get_weights()[0])

# Score one hand-picked (query, candidate hypernym) pair with every cluster model.
i = data.tokenizer.word_index['tussle']
j = data.tokenizer.word_index['student']
for c in cluster_list:
    print (c.model.predict([[i], [j]])[0])

## Hard Clustering Results

* batch_size = 32
* epochs = 10
* m = 1
* phi_k = 1
* train_embeddings = False
* negative_option = 'random'
* phi_init_option = 'random_identity'
* kernel_constraint_option = 'None'
* dropout_rate = 0.3
* learning_rate = 0.001

* Performed hard clustering using k = 5.  Vector offset used as basis for clustering.

Start training
('Doing model: ', 0, '; epoch: ', 0)
('Doing model: ', 1, '; epoch: ', 0)
('Doing model: ', 2, '; epoch: ', 0)
('Doing model: ', 3, '; epoch: ', 0)
('Doing model: ', 4, '; epoch: ', 0)
('Epoch:', 1, 'Loss:', 51.20275011062622, 'Test Loss:', 54.16907963752747)
('Doing model: ', 0, '; epoch: ', 1)
('Doing model: ', 1, '; epoch: ', 1)
('Doing model: ', 2, '; epoch: ', 1)
('Doing model: ', 3, '; epoch: ', 1)
('Doing model: ', 4, '; epoch: ', 1)
('Epoch:', 2, 'Loss:', 49.9156032204628, 'Test Loss:', 51.34425748586655)
('Doing model: ', 0, '; epoch: ', 2)
('Doing model: ', 1, '; epoch: ', 2)
('Doing model: ', 2, '; epoch: ', 2)
('Doing model: ', 3, '; epoch: ', 2)
('Doing model: ', 4, '; epoch: ', 2)
('Epoch:', 3, 'Loss:', 47.629679238796236, 'Test Loss:', 47.31288024187088)
('Doing model: ', 0, '; epoch: ', 3)
('Doing model: ', 1, '; epoch: ', 3)
('Doing model: ', 2, '; epoch: ', 3)
('Doing model: ', 3, '; epoch: ', 3)
('Doing model: ', 4, '; epoch: ', 3)
('Epoch:', 4, 'Loss:', 44.66189357638359, 'Test Loss:', 42.583502805233)
('Doing model: ', 0, '; epoch: ', 4)
('Doing model: ', 1, '; epoch: ', 4)
('Doing model: ', 2, '; epoch: ', 4)
('Doing model: ', 3, '; epoch: ', 4)
('Doing model: ', 4, '; epoch: ', 4)
('Epoch:', 5, 'Loss:', 41.36941378712654, 'Test Loss:', 37.747225880622864)
('Doing model: ', 0, '; epoch: ', 5)
('Doing model: ', 1, '; epoch: ', 5)
('Doing model: ', 2, '; epoch: ', 5)
('Doing model: ', 3, '; epoch: ', 5)
('Doing model: ', 4, '; epoch: ', 5)
('Epoch:', 6, 'Loss:', 38.09736560583114, 'Test Loss:', 33.176356403529645)
('Doing model: ', 0, '; epoch: ', 6)
('Doing model: ', 1, '; epoch: ', 6)
('Doing model: ', 2, '; epoch: ', 6)
('Doing model: ', 3, '; epoch: ', 6)
('Doing model: ', 4, '; epoch: ', 6)
('Epoch:', 7, 'Loss:', 34.89849599599838, 'Test Loss:', 29.117837768793105)
('Doing model: ', 0, '; epoch: ', 7)
('Doing model: ', 1, '; epoch: ', 7)
('Doing model: ', 2, '; epoch: ', 7)
('Doing model: ', 3, '; epoch: ', 7)
('Doing model: ', 4, '; epoch: ', 7)
('Epoch:', 8, 'Loss:', 32.04033596813679, 'Test Loss:', 25.581615015119315)
('Doing model: ', 0, '; epoch: ', 8)
('Doing model: ', 1, '; epoch: ', 8)
('Doing model: ', 2, '; epoch: ', 8)
('Doing model: ', 3, '; epoch: ', 8)
('Doing model: ', 4, '; epoch: ', 8)
('Epoch:', 9, 'Loss:', 29.56286123096943, 'Test Loss:', 22.568919814378024)
('Doing model: ', 0, '; epoch: ', 9)
('Doing model: ', 1, '; epoch: ', 9)
('Doing model: ', 2, '; epoch: ', 9)
('Doing model: ', 3, '; epoch: ', 9)
('Doing model: ', 4, '; epoch: ', 9)
('Epoch:', 10, 'Loss:', 27.37933742403984, 'Test Loss:', 20.065976665169)


CRIM evaluation:
MRR: 0.03292
P@1: 0.02402
P@5: 0.01587
P@10: 0.01505

---------------------------------------------------------------------------------------------


* We could try to train a simple model for a few epochs.  
* Once that is done we can refine the results by creating clusters, each initialised with the phi weights from the previous attempt

# Soft Clustering

In [None]:
class YamaneCluster(ClusterHybrid):
    """A single soft cluster: one model plus its own training bookkeeping."""

    def __init__(self, phi_init,
                 embeddings_layer, embeddings_dim,
                 sigmoid_kernel_constraint,
                 dropout_rate, learning_rate):
        """Build the cluster's model and reset its counters.

        Every argument is forwarded unchanged, by keyword, to
        ``self._init_model``; the result is stored in ``self.model``.
        NOTE(review): the parent initialiser is not called here -- confirm
        that ``ClusterHybrid`` needs no further set-up.
        """
        model_settings = dict(
            phi_init=phi_init,
            embeddings_layer=embeddings_layer,
            embeddings_dim=embeddings_dim,
            sigmoid_kernel_constraint=sigmoid_kernel_constraint,
            dropout_rate=dropout_rate,
            learning_rate=learning_rate,
        )
        self.model = self._init_model(**model_settings)

        # per-cluster training state
        self.epoch_count = 0     # epochs completed by this cluster
        self.loss = 0.
        self.test_loss = 0.
        self.mrr = []            # one entry appended per evaluation

    def increment_epoch(self):
        """Advance this cluster's epoch counter by one."""
        self.epoch_count = self.epoch_count + 1

In [None]:
def yamane_train(
    epochs,      # number of epochs every cluster must complete before training stops
    m,           # number of negative samples (forwarded to extend_batch_with_negatives)
    data,        # class instance containing all the data required for training/testing        
    embedding_layer,     # shared embeddings layer
    threshold    = 0.15,     # threshold; similarity below this score will trigger new cluster
    negative_option = 'random', # negative sampling source: 'synonym' (data.synonyms) or 'random' (data.random_words)
    phi_init_option = None,     # phi dense layer initialisation strategy; must be a key of phi_init_options (None is not one)
    sigmoid_constraint_option = 'None',
    dropout_rate = 0.,
    learning_rate = 0.001,
    cluster_max = 5
): 
    """Train one model per cluster, one sample at a time, creating clusters on demand.

    Every training (query, hypernym) pair starts in cluster 0.  On each pass
    over the shuffled training pairs, every existing cluster model scores the
    pair and the best-scoring cluster is selected.  If that best score is
    below `threshold`, cluster 0 has already completed at least one epoch and
    fewer than `cluster_max` clusters exist, a new YamaneCluster is created
    and the pair is assigned to it instead.  The selected cluster is then
    updated with `train_on_batch` on the batch returned by
    `extend_batch_with_negatives`, unless that cluster has already completed
    `epochs` epochs.

    After each pass the validation queries are evaluated, the mean of the
    first score of every score list (reported as MRR) is appended to
    `clusters[0].mrr`, and the epoch counter of every cluster is incremented.
    The loop ends once every cluster has completed at least `epochs` epochs.

    Notes:
        * `phi_init_option` and `sigmoid_constraint_option` are looked up in
          local dictionaries; an unknown key (including the default
          `phi_init_option=None`) raises KeyError.
        * The global numpy RNG is re-seeded with 42 on every call.
        * Progress is printed with Python 2 print statements.

    Returns:
        (clusters, sample_clusters): the list of YamaneCluster instances and
        an int32 array holding, for each training pair, the index of the
        cluster it was last assigned to.
    """
    
    phi_init_options = {'random_identity': random_identity, 
                        'random_normal': random_normal, 
                        'random_plus_identity': random_plus_identity}
    neg_sampling_options = {'synonym':data.synonyms, 'random':data.random_words}
    sigmoid_constraint_options = {'ForceToOne': ForceToOne(), 'None': None}
    
    sigmoid_kernel_constraint = sigmoid_constraint_options[sigmoid_constraint_option]
    
    neg_strategy = neg_sampling_options[negative_option]
            
    # create sequences
    # we have two sets of inputs: one for training query and hypernym terms;
    #                             another for the validation query/hyper terms;
    term_train_seq = data.tokenizer.texts_to_sequences(data.train_query)
    hyper_train_seq = data.tokenizer.texts_to_sequences(data.train_hyper)

    #term_test_seq = data.tokenizer.texts_to_sequences(data.valid_query)
    #hyper_test_seq = data.tokenizer.texts_to_sequences(data.valid_hyper)
    
    # convert all to arrays
    #term_train_seq, hyper_train_seq, term_test_seq, hyper_test_seq =\
    #[np.asarray(x, dtype='int32') for x in [term_train_seq, hyper_train_seq, term_test_seq, hyper_test_seq]]
    
    term_train_seq, hyper_train_seq = [np.asarray(x, dtype='int32') for x in [term_train_seq, hyper_train_seq]]
            
    # this array stores which cluster each training sequence pertains to
    sample_clusters = np.zeros(len(term_train_seq), dtype='int32')
    
    print ("m: ", m, "lambda: ", threshold, "max epoch per cluster: ", epochs, 
           "Negative sampling: ", negative_option, "Phi Init: ", phi_init_option,
           "sigmoid_kernel_constraint: ", sigmoid_constraint_option, 
           "dropout: ", dropout_rate, "learning_rate: ", learning_rate, 
           "cluster_max: ", cluster_max          
          )
    
    
    print ("Sample clusters size: ", len(sample_clusters))
    # list containing 1 model per cluster
    clusters = []    
        
    # cluster 0 always exists; further clusters are added inside the training loop
    clusters.append(YamaneCluster(phi_init = phi_init_options[phi_init_option],
                                  embeddings_layer = embedding_layer,
                                  embeddings_dim = data.embeddings_dim,
                                  sigmoid_kernel_constraint = sigmoid_kernel_constraint,
                                  dropout_rate = dropout_rate,
                                  learning_rate = learning_rate))
                    
    # get training set indices
    indices = np.arange(len(term_train_seq))  
    
    # get test set indices
    #test_indices = np.arange(len(term_test_seq))
            
    # initialise each training sample to cluster 0
    sample_clusters[indices] = 0        
    
    # seed random generator
    np.random.seed(42)
    
    # indicator of "current" sample cluster index
    z_i = 0
                    
    # keep going until every cluster (including late-created ones) has completed `epochs` epochs
    while np.min([c.epoch_count for c in clusters]) < epochs:
        # reset loss for each cluster that is still being trained
        for c in clusters:
            if c.epoch_count < epochs:                
                c.loss = 0.
            c.test_loss = 0.                
        
        # shuffle indices every epoch
        np.random.shuffle(indices)
        
        # train algorithm by stochastic gradient descent, one sample at a time
        # during the first epoch only cluster 0 exists (see the creation guard below)
        for idx, i in enumerate(indices):                        
            if (idx + 1) % 1000 == 0:
                print ("Processed ", idx+1, "samples...")
            
            # calculate similarity on all clusters
            sim = list(map(lambda x: x.model.predict([term_train_seq[i], hyper_train_seq[i]]), clusters))            
            max_sim = np.argmax(sim)
            #print "Term:", tokenizer.index_word[term_train_seq[i][0]], 'Hyper:', tokenizer.index_word[hyper_train_seq[i][0]], "Max Similarity cluster:", max_sim, "(sim = %0.8f)" % (sim[max_sim])
            # create a new cluster only after cluster 0 has completed its first epoch,
            # when the best similarity is below threshold, and while fewer than cluster_max clusters exist
            if ((clusters[0].epoch_count > 0) and (sim[max_sim] < threshold) and (len(clusters) < cluster_max)): 
                # add new cluster to list of clusters
                print data.tokenizer.index_word[term_train_seq[i][0]], data.tokenizer.index_word[hyper_train_seq[i][0]]
                print max_sim, sim[max_sim]
                clusters.append(YamaneCluster(phi_init = phi_init_options[phi_init_option],
                                               embeddings_layer = embedding_layer,
                                               embeddings_dim = data.embeddings_dim,
                                               sigmoid_kernel_constraint = sigmoid_kernel_constraint,
                                               dropout_rate = dropout_rate,
                                               learning_rate = learning_rate))
                
                # assign current cluster index to latest model
                z_i = len(clusters) - 1
                sample_clusters[i] = z_i
            else:            
                z_i = max_sim
                sample_clusters[i] = z_i                
                        
            # if current cluster reached/exceeded epoch count, skip current sample (i.e don't update cluster)
            if clusters[z_i].epoch_count < epochs:                                            
                # extend samples in cluster with negative samples
                batch_X_term, batch_X_hyper, batch_y_label =\
                    extend_batch_with_negatives(term_train_seq[i], 
                                                hyper_train_seq[i],
                                                neg_strategy,
                                                data.tokenizer, m
                                               )  

                # update parameters of cluster; the first value returned by train_on_batch is passed to update_loss
                clusters[z_i].update_loss(
                    clusters[z_i].model.train_on_batch([batch_X_term, batch_X_hyper], batch_y_label)[0]
                )
        
            ####################### END OF EPOCH #######################                  
        
        # instead of test loss, measure MRR as a more indicative validation metric
        print ("Running evaluation on trial data set...")
        predictions = predict_cluster_hypernyms(data.valid_query, data.tokenizer, clusters)
        _, all_scores = get_evaluation_scores((data.valid_query, data.valid_hyper), predictions)
        # mean of the first score of every score list, rounded to 5 decimals
        mrr = round(sum([score_list[0] for score_list in all_scores]) / len(all_scores), 5)
        # the MRR history is kept on cluster 0 only
        clusters[0].mrr.append(mrr)
        
        # increase epoch count for clusters (including any created during this pass)
        for cluster in clusters:            
            cluster.epoch_count += 1
                
        print('Epoch:', max([c.epoch_count for c in clusters]), 'Cluster #:', len(clusters) ,
              'Loss:', np.mean([c.loss for c in clusters]),
              'Test MRR:', mrr)
    return clusters, sample_clusters

In [None]:
import datetime

# Embedding layer shared by all cluster models.
embedding_layer = get_embeddings_model(
    dim=data.embeddings_dim,
    embedding_matrix=data.embedding_matrix,
)
epochs, m = 10, 10   # epochs per cluster, negative-sample count passed to yamane_train

print("Training started at: {}".format(datetime.datetime.now()))
clusters, sample_clusters = yamane_train(
    epochs, m, data, embedding_layer,
    threshold=0.13,
    negative_option='random',
    phi_init_option='random_plus_identity',
    sigmoid_constraint_option='ForceToOne',
    dropout_rate=0.3,
    learning_rate=0.001,
    cluster_max=30,
)
print("Training concluded at: {}".format(datetime.datetime.now()))



In [None]:
# One line per cluster: loss, test loss, epochs completed and MRR history.
for cluster in clusters:
    summary = (cluster.loss, cluster.test_loss, cluster.epoch_count, cluster.mrr)
    print(' '.join(map(str, summary)))


# (query, hypernym) training pairs whose last assignment was cluster 20
map(lambda pos: (data.train_query[pos], data.train_hyper[pos]),
    np.where(sample_clusters == 20)[0])
#c15 = Counter(map(lambda x: data.train_hyper[x], np.where(sample_clusters == 18)[0]))
#sorted(c15.items(), key = lambda (k,v): v, reverse=True)
#Counter(sample_clusters)

### Evaluate without attempting to cluster test terms

In [None]:
# Predict hypernyms for the test queries using the trained clusters only.
print("Generating predictions...")
yamane_predictions = predict_cluster_hypernyms(data.test_query, data.tokenizer, clusters)

# Report each score averaged over all returned score lists.
print("CRIM evaluation:")
score_names, all_scores = get_evaluation_scores(
    (data.test_query, data.test_hyper), yamane_predictions)
for k, metric in enumerate(score_names):
    column = [score_list[k] for score_list in all_scores]
    average = round(sum(column) / len(all_scores), 5)
    print(metric + ': ' + str(average))



In [153]:
# Spot check: run the cluster predictor on a single query term.
# NOTE(review): the output recorded for this cell is a MemoryError.
predict_cluster_hypernyms(['campo_san_polo'], data.tokenizer, clusters)

#filter(lambda x: x.startswith('campo'), data.tokenizer.word_index.keys())


MemoryError: 

### Train KNN classifier on clustering data jointly learnt by model

In [75]:
from sklearn.neighbors import KNeighborsClassifier
# Distance-weighted 5-NN classifier that maps a term embedding to a cluster index.
neigh = KNeighborsClassifier(n_neighbors=5, weights='distance')

# prepare knn dataset based on learnt clusters
# presumably one token id per training query term -- TODO confirm against the tokenizer
train_seq = np.array(data.tokenizer.texts_to_sequences(data.train_query))

# X_knn[k]: embedding rows of the distinct query terms last assigned to cluster k
X_knn = {}
for idx in range(len(clusters)):
    cluster_ids = np.where(sample_clusters == idx)
    # we can reduce duplicate terms to unique terms
    uniq_terms = np.unique(train_seq[cluster_ids])
    X_knn[idx] = data.embedding_matrix[uniq_terms]

# Stack all clusters in one call instead of growing the arrays inside a loop.
cluster_order = range(len(clusters))
X_features = np.vstack([X_knn[k] for k in cluster_order])

# Label every row with its cluster index.  np.repeat keeps the labels integer
# even when a cluster has no members (np.array([k] * 0) would be float64 and
# would silently promote the whole label vector to float).
y = np.repeat(np.arange(len(clusters)),
              [X_knn[k].shape[0] for k in cluster_order])

neigh.fit(X_features, y)

KNeighborsClassifier(algorithm='auto', leaf_size=30, metric='minkowski',
           metric_params=None, n_jobs=1, n_neighbors=5, p=2,
           weights='distance')

In [76]:
# Predict hypernyms for the test queries, this time passing the fitted KNN
# classifier to the predictor as well.
print("Generating predictions...")
yamane_predictions = predict_cluster_hypernyms(
    data.test_query, data.tokenizer, clusters, neigh)

# Report each score averaged over all returned score lists.
print("CRIM evaluation:")
gold_pairs = (data.test_query, data.test_hyper)
score_names, all_scores = get_evaluation_scores(gold_pairs, yamane_predictions)
for k, label in enumerate(score_names):
    total = sum([score_list[k] for score_list in all_scores])
    print(label + ': ' + str(round(total / len(all_scores), 5)))


Generating predictions...
('Done', 100)
('Done', 200)
('Done', 300)
('Done', 400)
('Done', 500)
('Done', 600)
('Done', 700)
('Done', 800)
('Done', 900)
('Done', 1000)
('Done', 1100)
('Done', 1200)
('Done', 1300)
('Done', 1400)
CRIM evaluation:
MRR: 0.27532
MAP: 0.13254
P@1: 0.23149
P@5: 0.12402
P@10: 0.11837


In [77]:
# Display the whole predictions dict; per the recorded output it maps each
# query term to a ranked list of predicted hypernyms (output is truncated).
yamane_predictions
#crim_get_top_hypernyms('turonian', None, clusters[15].model, data, 15)

{u'scouter': [u'person',
  u'movie',
  u'actor',
  u'tv_show',
  u'television_show',
  u'film',
  u'television_series',
  u'writer',
  u'television_program',
  u'tv_program',
  u'musician',
  u'motion_picture',
  u'work_of_art',
  u'tv_series',
  u'feature_film'],
 u'gatekeeper': [u'person',
  u'software',
  u'computer_software',
  u'software_program',
  u'software_package',
  u'software_application',
  u'computer_system',
  u'communication_medium',
  u'company',
  u'enterprise',
  u'applications_software',
  u'computer',
  u'computer_program',
  u'code',
  u'leader'],
 u'mackerel': [u'animal',
  u'plant',
  u'fish',
  u'plant_part',
  u'shellfish',
  u'brine_shrimp',
  u'crustacea',
  u'animal_tissue',
  u'edible',
  u'freshwater_fish',
  u'marine_animal',
  u'waterbody',
  u'algae',
  u'earthworm',
  u'food_web'],
 u'prefix': [u'software',
  u'software_program',
  u'software_package',
  u'computer_software',
  u'computer_program',
  u'computer_code',
  u'software_application',
  u'pe

In [None]:
from collections import Counter
#yamane_predictions['dashi']
# Mean of the first weight array of layer 'Phi0' in cluster 10's model.
# NOTE(review): this value is never displayed -- only the last expression of
# a cell is echoed; it also requires at least 11 clusters to exist.
np.mean(clusters[10].model.get_layer(name='Phi0').get_weights()[0])
# Number of training pairs currently assigned to each cluster index.
Counter(sample_clusters)

# Yamane Results

Training started...
* ('m: ', 10, 'lambda: ', 0.15, 'max epoch per cluster: ', 15, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_normal')
('Epoch:', 1, 'Cluster #:', 25, 'Loss:', 185.25700701117515, 'Test Loss:', 462.11729974992573)
('Epoch:', 2, 'Cluster #:', 25, 'Loss:', 94.78926725581289, 'Test Loss:', 257.9614339789539)
('Epoch:', 3, 'Cluster #:', 25, 'Loss:', 68.510555833322, 'Test Loss:', 168.7855049063693)
('Epoch:', 4, 'Cluster #:', 25, 'Loss:', 56.637282415106895, 'Test Loss:', 131.58040585272218)
('Epoch:', 5, 'Cluster #:', 25, 'Loss:', 48.74302359417314, 'Test Loss:', 114.5913256756349)
('Epoch:', 6, 'Cluster #:', 25, 'Loss:', 43.784197641848586, 'Test Loss:', 103.29953979119527)
('Epoch:', 7, 'Cluster #:', 25, 'Loss:', 39.676348549440156, 'Test Loss:', 94.9497262534533)
('Epoch:', 8, 'Cluster #:', 25, 'Loss:', 36.70157310644514, 'Test Loss:', 88.89629495775293)
('Epoch:', 9, 'Cluster #:', 25, 'Loss:', 33.68777084685018, 'Test Loss:', 85.53309237194794)
('Epoch:', 10, 'Cluster #:', 25, 'Loss:', 31.62626134102262, 'Test Loss:', 80.67354718668425)
('Epoch:', 11, 'Cluster #:', 25, 'Loss:', 30.100850980100514, 'Test Loss:', 83.60007793995133)
('Epoch:', 12, 'Cluster #:', 25, 'Loss:', 27.972241665369946, 'Test Loss:', 78.48380810162551)
('Epoch:', 13, 'Cluster #:', 25, 'Loss:', 26.79718875462626, 'Test Loss:', 78.7881210642461)
('Epoch:', 14, 'Cluster #:', 25, 'Loss:', 25.295018676088365, 'Test Loss:', 75.88227921965843)
('Epoch:', 15, 'Cluster #:', 25, 'Loss:', 24.44286949604233, 'Test Loss:', 71.73972092286799)
2018-12-10 02:39:45.787354

CRIM evaluation:
MRR: 0.02308
P@1: 0.01201
P@5: 0.01234
P@10: 0.01186

---------------------------------------------------------------
* ('m: ', 10, 'lambda: ', 0.1, 'max epoch per cluster: ', 20, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'sigmoid_constraint': 'ForceToOne', dropout=0.3, learning_rate=0.001)


('Sample clusters size: ', 11779)
('Epoch:', 1, 'Cluster #:', 30, 'Loss:', 164.64302213018138, 'Test Loss:', 16.821501479359963)
('Epoch:', 2, 'Cluster #:', 30, 'Loss:', 85.95771886678412, 'Test Loss:', 9.479251270126163)
('Epoch:', 3, 'Cluster #:', 30, 'Loss:', 60.89030767480532, 'Test Loss:', 7.113059009361313)
('Epoch:', 4, 'Cluster #:', 30, 'Loss:', 49.35616979487046, 'Test Loss:', 5.911477896573342)
('Epoch:', 5, 'Cluster #:', 30, 'Loss:', 42.839287023517926, 'Test Loss:', 5.237530594305887)
('Epoch:', 6, 'Cluster #:', 30, 'Loss:', 37.95000084499964, 'Test Loss:', 4.801821103322982)
('Epoch:', 7, 'Cluster #:', 30, 'Loss:', 34.69708706878203, 'Test Loss:', 4.445396709869783)
('Epoch:', 8, 'Cluster #:', 30, 'Loss:', 31.344791742943926, 'Test Loss:', 4.20229308469837)
('Epoch:', 9, 'Cluster #:', 30, 'Loss:', 29.270582812105324, 'Test Loss:', 4.001659726132235)
('Epoch:', 10, 'Cluster #:', 30, 'Loss:', 27.312983008645823, 'Test Loss:', 3.8331675905460845)
('Epoch:', 11, 'Cluster #:', 30, 'Loss:', 25.86424312723296, 'Test Loss:', 3.7695775713167223)
('Epoch:', 12, 'Cluster #:', 30, 'Loss:', 24.26373833860683, 'Test Loss:', 3.643787452736217)
('Epoch:', 13, 'Cluster #:', 30, 'Loss:', 23.194776870778636, 'Test Loss:', 3.5394367815238903)
('Epoch:', 14, 'Cluster #:', 30, 'Loss:', 22.077418524692256, 'Test Loss:', 3.4249304910639884)
('Epoch:', 15, 'Cluster #:', 30, 'Loss:', 20.919567144932383, 'Test Loss:', 3.3781916145887294)
('Epoch:', 16, 'Cluster #:', 30, 'Loss:', 20.137791508837896, 'Test Loss:', 3.305138163670991)
('Epoch:', 17, 'Cluster #:', 30, 'Loss:', 19.10677393638859, 'Test Loss:', 3.261422005968933)
('Epoch:', 18, 'Cluster #:', 30, 'Loss:', 18.686860465103926, 'Test Loss:', 3.1841310813365644)
('Epoch:', 19, 'Cluster #:', 30, 'Loss:', 17.963673188673297, 'Test Loss:', 3.117555515795205)
('Epoch:', 20, 'Cluster #:', 30, 'Loss:', 16.913953626791166, 'Test Loss:', 3.1116348788942028)
2018-12-10 12:28:42.204443

* Cluster distribution
Counter({0: 68,
         1: 516,
         2: 56,
         3: 1415,
         4: 345,
         5: 30,
         6: 452,
         7: 38,
         8: 45,
         9: 693,
         10: 2442,
         11: 18,
         12: 908,
         13: 28,
         14: 141,
         15: 1306,
         16: 84,
         17: 315,
         18: 412,
         19: 394,
         20: 364,
         21: 60,
         22: 189,
         23: 105,
         24: 24,
         25: 127,
         26: 731,
         27: 119,
         28: 34,
         29: 320})
         
CRIM evaluation:
MRR: 0.02787
P@1: 0.01935
P@5: 0.01447
P@10: 0.01298


* After fitting a KNN model on the cluster assignments and using it to choose which clusters are used to compute the predictions for each query term, the results improved three-fold.

CRIM evaluation:
MRR: 0.06088
P@1: 0.0447
P@5: 0.02825
P@10: 0.02616

* However, the Yamane approach is, in general, disappointing on the shared-task challenge.  

----------------------------------------------------------------------------------------
Training started...
('m: ', 5, 'lambda: ', 0.05, 'max epoch per cluster: ', 10, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'sigmoid_kernel_constraint: ', 'None', 'dropout: ', 0.2, 'learning_rate: ', 0.001, 'cluster_max: ', 15)
('Sample clusters size: ', 11779)
('Epoch:', 1, 'Cluster #:', 3, 'Loss:', 1109.3663462693028, 'Test Loss:', 50.47739016701659)
('Epoch:', 2, 'Cluster #:', 4, 'Loss:', 443.7235447903154, 'Test Loss:', 17.8407350068523)
('Epoch:', 3, 'Cluster #:', 4, 'Loss:', 337.1281598309441, 'Test Loss:', 20.06709446922047)
('Epoch:', 4, 'Cluster #:', 4, 'Loss:', 267.10441667238143, 'Test Loss:', 19.677748865275674)
('Epoch:', 5, 'Cluster #:', 4, 'Loss:', 229.65082203910208, 'Test Loss:', 17.97698300000083)
('Epoch:', 6, 'Cluster #:', 4, 'Loss:', 193.899041174633, 'Test Loss:', 16.751907205700498)
('Epoch:', 7, 'Cluster #:', 4, 'Loss:', 174.12937078346036, 'Test Loss:', 16.246821108061788)
('Epoch:', 8, 'Cluster #:', 5, 'Loss:', 143.5690400129045, 'Test Loss:', 8.259589462610172)
('Epoch:', 9, 'Cluster #:', 5, 'Loss:', 122.85453648527569, 'Test Loss:', 10.32649805369518)
('Epoch:', 10, 'Cluster #:', 5, 'Loss:', 120.9294774463863, 'Test Loss:', 11.019405025631883)
('Epoch:', 11, 'Cluster #:', 5, 'Loss:', 117.86586058200308, 'Test Loss:', 11.315035963446595)
('Epoch:', 12, 'Cluster #:', 5, 'Loss:', 117.66658123196073, 'Test Loss:', 11.45983252564142)
('Epoch:', 13, 'Cluster #:', 5, 'Loss:', 117.53339115441747, 'Test Loss:', 11.49364635983179)
('Epoch:', 14, 'Cluster #:', 5, 'Loss:', 117.17556676329085, 'Test Loss:', 11.589454222113588)
('Epoch:', 15, 'Cluster #:', 5, 'Loss:', 116.93114152015157, 'Test Loss:', 11.61919019261072)
('Epoch:', 16, 'Cluster #:', 5, 'Loss:', 116.64158589304395, 'Test Loss:', 11.655326009184815)
('Epoch:', 17, 'Cluster #:', 5, 'Loss:', 116.35099397958227, 'Test Loss:', 11.681808996588686)
2018-12-10 21:24:08.962568

CRIM evaluation:
MRR: 0.05966
P@1: 0.03669
P@5: 0.02672
P@10: 0.02536

* After predicting query term clusters
CRIM evaluation:
MRR: 0.06088
P@1: 0.0447
P@5: 0.02825
P@10: 0.02616

----------------------------------------------------------------------------------------------
('m: ', 5, 'lambda: ', 0.1, 'max epoch per cluster: ', 10, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'sigmoid_kernel_constraint: ', 'ForceToOne', 'dropout: ', 0.3, 'learning_rate: ', 0.001, 'cluster_max: ', 15)
('Sample clusters size: ', 11779)
('Epoch:', 1, 'Cluster #:', 9, 'Loss:', 482.0829993935509, 'Test Loss:', 27.62408774221937)
('Epoch:', 2, 'Cluster #:', 10, 'Loss:', 281.1839863856323, 'Test Loss:', 17.055058593617286)
('Epoch:', 3, 'Cluster #:', 11, 'Loss:', 208.62401276072805, 'Test Loss:', 12.390350534596523)
('Epoch:', 4, 'Cluster #:', 12, 'Loss:', 166.41537183626983, 'Test Loss:', 9.618624108000708)
('Epoch:', 5, 'Cluster #:', 13, 'Loss:', 134.7260290642522, 'Test Loss:', 7.7731040777703475)
('Epoch:', 6, 'Cluster #:', 13, 'Loss:', 119.45636204084552, 'Test Loss:', 7.063226612456044)
('Epoch:', 7, 'Cluster #:', 13, 'Loss:', 108.30129835629151, 'Test Loss:', 6.616330906035168)
('Epoch:', 8, 'Cluster #:', 13, 'Loss:', 101.58308503461907, 'Test Loss:', 6.293532006980128)
('Epoch:', 9, 'Cluster #:', 13, 'Loss:', 94.7640274246464, 'Test Loss:', 5.976787171151955)
('Epoch:', 10, 'Cluster #:', 13, 'Loss:', 88.485667872232, 'Test Loss:', 5.796518988622176)
('Epoch:', 11, 'Cluster #:', 13, 'Loss:', 88.32206717526479, 'Test Loss:', 5.7801972194946964)
('Epoch:', 12, 'Cluster #:', 13, 'Loss:', 88.03503130540048, 'Test Loss:', 5.762659813637024)
('Epoch:', 13, 'Cluster #:', 13, 'Loss:', 87.9104809983742, 'Test Loss:', 5.754546612789104)
('Epoch:', 14, 'Cluster #:', 13, 'Loss:', 87.80618179810531, 'Test Loss:', 5.747131267707188)
2018-12-11 00:15:28.930610

CRIM evaluation:
MRR: 0.05927
P@1: 0.03602
P@5: 0.03072
P@10: 0.02855

* After KNN

CRIM evaluation:
MRR: 0.06201
P@1: 0.03736
P@5: 0.03205
P@10: 0.02976

------------------------------------------------------------------------------------------------

('Sample clusters size: ', 11779)
('m: ', 1, 'lambda: ', 0.13, 'max epoch per cluster: ', 10, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_identity', 'sigmoid_kernel_constraint: ', 'ForceToOne', 'dropout: ', 0.3, 'learning_rate: ', 0.001, 'cluster_max: ', 25)
('Epoch:', 1, 'Cluster #:', 2, 'Loss:', 2540.3022823217325, 'Test Loss:', 53.87453193264082)
('Epoch:', 2, 'Cluster #:', 2, 'Loss:', 1667.1898109320173, 'Test Loss:', 40.18764992605429)
('Epoch:', 3, 'Cluster #:', 2, 'Loss:', 1361.9321219512858, 'Test Loss:', 34.321806932479376)
('Epoch:', 4, 'Cluster #:', 2, 'Loss:', 1178.6240238926068, 'Test Loss:', 30.980050023383228)
('Epoch:', 5, 'Cluster #:', 2, 'Loss:', 1044.863646138525, 'Test Loss:', 27.789724176647724)
('Epoch:', 6, 'Cluster #:', 3, 'Loss:', 643.6681745337323, 'Test Loss:', 16.18063761241986)
('Epoch:', 7, 'Cluster #:', 3, 'Loss:', 585.461576843336, 'Test Loss:', 14.75499623647435)
('Epoch:', 8, 'Cluster #:', 3, 'Loss:', 543.6171705845585, 'Test Loss:', 13.808908825924542)
('Epoch:', 9, 'Cluster #:', 3, 'Loss:', 507.70913083471424, 'Test Loss:', 12.98376774797604)
('Epoch:', 10, 'Cluster #:', 3, 'Loss:', 484.66481586770334, 'Test Loss:', 12.76265995549602)
('Epoch:', 11, 'Cluster #:', 3, 'Loss:', 483.03113291140517, 'Test Loss:', 12.732977458312538)
('Epoch:', 12, 'Cluster #:', 3, 'Loss:', 481.7263510373843, 'Test Loss:', 12.668654509856728)
('Epoch:', 13, 'Cluster #:', 3, 'Loss:', 481.51302522893866, 'Test Loss:', 12.680976091299877)
('Epoch:', 14, 'Cluster #:', 3, 'Loss:', 480.39035817460336, 'Test Loss:', 12.628896979962215)
('Epoch:', 15, 'Cluster #:', 3, 'Loss:', 481.034190520407, 'Test Loss:', 12.582702009354458)
2018-12-11 01:20:21.632953

CRIM evaluation:
MRR: 0.06717
MAP: 0.03322
P@1: 0.0427
P@5: 0.03244
P@10: 0.03148

----------------------------------------------------------------------------------------------
* Attempted a different technique whereby the first (default) cluster is trained exclusively during the first epoch.  This follows the initialisation assumption that assigns every training word pair to the first cluster.
* The results were not impressive, but they were better than those observed with Yamane so far.
* Also, I'm limiting the number of clusters to 25, a ceiling which is reached immediately in the 2nd epoch.
* Must experiment with leaving the number of clusters open (until no more are learnt), or with setting a much higher ceiling.
* Alternatively, we can set a threshold beyond which training examples are allocated to a "new" cluster that is not actually created.  This still prevents the model from growing uncontrollably.

Training started at: 2018-12-12 21:48:50.279247
('m: ', 10, 'lambda: ', 0.15, 'max epoch per cluster: ', 10, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'sigmoid_kernel_constraint: ', 'ForceToOne', 'dropout: ', 0.3, 'learning_rate: ', 0.001, 'cluster_max: ', 25)
('Sample clusters size: ', 11779)
Running evaluation on trial data set...
('Epoch:', 1, 'Cluster #:', 1, 'Loss:', 2645.7672815788537, 'Test MRR:', 0.10333)
coalition annotation
0 [[0.14232686]]
battersea_arts_centre constructed_structure
1 [[0.12914188]]
lyceum constructed_structure
1 [[0.14685696]]
('Processed ', 1000, 'samples...')
storyteller beguiler
1 [[0.14815803]]
paris center
2 [[0.146684]]
('Processed ', 2000, 'samples...')
union_station moving-picture_show
5 [[0.11579892]]
world wonder
1 [[0.11191753]]
('Processed ', 3000, 'samples...')
atmosphere magnitude_relation
5 [[0.13706593]]
true_life moving-picture_show
8 [[0.12733255]]
sight perspective
9 [[0.14652736]]
('Processed ', 4000, 'samples...')
young inebriant
8 [[0.14763217]]
('Processed ', 5000, 'samples...')
lucrezia_borgia moving-picture_show
8 [[0.11355487]]
tulsa moving-picture_show
12 [[0.08012623]]
('Processed ', 6000, 'samples...')
under_the_moon series
13 [[0.13277091]]
dale natural_depression
14 [[0.14759865]]
dale vale
15 [[0.1460675]]
('Processed ', 7000, 'samples...')
seaquake trouble
16 [[0.14310668]]
scott hamlet
14 [[0.13601676]]
('Processed ', 8000, 'samples...')
hamlet moving-picture_show
18 [[0.11416043]]
sight moving-picture_show
19 [[0.08516396]]
('Processed ', 9000, 'samples...')
power probality
19 [[0.1498438]]
touch_and_go moving-picture_show
21 [[0.14001596]]
('Processed ', 10000, 'samples...')
star separable_space
20 [[0.1300958]]
scott mathematical_relation
23 [[0.12929982]]
('Processed ', 11000, 'samples...')
('Epoch:', 2, 'Cluster #:', 25, 'Loss:', 156.81297644650564, 'Test MRR:', 0.05333)
('Epoch:', 3, 'Cluster #:', 25, 'Loss:', 87.73115104873665, 'Test MRR:', 0.06417)
('Epoch:', 4, 'Cluster #:', 25, 'Loss:', 73.36540189261082, 'Test MRR:', 0.05733)
('Epoch:', 5, 'Cluster #:', 25, 'Loss:', 64.38504860263784, 'Test MRR:', 0.059)
('Epoch:', 6, 'Cluster #:', 25, 'Loss:', 56.81815980266779, 'Test MRR:', 0.078)
('Epoch:', 7, 'Cluster #:', 25, 'Loss:', 51.290908942183016, 'Test MRR:', 0.094)
('Epoch:', 8, 'Cluster #:', 25, 'Loss:', 47.23172779464861, 'Test MRR:', 0.08517)
('Epoch:', 9, 'Cluster #:', 25, 'Loss:', 43.46400957543636, 'Test MRR:', 0.08822)
('Epoch:', 10, 'Cluster #:', 25, 'Loss:', 40.14030005659443, 'Test MRR:', 0.07756)
('Epoch:', 11, 'Cluster #:', 25, 'Loss:', 38.91978675389197, 'Test MRR:', 0.06556)
Training concluded at: 2018-12-12 23:16:56.594990

CRIM evaluation:
MRR: 0.09636
MAP: 0.04405
P@1: 0.06871
P@5: 0.04286
P@10: 0.04008

* After clustering test queries using KNN: (slight improvement)

CRIM evaluation:
MRR: 0.09942
MAP: 0.04663
P@1: 0.07071
P@5: 0.04554
P@10: 0.04292

----------------------------------------------------------------------------------------------------
* Loosened ceiling somewhat to create 30 clusters and reduced the lambda threshold to 0.13 (from 0.15).  Clusters still increased to 30 pretty quickly.  

Training started at: 2018-12-13 00:32:10.229728
('m: ', 10, 'lambda: ', 0.13, 'max epoch per cluster: ', 10, 'Negative sampling: ', 'random', 'Phi Init: ', 'random_plus_identity', 'sigmoid_kernel_constraint: ', 'ForceToOne', 'dropout: ', 0.3, 'learning_rate: ', 0.001, 'cluster_max: ', 30)
('Sample clusters size: ', 11779)
('Epoch:', 1, 'Cluster #:', 1, 'Loss:', 2648.454608650878, 'Test MRR:', 0.10333)
coalition annotation
0 [[0.11573532]]
country popular_music_genre
1 [[0.11753921]]
campo_san_polo hausdorff_space
1 [[0.12842865]]
('Processed ', 1000, 'samples...')
paris series
2 [[0.11816365]]
('Processed ', 2000, 'samples...')
grand moving-picture_show
4 [[0.08930109]]
sight season
4 [[0.1250305]]
('Processed ', 3000, 'samples...')
rotunda constructed_structure
5 [[0.12891144]]
true_life moving-picture_show
6 [[0.10975606]]
('Processed ', 4000, 'samples...')
century si_base_unit
7 [[0.12408465]]
telemovie data-storage_medium
7 [[0.12526542]]
('Processed ', 5000, 'samples...')
lucrezia_borgia moving-picture_show
10 [[0.11526864]]
tulsa moving-picture_show
11 [[0.08711283]]
('Processed ', 6000, 'samples...')
nancy sailing_boat
5 [[0.12192791]]
dale vale
13 [[0.11943772]]
('Processed ', 7000, 'samples...')
seaquake trouble
14 [[0.12455411]]
scott hamlet
15 [[0.12320519]]
('Processed ', 8000, 'samples...')
hamlet moving-picture_show
16 [[0.10422902]]
sight moving-picture_show
17 [[0.07907726]]
('Processed ', 9000, 'samples...')
campo_san_polo topological_manifold
18 [[0.09345377]]
('Processed ', 10000, 'samples...')
star separable_space
16 [[0.10601153]]
scott mathematical_relation
20 [[0.12851807]]
('Processed ', 11000, 'samples...')
knickers physiological_property
21 [[0.11303463]]
new_orleans moving-picture_show
21 [[0.08724869]]
Running evaluation on trial data set...
('Epoch:', 2, 'Cluster #:', 24, 'Loss:', 160.69473196558343, 'Test MRR:', 0.07)
campo_san_polo hausdorff_space
21 [[0.11840388]]
sight view
19 [[0.12602326]]
('Processed ', 1000, 'samples...')
lyceum constructed_structure
1 [[0.12324508]]
('Processed ', 2000, 'samples...')
hotel_europe constructed_structure
26 [[0.12173095]]
campo_san_polo way
27 [[0.09194536]]
('Processed ', 3000, 'samples...')
keshu moving-picture_show
28 [[0.11239138]]
('Epoch:', 3, 'Cluster #:', 30, 'Loss:', 88.85481828417979, 'Test MRR:', 0.07286)
('Epoch:', 4, 'Cluster #:', 30, 'Loss:', 64.39664095826059, 'Test MRR:', 0.07467)
('Epoch:', 5, 'Cluster #:', 30, 'Loss:', 56.075072061417934, 'Test MRR:', 0.09)
('Epoch:', 6, 'Cluster #:', 30, 'Loss:', 49.8601350386045, 'Test MRR:', 0.11167)
('Epoch:', 7, 'Cluster #:', 30, 'Loss:', 45.05676133218609, 'Test MRR:', 0.11622)
('Epoch:', 8, 'Cluster #:', 30, 'Loss:', 41.27733868958215, 'Test MRR:', 0.09743)
('Epoch:', 9, 'Cluster #:', 30, 'Loss:', 37.57391656068697, 'Test MRR:', 0.08908)
('Epoch:', 10, 'Cluster #:', 30, 'Loss:', 34.95721684883465, 'Test MRR:', 0.08286)
('Epoch:', 11, 'Cluster #:', 30, 'Loss:', 33.32479047970652, 'Test MRR:', 0.08286)
('Epoch:', 12, 'Cluster #:', 30, 'Loss:', 33.07274405640977, 'Test MRR:', 0.08286)
Training concluded at: 2018-12-13 02:46:15.895080

CRIM evaluation:
MRR: 0.10991
MAP: 0.05083
P@1: 0.07805
P@5: 0.04998
P@10: 0.04715

* After KNN
CRIM evaluation:
MRR: 0.10861
MAP: 0.04981
P@1: 0.08005
P@5: 0.04831
P@10: 0.04565



# Scratch Pad

In [None]:
# Index stored for 'dog' in the vocabulary of `model`
# (presumably the KeyedVectors loaded at the top of the notebook -- confirm it was not rebound).
model.vocab['dog'].index


In [None]:
# Scratch check of negative sampling on 32 randomly chosen training pairs.
test_these_terms, test_these_hypers = [
    np.asarray(data.tokenizer.texts_to_sequences(texts))
    for texts in (data.train_query, data.train_hyper)
]

indices = np.arange(len(test_these_terms))
#np.random.seed(32)
np.random.shuffle(indices)
batch_rows = indices[:32]
print(batch_rows)

# extend the 32 positive pairs with negatives (last argument: 5)
term, hyper, label = extend_batch_with_negatives(
    test_these_terms[batch_rows], test_these_hypers[batch_rows],
    data.random_words, data.tokenizer, 5)
print(len(term))
# decode the first token of each term/hypernym back to words, next to its label
[(data.tokenizer.index_word[t[0]], data.tokenizer.index_word[h[0]], flag)
 for t, h, flag in zip(term, hyper, label)]

In [None]:
#hyper_candidates = [[data.tokenizer.word_index[hyper]] for hyper in data.vocab]
# Count the distinct entries in data.valid_hyper.
len(set(data.valid_hyper))

In [None]:

# Keep only the layers whose exact type is Dense, then show the first
# weight array of the first such layer.
dense_layers = [l for l in crim_model.layers if type(l) == Dense]
dense_layers[0].get_weights()[0]

In [None]:
# Encode the whole vocabulary as token-id sequences and show the shape of
# the sorted result.
vocab_sequences = data.tokenizer.texts_to_sequences(data.vocab)
np.sort(vocab_sequences).shape



In [38]:
#for idx, m in enumerate(clusters):
 #   print idx, m.loss, m.test_loss, m.model.get_layer(name='Prediction').get_weights()[0]

# Score one (query, hypernym) pair with the model of every cluster.
i, j = [data.tokenizer.word_index[w] for w in ('vegeterian', 'person')]
for c in clusters:
    pair_scores = c.model.predict([[i], [j]])
    print(pair_scores[0])

[0.8394212]
[0.19308254]
[0.11467936]
[0.15988037]
[0.10992873]
[0.11244643]
[0.16883272]
[0.09364048]
[0.10421132]
[0.11453836]


In [None]:
# Number of entries in data.test_query.
len(data.test_query)

In [None]:
def get_cluster_CRIM_model(phi_k=1,
                           embeddings_layer=None,
                           embeddings_dim = 200,
                           phi_init = None,
                           phi_activity_regularisation = None,
                           sigmoid_kernel_regularisation = None,
                           sigmoid_bias_regularisation = None,
                           sigmoid_kernel_constraint = None,
                           dropout_rate = 0.,
                           learning_rate = 0.001
                  ):
    """Build and compile a CRIM-style hyponym/hypernym scoring model.

    The hyponym embedding is passed through `phi_k` bias-free linear
    projections (layers 'Phi0', 'Phi1', ...).  Every projection is compared
    with the hypernym embedding through a normalised dot product, and a
    single sigmoid unit ('Prediction') turns those similarities into one
    score.

    Args:
        phi_k: number of projection layers; must be >= 1.
        embeddings_layer: callable applied to `[hypo_input, hyper_input]`
            that returns the pair `(hypo_embedding, hyper_embedding)`.
            Required, even though the signature defaults it to None.
        embeddings_dim: number of units of every projection layer.
        phi_init: kernel initializer of the projection layers; None selects
            'glorot_uniform', the documented Dense default.
        phi_activity_regularisation: activity regularizer of the projections.
        sigmoid_kernel_regularisation: kernel regularizer of 'Prediction'.
        sigmoid_bias_regularisation: bias regularizer of 'Prediction'.
        sigmoid_kernel_constraint: kernel constraint of 'Prediction'.
        dropout_rate: rate shared by the three Dropout layers (both
            embeddings and the projected hyponym).
        learning_rate: learning rate handed to the Adam optimiser.

    Returns:
        A Model with inputs ['Hyponym', 'Hypernym'] (each of shape (1,)) and
        one sigmoid output, compiled with binary cross-entropy loss and the
        accuracy metric.

    Raises:
        ValueError: if `embeddings_layer` is None or `phi_k` is below 1.
    """
    # Fail early with a readable message instead of the opaque
    # "'NoneType' object is not callable" / empty-concatenate errors.
    if embeddings_layer is None:
        raise ValueError("embeddings_layer is required: pass the shared "
                         "embeddings model used to embed both inputs")
    if phi_k < 1:
        raise ValueError("phi_k must be >= 1, got %r" % (phi_k,))

    # Make the default initializer explicit rather than handing None to Dense.
    phi_kernel_init = 'glorot_uniform' if phi_init is None else phi_init

    hypo_input  = Input(shape=(1,), name='Hyponym')
    hyper_input = Input(shape=(1,), name='Hypernym')

    hypo_embedding, hyper_embedding = embeddings_layer([hypo_input, hyper_input])

    # Add Dropout to avoid overfit
    hypo_embedding = Dropout(dropout_rate, name='Dropout_Hypo')(hypo_embedding)
    hyper_embedding = Dropout(dropout_rate, name='Dropout_Hyper')(hyper_embedding)

    # One bias-free linear projection of the hyponym per "phi".
    phi_layer = []
    for i in range(phi_k):
        phi_layer.append(Dense(embeddings_dim, activation=None, use_bias=False,
                               activity_regularizer=phi_activity_regularisation,
                               kernel_initializer=phi_kernel_init,
                               name='Phi%d' % (i)) (hypo_embedding))

    if phi_k == 1:
        # flatten tensors
        phi = Flatten()(phi_layer[0])
        hyper_embedding = Flatten()(hyper_embedding)
    else:
        # stack the projections along axis 1 so Dot yields one score each
        phi = concatenate(phi_layer, axis=1)

    phi = Dropout(dropout_rate, name='Dropout_Phi')(phi)

    # this is referred to as "s" in the "CRIM" paper
    # (normalize=True L2-normalises both operands before the dot product)
    phi_hyper = Dot(axes=-1, normalize=True, name='DotProduct')([phi, hyper_embedding])

    if phi_k > 1:
        phi_hyper = Flatten()(phi_hyper)

    predictions = Dense(1, activation="sigmoid", name='Prediction',
                        use_bias=True,
                        kernel_initializer='random_normal',
                        kernel_constraint= sigmoid_kernel_constraint,
                        bias_initializer='random_normal',
                        kernel_regularizer=sigmoid_kernel_regularisation,
                        bias_regularizer=sigmoid_bias_regularisation
                       ) (phi_hyper)

    # instantiate model
    model = Model(inputs=[hypo_input, hyper_input], outputs=predictions)

    adam = Adam(lr = learning_rate, beta_1 = 0.9, beta_2 = 0.9, clipnorm=1.)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics=['accuracy'])

    return model

In [None]:
import gc
# Drop the notebook-level reference to `clusters` so the objects it holds
# become collectable.
del clusters

# Run a garbage-collection pass right away; the cell displays its return value.
gc.collect()


In [41]:
from collections import Counter

# Draw 11778 row positions of data.train_query with replacement and tally
# how many times each position was picked.
train_positions = np.arange(len(data.train_query))
resampled_positions = np.random.choice(train_positions, size=11778, replace=True)
Counter(resampled_positions)

Counter({2: 1,
         3: 1,
         4: 3,
         5: 1,
         6: 3,
         8: 1,
         12: 1,
         13: 1,
         14: 2,
         15: 2,
         16: 2,
         19: 2,
         20: 1,
         21: 1,
         25: 1,
         26: 1,
         28: 2,
         29: 1,
         31: 3,
         32: 2,
         33: 2,
         35: 2,
         36: 1,
         38: 2,
         40: 1,
         41: 1,
         42: 1,
         47: 2,
         48: 1,
         50: 2,
         51: 2,
         54: 2,
         56: 2,
         59: 3,
         60: 3,
         61: 1,
         67: 1,
         69: 3,
         70: 2,
         73: 1,
         74: 1,
         75: 1,
         77: 1,
         79: 2,
         80: 2,
         81: 1,
         82: 1,
         85: 1,
         88: 1,
         90: 2,
         92: 1,
         93: 2,
         94: 1,
         96: 1,
         98: 1,
         99: 3,
         100: 1,
         102: 1,
         103: 1,
         104: 1,
         106: 2,
         107: 3,
        

In [27]:
# Build the embeddings sub-model from data.embedding_matrix.
embeddings_layer = get_embeddings_model(embedding_matrix=data.embedding_matrix)

# Settings for a single-projection (phi_k = 1) test model.
test_crim_kwargs = dict(
    phi_k=1,
    train_embeddings=True,
    embeddings_dim=data.embeddings_dim,
    #vocab_size = len(data.tokenizer.word_counts), embedding = data.embedding_matrix,
    embeddings_layer=embeddings_layer,
    phi_init=RandomIdentity(),
    sigmoid_kernel_regularisation=None,  #l2(0.001),
    sigmoid_bias_regularisation=None,  #l2(0.001),
    sigmoid_kernel_constraint=None,
    dropout_rate=0.3,
    learning_rate=0.0001
)
test_crim_model = get_CRIM_model(**test_crim_kwargs)

In [28]:
# Show the first weight array of the layer named 'Phi0' in test_crim_model.
test_crim_model.get_layer(name='Phi0').get_weights()[0]

array([[-0.00319363, -0.        , -0.        , ..., -0.        ,
        -0.        ,  0.        ],
       [-0.        ,  0.00204002,  0.        , ..., -0.        ,
        -0.        ,  0.        ],
       [ 0.        , -0.        , -0.00759161, ...,  0.        ,
        -0.        , -0.        ],
       ...,
       [-0.        ,  0.        ,  0.        , ..., -0.01416957,
         0.        ,  0.        ],
       [-0.        , -0.        , -0.        , ..., -0.        ,
         0.01755028,  0.        ],
       [ 0.        , -0.        , -0.        , ..., -0.        ,
         0.        , -0.00710576]], dtype=float32)

In [29]:
# Derive a second model from test_crim_model with train_phi switched off
# and the embeddings still trainable.
freeze_options = dict(train_phi=False,
                      train_embeddings=True,
                      dropout_rate=0.3,
                      learning_rate=0.0001)
test_crim_model_2 = get_CRIM_model_freeze_phi(test_crim_model, **freeze_options)



In [30]:
# Apply the same derivation once more, this time starting from test_crim_model_2.
freeze_options = dict(train_phi=False,
                      train_embeddings=True,
                      dropout_rate=0.3,
                      learning_rate=0.0001)
test_crim_model_3 = get_CRIM_model_freeze_phi(test_crim_model_2, **freeze_options)

In [98]:
#cluster_list[0].model.get_layer(name='Phi0').get_weights()[0]

# One line per cluster: its position, the mean of avg_loss and the second
# weight array of the 'Prediction' layer (presumably its bias -- confirm).
for idx, m in enumerate(clusters):
    mean_loss = np.mean(m.avg_loss)
    second_weights = m.model.get_layer(name='Prediction').get_weights()[1]
    print('%s %s %s' % (idx, mean_loss, second_weights))

# Score the ('dirham', 'monetary_unit') pair with every cluster model.
i, j = [data.tokenizer.word_index[w] for w in ('dirham', 'monetary_unit')]
for c in clusters:
    pair_scores = c.model.predict([[i], [j]])
    print(pair_scores[0])
    


0 0.30369790482193465 [-2.2947042]
[0.1097306]


In [71]:
# Bare expression: evaluated but not displayed, since it is not the cell's last line.
data.valid_query[0], data.valid_hyper[0]
i, j = [data.tokenizer.word_index[w] for w in ('dirham', 'monetary_unit')]

# Dot product of the two rows of data.embedding_matrix selected above.
hypo_vector = data.embedding_matrix[i]
hyper_vector = data.embedding_matrix[j]
np.dot(hypo_vector, hyper_vector)



0.40714827

In [29]:
# Token-id sequences produced by the tokenizer for the first four entries of data.valid_query.
data.tokenizer.texts_to_sequences(data.valid_query[:4])

[[2922], [2089], [2089], [2089]]

In [59]:
# Entries of data.test_hyper that also occur in data.test_query
# (duplicates and original order are kept).
# Membership is tested against a set built once, instead of rescanning the
# whole query list for every hypernym.
# NOTE(review): assumes the queries are hashable strings -- confirm.
test_query_set = set(data.test_query)
filter(lambda x: x in test_query_set, data.test_hyper)

[u'bank',
 u'actor',
 u'spending',
 u'animation',
 u'judge',
 u'minister',
 u'island',
 u'response',
 u'minister',
 u'killer',
 u'horse',
 u'transportation',
 u'form',
 u'record_label',
 u'label',
 u'mark',
 u'watercraft',
 u'management',
 u'language',
 u'engineering',
 u'university',
 u'shopping',
 u'mark',
 u'discharge',
 u'noble',
 u'prefix',
 u'ideal',
 u'horse',
 u'peasant',
 u'maltreatment',
 u'hurt',
 u'persecution',
 u'actor',
 u'cricketer',
 u'standard',
 u'housing',
 u'offense',
 u'arm',
 u'basin',
 u'picture',
 u'bridge',
 u'proposal',
 u'form',
 u'sport',
 u'cap',
 u'island',
 u'mark',
 u'ideal',
 u'flag',
 u'prefix',
 u'language',
 u'offense',
 u'photographer',
 u'maltreatment',
 u'infrastructure',
 u'sport',
 u'policy',
 u'note',
 u'university',
 u'happening',
 u'university',
 u'note',
 u'sport',
 u'mark',
 u'net',
 u'teacher',
 u'horse',
 u'picture',
 u'magazine',
 u'local_government',
 u'spending',
 u'reflex_response',
 u'mark',
 u'theme',
 u'emptiness',
 u'watercraft',

In [4]:

# Toy example: element-wise squared difference of two small integer vectors.
a = np.array([10, 5, 3, 13])
b = np.array([13, 4, 5, 10])

np.square(b - a)


array([9, 1, 4, 9])

In [29]:
# Mean of the element-wise (signed) difference between each candidate's
# vector and the vector for 'rod_laver'.
tennis_player_diff = model['tennis_player'] - model['rod_laver']
print(np.mean(tennis_player_diff))

athlete_diff = model['athlete'] - model['rod_laver']
print(np.mean(athlete_diff))

0.004040569
0.005828381
