# Step 3 - Predict Relation and Finish

Our goal during this step is to predict the relation and compute the end-to-end accuracy.

In [1]:
import sys
# Put the directory two levels up first on the import path so the `lib` (and
# `notebooks`) packages imported by later cells resolve.
# NOTE(review): presumably this is the repository root — confirm.
sys.path.insert(0, '../../')

In [2]:
from lib.utils import get_connection 
from lib.utils import FB2M_NAME_TABLE
from lib.utils import FB2M_KG_TABLE

# One shared database connection/cursor, reused by every query in this notebook
# (candidate fact lookup, relation counts).
connection = get_connection()
cursor = connection.cursor()

In [3]:
import pandas as pd
from tqdm import tqdm_notebook
from functools import partial

# Default every progress bar created through this name to `leave=False`.
tqdm_notebook = partial(tqdm_notebook, leave=False)
# Register tqdm with pandas so that `DataFrame.progress_apply` (used below) is available.
tqdm_notebook().pandas()

# Output of the previous pipeline step (candidate generation).
# NOTE(review): unpickling can execute arbitrary code — only load files produced by this project.
df = pd.read_pickle('step_2_generate_candidates.pkl')
df[:5]

HBox(children=(IntProgress(value=1, bar_style='info', max=1), HTML(value='')))



Unnamed: 0,end_index,object,predicted_question_tokens,predicted_subject_names,question,question_tokens,relation,start_index,subject,subject_name,subject_name_tokens,candidate_mids,predicted_start_index,predicted_end_index,predicted_subject_name
6219,,0bs56bp,"[name, an, american, thoroughbread, racehorse]","[{'name': 'american thoroughbread', 'score': 4...",Name an American Thoroughbread racehorse,,biology/organism_classification/organisms_of_t...,,03k3r,,,"[01z1jf2, 04q7gbh]",2.0,3.0,american
3364,9.0,01sjng,"[what, kind, of, game, is, vision, racing, dri...","[{'name': 'vision racing driving simulator', '...",what kind of game is vision racing driving sim...,"[what, kind, of, game, is, vision, racing, dri...",cvg/computer_videogame/cvg_genre,5.0,02qlppc,vision racing driving simulator,"(vision, racing, driving, simulator)",[02qlppc],5.0,9.0,vision racing driving simulator
9374,6.0,0dlmm88,"[what, tv, program, is, romance, film]","[{'name': 'romance film', 'score': 55.87209701...",what tv program is romance film,"[what, tv, program, is, romance, film]",tv/tv_genre/programs,4.0,02l7c8,romance film,"(romance, film)",[02l7c8],4.0,6.0,romance film
10142,4.0,04rrx,"[what, state, is, polaski, located, in]","[{'name': 'polaski', 'score': 57.6216735839843...",what state is polaski located in,"[what, state, is, polaski, located, in]",location/location/containedby,3.0,049_zj3,polaski,"(polaski,)",[049_zj3],3.0,4.0,polaski
97,8.0,0qcr0,"[what, disease, claimed, the, life, of, fern, ...","[{'name': 'fern emmett', 'score': 69.716217041...",what disease claimed the life of fern emmett,"[what, disease, claimed, the, life, of, fern, ...",people/deceased_person/cause_of_death,6.0,02w9ycr,fern emmett,"(fern, emmett)",[02w9ycr],6.0,8.0,fern emmett


## Generate Facts

Given the candidate mids, we generate the candidate facts.

In [4]:
from collections import defaultdict

def generate_facts(row):
    """Look up every knowledge-graph fact whose subject is one of the row's candidate mids.

    Args:
        row: Mapping-like record (a DataFrame row) whose `candidate_mids` entry
            holds the subject mids to query for.

    Returns:
        dict: `{relation: {subject_mid: {object_mid, ...}}}` built from the rows
        returned by the query; an empty dict when the query returns no rows.
    """
    query = """SELECT subject_mid, relation, object_mid
                      FROM {kg}
                      WHERE subject_mid = ANY(%s)""".format(kg=FB2M_KG_TABLE)
    cursor.execute(query, (row['candidate_mids'],))

    # Group as relation -> subject -> objects using plain dicts throughout, so the
    # result never carries defaultdict behaviour.
    facts_by_relation = {}
    for subject_mid, relation, object_mid in cursor.fetchall():
        objects_by_subject = facts_by_relation.setdefault(relation, {})
        objects_by_subject.setdefault(subject_mid, set()).add(object_mid)

    return facts_by_relation

In [5]:
# One database query per row (see `generate_facts`); the progress bar tracks the rows.
df['candidate_facts'] = df.progress_apply(generate_facts, axis=1)

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))




In [6]:
# Each `candidate_facts` value is keyed by relation, so its length is the number of
# distinct candidate relations for that question.
print('Average Number of Relations:', sum(len(r) for r in df['candidate_facts']) / df.shape[0])

Average Number of Relations: 17.962010142923006


## Upperbounds

Check the accuracy of the candidates.

In [7]:
from tqdm import tqdm_notebook

# Upper bounds on what the final step can achieve given the generated candidates.
# The counters form a funnel: the subject is only checked under the gold relation,
# and the object only under the gold relation and gold subject.
correct_relation = 0
correct_subject = 0
correct_object = 0
n_examples = df.shape[0]

for _, example in tqdm_notebook(df.iterrows(), total=n_examples):
    objects_by_subject = example['candidate_facts'].get(example['relation'])
    if objects_by_subject is None:
        continue  # gold relation is not among the candidates
    correct_relation += 1

    if example['subject'] not in objects_by_subject:
        continue  # gold subject has no fact under the gold relation
    correct_subject += 1

    if example['object'] in objects_by_subject[example['subject']]:
        correct_object += 1

print('Object Canditate Accuracy:', correct_object / n_examples)
print('Relation Canditate Accuracy:', correct_relation / n_examples)
print('Subject Canditate Accuracy:', correct_subject / n_examples)

# Numbers recorded from an earlier run:
# Object Canditate Accuracy: 0.9566620562471185
# Relation Canditate Accuracy: 0.9678192715537114
# Subject Canditate Accuracy: 0.9566620562471185

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))


Object Canditate Accuracy: 0.9550023052097741
Relation Canditate Accuracy: 0.9674504379898571
Subject Canditate Accuracy: 0.9550023052097741


## Models

Load models to be used in our final step.

In [8]:
from lib.checkpoint import Checkpoint

def load_checkpoint(path):
    """Load a model checkpoint from `path` and prepare it for inference.

    The checkpoint is created on device 0, its model is taken out of training
    mode, and every sub-module that offers `flatten_parameters` has it called.

    Args:
        path (str): Location of the checkpoint file.

    Returns:
        The loaded `Checkpoint` object.
    """
    def _flatten_if_supported(module):
        # Only some modules expose `flatten_parameters`; the rest are left alone.
        if hasattr(module, 'flatten_parameters'):
            return module.flatten_parameters()
        return None

    checkpoint = Checkpoint(checkpoint_path=path, device=0)
    checkpoint.model.train(mode=False)
    checkpoint.model.apply(_flatten_if_supported)
    return checkpoint

def cuda(v):
    """Return `v` moved to the GPU when CUDA is available, otherwise `v` unchanged.

    `torch` is resolved when the function is called, so it only needs to be
    imported before the first call, not before this definition.

    Args:
        v: Any object exposing a `.cuda()` method (e.g. a tensor).

    Returns:
        `v.cuda()` if `torch.cuda.is_available()`, else `v` itself.
    """
    return v.cuda() if torch.cuda.is_available() else v

In [9]:
def get_predicted_predicate(row):
    """Build the question pattern with the predicted subject span replaced by `<e>`.

    The token at `predicted_start_index` becomes `<e>`, tokens strictly between
    the start and end indices are dropped, and every other token is lower-cased
    and stripped. The pieces are joined with single spaces.

    Args:
        row: Mapping-like record with `predicted_question_tokens`,
            `predicted_start_index` and `predicted_end_index`.

    Returns:
        str: The formatted question, e.g. `what state is <e> located in`.
    """
    pieces = []
    for position, token in enumerate(row['predicted_question_tokens']):
        if position == row['predicted_start_index']:
            pieces.append('<e>')
        elif not (row['predicted_start_index'] < position < row['predicted_end_index']):
            # Outside the subject span: keep the normalised token.
            pieces.append(token.lower().strip())
    return ' '.join(pieces).strip()

# Question pattern (subject span replaced by `<e>`) that the relation models score.
df['predicted_predicate'] = df.progress_apply(get_predicted_predicate, axis=1)
df[:5]

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))




Unnamed: 0,end_index,object,predicted_question_tokens,predicted_subject_names,question,question_tokens,relation,start_index,subject,subject_name,subject_name_tokens,candidate_mids,predicted_start_index,predicted_end_index,predicted_subject_name,candidate_facts,predicted_predicate
6219,,0bs56bp,"[name, an, american, thoroughbread, racehorse]","[{'name': 'american thoroughbread', 'score': 4...",Name an American Thoroughbread racehorse,,biology/organism_classification/organisms_of_t...,,03k3r,,,"[01z1jf2, 04q7gbh]",2.0,3.0,american,{'symbols/namesake/named_after': {'04q7gbh': {...,name an <e> thoroughbread racehorse
3364,9.0,01sjng,"[what, kind, of, game, is, vision, racing, dri...","[{'name': 'vision racing driving simulator', '...",what kind of game is vision racing driving sim...,"[what, kind, of, game, is, vision, racing, dri...",cvg/computer_videogame/cvg_genre,5.0,02qlppc,vision racing driving simulator,"(vision, racing, driving, simulator)",[02qlppc],5.0,9.0,vision racing driving simulator,{'cvg/computer_videogame/gameplay_modes': {'02...,what kind of game is <e> ?
9374,6.0,0dlmm88,"[what, tv, program, is, romance, film]","[{'name': 'romance film', 'score': 55.87209701...",what tv program is romance film,"[what, tv, program, is, romance, film]",tv/tv_genre/programs,4.0,02l7c8,romance film,"(romance, film)",[02l7c8],4.0,6.0,romance film,{'tv/tv_genre/programs': {'02l7c8': {'0j42ndm'...,what tv program is <e>
10142,4.0,04rrx,"[what, state, is, polaski, located, in]","[{'name': 'polaski', 'score': 57.6216735839843...",what state is polaski located in,"[what, state, is, polaski, located, in]",location/location/containedby,3.0,049_zj3,polaski,"(polaski,)",[049_zj3],3.0,4.0,polaski,{'location/location/containedby': {'049_zj3': ...,what state is <e> located in
97,8.0,0qcr0,"[what, disease, claimed, the, life, of, fern, ...","[{'name': 'fern emmett', 'score': 69.716217041...",what disease claimed the life of fern emmett,"[what, disease, claimed, the, life, of, fern, ...",people/deceased_person/cause_of_death,6.0,02w9ycr,fern emmett,"(fern, emmett)",[02w9ycr],6.0,8.0,fern emmett,{'people/person/nationality': {'02w9ycr': {'09...,what disease claimed the life of <e>


### Softmax Relation Model

Load the softmax relation model.

In [10]:
from torch.autograd import Variable
import math
import torch

def get_softmax_relation_score(checkpoint, question, relations):
    """Score candidate relations for a question with a softmax relation classifier.

    The model is called with the encoded question and a 0/1 mask over the relation
    vocabulary that marks the candidate relations; its output is exponentiated
    and the entries belonging to the candidates are returned.

    Args:
        checkpoint: Loaded checkpoint exposing `relation_encoder` (with `encode`
            and `vocab_size`), `text_encoder` (with `encode`) and a callable `model`.
        question (str): Question text, with the subject span replaced by `<e>`.
        relations (iterable of str): Candidate relation names.

    Returns:
        list: One score per candidate, in the order of `relations`. Each score is
        the element indexed out of the model output (a 0-dim tensor on current
        PyTorch versions).
    """
    # Normalise ids to plain ints: tensors hash by identity, so set membership
    # below would silently miss tensor-valued ids.
    relation_ids = [int(checkpoint.relation_encoder.encode(r)[0]) for r in relations]
    candidate_ids = set(relation_ids)
    mask = [1 if i in candidate_ids else 0 for i in range(checkpoint.relation_encoder.vocab_size)]

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # Inference only: no autograd graph is needed for the forward pass.
    with torch.no_grad():
        mask = torch.FloatTensor(mask).to(device)
        question = checkpoint.text_encoder.encode(question)
        # unsqueeze(1) adds a second dimension of size 1 after the token dimension.
        question = torch.LongTensor(question).unsqueeze(1).to(device)
        # NOTE(review): exp_() presumably turns log-probabilities into probabilities — confirm.
        output_batch = checkpoint.model(question, mask).exp_().data
    output_batch = output_batch.squeeze(0)
    return [output_batch[r] for r in relation_ids]
    
## Test ##
# Smoke test: score two hand-picked relations for one hand-written question pattern.
# Checkpoint with 0.880
#checkpoint = load_checkpoint('../../pretrained_models/relation_classifier.02_02_07:59:28/1746.pt')
# NOTE: `checkpoint` is a notebook-level name; later cells rebind it to other checkpoints.
checkpoint = load_checkpoint('../../experiments/relation_classifier.09_08_10:23:25/3309.pt')
question = 'where was <e> born?'
print('Question:', question)
print('Scores:')
print(get_softmax_relation_score(checkpoint, question, ['people/person/place_of_birth',
                                                        'location/location/people_born_here']))

Question: where was <e> born?
Scores:
[tensor(1.00000e-04 *
       5.6025, device='cuda:0'), tensor(1.00000e-04 *
       6.3373, device='cuda:0')]


  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':


In [16]:
# Alternative checkpoint kept for reference:
#checkpoint = load_checkpoint('../../pretrained_models/relation_classifier.02_02_13:31:11/189.pt')
checkpoint = load_checkpoint('../../experiments/relation_classifier.09_08_10:23:25/4238.pt')

def add_softmax_scores(row):
    """Score every candidate relation of a row with the notebook-level `checkpoint`.

    Args:
        row: Record with `candidate_facts` (keyed by relation) and
            `predicted_predicate`.

    Returns:
        list or None: Scores in the key order of `candidate_facts`, or None when
        the row has no candidate facts.
    """
    facts = row['candidate_facts']
    if len(facts) == 0:
        return None
    return get_softmax_relation_score(checkpoint, row['predicted_predicate'], list(facts.keys()))

df['softmax_scores'] = df.progress_apply(add_softmax_scores, axis=1)

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))

  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':





In [19]:
import torch
from functools import partial

def add_softmax_ensemble_scores(row, checkpoint):
    """Add one more model's relation scores to the row's running ensemble total.

    Args:
        row: Record with `candidate_facts`, `predicted_predicate` and the running
            `softmax_ensemble_scores` list.
        checkpoint: Model checkpoint used to score the candidate relations.

    Returns:
        list or None: Element-wise sum of the running totals and the new scores,
        or None when the row has no candidate facts.
    """
    facts = row['candidate_facts']
    if len(facts) == 0:
        return None
    new_scores = get_softmax_relation_score(checkpoint, row['predicted_predicate'], list(facts.keys()))
    running_totals = row['softmax_ensemble_scores']
    return [total + score for total, score in zip(running_totals, new_scores)]

# The ensemble starts from the single-model scores computed earlier.
df['softmax_ensemble_scores'] = df['softmax_scores']

# NOTE(review): both active entries are the same file, and it is also the checkpoint
# that produced `softmax_scores`, so the summed scores are one model counted three
# times — confirm the intended ensemble members.
# NOTE(review): the prediction cells below read `softmax_scores`, not
# `softmax_ensemble_scores` — confirm whether the ensemble is meant to be used.
ensemble = [
    # '../../pretrained_models/relation_classifier.02_02_13:31:11/189.pt',  # 1212212=seed, 88.38%=dev_accuracy
    # '../../pretrained_models/relation_classifier.02_02_13:23:02/223.pt',  # 457=seed, 88.26%=dev_accuracy
    # '../../pretrained_models/relation_classifier.02_02_07:59:28/1746.pt',  # 123=seed, 88.25%=dev_accuracy
    '../../experiments/relation_classifier.09_08_10:23:25/4238.pt',
    '../../experiments/relation_classifier.09_08_10:23:25/4238.pt',
]

for path in ensemble:
    checkpoint = load_checkpoint(path)
    accumulate_scores = partial(add_softmax_ensemble_scores, checkpoint=checkpoint)
    df['softmax_ensemble_scores'] = df.progress_apply(accumulate_scores, axis=1)
    torch.cuda.empty_cache()

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))

  # Remove the CWD from sys.path while we load stuff.
  if sys.path[0] == '':





HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))




## Final End-To-End Metric

Given candidate facts compute the end-to-end metric. Below we start a couple code blocks with basic utilities used by each version.

#### FB2M Probability to Train Probability

In [20]:
# Number of knowledge-graph facts per relation.
relation_counts_query = 'SELECT relation, count(*) FROM ' + FB2M_KG_TABLE + ' GROUP BY relation'
cursor.execute(relation_counts_query)
relation_count_fb2m = {relation: n_rows_fb2m for relation, n_rows_fb2m in cursor.fetchall()}

In [21]:
from collections import defaultdict
from lib.simple_qa import load_simple_qa 
from tqdm import tqdm_notebook

df_train, = load_simple_qa(train=True)

total_rows_train = df_train.shape[0]
cursor.execute('SELECT count(*) FROM ' + FB2M_KG_TABLE)
total_rows_fb_two_kg = cursor.fetchone()[0]

# Given that a relation occurs with probability x in the KG, the ratio stored in
# `transform_probability_from_kg_to_train` rescales it to the probability of that
# relation occurring in the SimpleQuestions training set:
#     ratio = P_train(relation) / P_kg(relation)
# Relations never seen in training fall back to the defaultdict value of 0.
transform_probability_from_kg_to_train = defaultdict(int)
# `Series.items()` replaces `Series.iteritems()`, which pandas 2.0 removed.
for relation, n_rows_train in df_train.relation.value_counts().items():
    # A training relation missing from the KG counts raises KeyError here.
    n_facts_fb2m = relation_count_fb2m[relation]

    relation_probability_kg = n_facts_fb2m / total_rows_fb_two_kg
    relation_probability_train = n_rows_train / total_rows_train

    transform_probability_from_kg_to_train[relation] = relation_probability_train / relation_probability_kg

  filename, header=None, names=['subject', 'relation', 'object', 'question'])


#### Evaluation

A question can have more than one valid interpretation. `question_interpretations` lists, for each question, every (relation, subject) pair that is accepted as a correct reading of it.

In [22]:
import json

# Load with a context manager so the file handle is closed deterministically.
with open('../Simple QA Numbers/question_interpretations.txt') as interpretations_file:
    question_interpretations = json.load(interpretations_file)
# One set of (relation, subject) tuples per question, for O(1) membership tests.
question_interpretations = [{tuple(pair) for pair in row} for row in question_interpretations]

Get the predicate row for every training example. We use this to analyze the closest negative predicate to confirm if the data supports the false relation.

In [23]:
from functools import partial
import importlib
import lib.import_notebook

# Helpers are pulled out of sibling notebooks by module path.
# NOTE(review): `lib.import_notebook` (imported above) presumably installs the hook
# that lets `importlib.import_module` load .ipynb files — confirm before reordering.
edit_distance_link_alias = importlib.import_module(
                "notebooks.Simple QA Numbers.HYPOTHESIS - Question Refers to Multiple Subjects").edit_distance_link_alias
normalize = importlib.import_module(
                "notebooks.Simple QA Numbers.HYPOTHESIS - Subject Name not in Question").normalize

# Create a column with the subject_name linked per example
df_train['subject_name'] = df_train.progress_apply(partial(edit_distance_link_alias, cursor, normalize), axis=1)

find_subject_name_span = importlib.import_module(
                "notebooks.Simple QA Models.Subject Recognition Data").find_subject_name_span

# Rebinds `df_train` to the per-row result of `find_subject_name_span`, so this cell
# is not idempotent: re-running it applies the function to already-processed rows.
# NOTE(review): presumably this adds the token/start/end index columns read by
# `get_predicate` below — confirm against the helper.
df_train = df_train.progress_apply(find_subject_name_span, axis=1)

HBox(children=(IntProgress(value=0, max=75910), HTML(value='')))


importing Jupyter notebook from ../../notebooks/Simple QA Models/Subject Recognition Data.ipynb


HBox(children=(IntProgress(value=0, max=75910), HTML(value='')))




In [24]:
def get_predicate(row):
    """Build the question pattern with the gold subject span replaced by `<e>`.

    The token at `start_index` becomes `<e>`, tokens strictly between the start
    and end indices are dropped, and every other token is lower-cased and
    stripped. The pieces are joined with single spaces.

    Args:
        row: Mapping-like record with `question_tokens`, `start_index` and
            `end_index`.

    Returns:
        str: The formatted question, or `''` when `question_tokens` is not a list.
    """
    tokens = row['question_tokens']
    if not isinstance(tokens, list):
        return ''

    pieces = []
    for position, token in enumerate(tokens):
        if position == row['start_index']:
            pieces.append('<e>')
        elif not (row['start_index'] < position < row['end_index']):
            # Outside the subject span: keep the normalised token.
            pieces.append(token.lower().strip())
    return ' '.join(pieces).strip()

# Question pattern of every training example; `evaluate` compares these against
# the predicted predicate of mis-interpreted questions.
df_train['predicate'] = df_train.progress_apply(get_predicate, axis=1)
df_train[:5]

HBox(children=(IntProgress(value=0, max=75910), HTML(value='')))




AttributeError: 'LazyConfigValue' object has no attribute 'lower'

   end_index   object                                           question  \
0        5.0   01cj3p                           what is the book e about   
1        9.0  0sjc7c1  to what release does the release track cardiac...   
2        7.0    07ssc            what country was the film the debt from   
3        5.0   0p600l            what songs have nobuo uematsu produced?   
4        5.0   0677ng                          Who produced eve-olution?   

                                     question_tokens  \
0                    [what, is, the, book, e, about]   
1  [to, what, release, does, the, release, track,...   
2   [what, country, was, the, film, the, debt, from]   
3   [what, songs, have, nobuo, uematsu, produced, ?]   
4                [who, produced, eve, -, olution, ?]   

                         relation  start_index  subject    subject_name  \
0      book/written_work/subjects          4.0  04whkz5               e   
1     music/release_track/release          7.0  0tp2p24 

Evaluation function with negative sampling for error analysis.

In [27]:
from lib.utils import format_pipe_table
from Levenshtein import distance
from heapq import nsmallest

# Number of training questions per relation, reported next to each negative sample.
relation_count_train = df_train.relation.value_counts().to_dict()

def evaluate(predicted):
    """Print end-to-end metrics for `predicted` and a table of mis-interpreted questions.

    Reads the notebook-level `df`, `df_train`, `question_interpretations` and
    `relation_count_train`.

    Args:
        predicted: Sequence aligned by position with the rows of `df`. Each item
            is a `(relation, subject_mid, object_mids)` triple, or
            `(None, None, None)` when no prediction was made.

    Returns:
        None. All results are printed.
    """
    n_examples = df.shape[0]
    subject_and_relation_correct = 0 # Official metric from the Simple Questions dataset
    object_correct = 0
    relation_correct = 0
    subject_correct = 0
    correct_interpretation = 0
    no_interpretation = 0
    negative_samples = []

    for i, (_, row) in tqdm_notebook(enumerate(df.iterrows()), total=n_examples):
        relation, subject, objects = predicted[i]

        # NOTE(review): interpretations are looked up by row *position*, so
        # `question_interpretations` must be in the same order as `df`. The df
        # index shown above is not sequential — confirm the two are aligned.
        interpretations = question_interpretations[i]
        if (relation, subject) in interpretations:
            correct_interpretation += 1
        elif len(interpretations) != 0:
            count = relation_count_train.get(row['relation'], 0)
            false_predicates = df_train[df_train['relation'] == relation]['predicate']
            # Computed fresh for every sample: when the predicted relation has no
            # training predicates the sample reports an empty list rather than a
            # value left over from an earlier row.
            closest_false_predicate = []
            if len(false_predicates) > 0:
                closest_false_predicate = nsmallest(3, false_predicates,
                    key=lambda p: distance(p, row['predicted_predicate']))
            negative_samples.append({
                'Question': row['question'],
                'Closest False Predicate': closest_false_predicate,
                'False Relation': relation,
                'True Relation': row['relation'],
                'True Relation Count': count,
                'Predicted Subject Name': row['predicted_subject_name'],
                'Subject Name': row['subject_name'],
            })
        else:
            no_interpretation += 1

        if relation == row['relation'] and subject == row['subject']:
            subject_and_relation_correct += 1
        if objects and row['object'] in objects:
            object_correct += 1
        if relation == row['relation']:
            relation_correct += 1
        if subject == row['subject']:
            subject_correct += 1

    print('Subject & Relation Accuracy (SOTA 78.7%%): %f [%d of %d]' %
          (subject_and_relation_correct / n_examples, subject_and_relation_correct, n_examples))
    print('Object Accuracy: %f [%d of %d]' %
              (object_correct / n_examples, object_correct, n_examples))
    print('Relation Accuracy (SOTA 88.4%%): %f [%d of %d]' %
              (relation_correct / n_examples, relation_correct, n_examples))
    print('Subject Accuracy (SOTA 79%%): %f [%d of %d]' %
              (subject_correct / n_examples, subject_correct, n_examples))
    # Is this a valid interpretation of the question
    print('Correct Interpretation: %f [%d of %d]' %
              (correct_interpretation / n_examples, correct_interpretation, n_examples))
    print('No Interpretation: %f [%d of %d]' %
              (no_interpretation / n_examples, no_interpretation, n_examples))
    print('Negative Sample:')
    # 'Bucket' is not a key of the samples; the column is included in the printed
    # table so it can be filled in by hand during error analysis.
    print(format_pipe_table(negative_samples[:50], columns=['Bucket', 'Closest False Predicate', 'True Relation Count', 'Question', 'True Relation',
                                                            'False Relation', 'Subject Name', 'Predicted Subject Name']))

### Version: Softmax

For version 0 of softmax, we use the highest-scoring predicted relation with no additional considerations.

In [31]:
from tqdm import tqdm_notebook
import random

# Dedicated, seeded generator so that tie-breaking and the random subject pick —
# and therefore the reported metrics — are the same on every run.
rng = random.Random(0)

predicted = []
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    if len(row['candidate_facts']) != 0:
        # Scores are aligned with the key order of `candidate_facts`.
        candidate_relations = list(row['candidate_facts'].keys())
        max_score = max(row['softmax_scores'])
        predicted_relations = [r for r, score in zip(candidate_relations, row['softmax_scores'])
                               if score == max_score]
        predicted_relation = rng.choice(predicted_relations)
        # Version 0: the subject is picked at random among those having the relation.
        subject_mid, object_mids = rng.choice(list(row['candidate_facts'][predicted_relation].items()))
        predicted.append((predicted_relation, subject_mid, object_mids))
    else:
        predicted.append((None, None, None))

evaluate(predicted)

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))




HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))


Subject & Relation Accuracy (SOTA 78.7%): 0.178792 [1939 of 10845]
Object Accuracy: 0.197695 [2144 of 10845]
Relation Accuracy (SOTA 88.4%): 0.187644 [2035 of 10845]
Subject Accuracy (SOTA 79%): 0.690088 [7484 of 10845]
Correct Interpretation: 0.000184 [2 of 10845]
No Interpretation: 0.021669 [235 of 10845]
Negative Sample:
| Index | Bucket | Closest False Predicate | True Relation Count | Question | True Relation | False Relation | Subject Name | Predicted Subject Name |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | nan | ['who is <e> recognized as', 'what is <e> regarded as', 'what is <e> recognized as'] | 13 | Name an American Thoroughbread racehorse  | biology/organism_classification/organisms_of_this_type | common/topic/notable_types | nan | american |
| 1 | nan | ['what kind of game is <e> ?', 'what kind of game is <e> ?', 'what kind of game is <e> ?'] | 335 | what kind of game is vision racing driving simulator? | cvg/computer_videogame/cvg_genre | cvg/computer_

### Version: Softmax & Most Facts

For the version 1 of softmax, we use the highest predicted relation and additionally guess the subject with the most objects associated with it.

In [32]:
from tqdm import tqdm_notebook
import random

# Dedicated, seeded generator so that tie-breaking between equally scored
# relations — and therefore the reported metrics — is the same on every run.
rng = random.Random(0)

predicted = []
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    if len(row['candidate_facts']) != 0:
        # Scores are aligned with the key order of `candidate_facts`.
        candidate_relations = list(row['candidate_facts'].keys())
        max_score = max(row['softmax_scores'])
        predicted_relations = [r for r, score in zip(candidate_relations, row['softmax_scores'])
                               if score == max_score]
        predicted_relation = rng.choice(predicted_relations)

        # We use the `Better than random guessing` from notebook
        # `HYPOTHESIS - Question Refers to Multiple Subjects`:
        # pick the subject with the most objects (first one wins on ties).
        subject_mid, object_mids = max(row['candidate_facts'][predicted_relation].items(),
                                       key=lambda item: len(item[1]))
        predicted.append((predicted_relation, subject_mid, object_mids))
    else:
        predicted.append((None, None, None))

evaluate(predicted)

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))




HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))


Subject & Relation Accuracy (SOTA 78.7%): 0.180083 [1953 of 10845]
Object Accuracy: 0.198801 [2156 of 10845]
Relation Accuracy (SOTA 88.4%): 0.187644 [2035 of 10845]
Subject Accuracy (SOTA 79%): 0.690088 [7484 of 10845]
Correct Interpretation: 0.000184 [2 of 10845]
No Interpretation: 0.021669 [235 of 10845]
Negative Sample:
| Index | Bucket | Closest False Predicate | True Relation Count | Question | True Relation | False Relation | Subject Name | Predicted Subject Name |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | nan | ['who is <e> recognized as', 'what is <e> regarded as', 'what is <e> recognized as'] | 13 | Name an American Thoroughbread racehorse  | biology/organism_classification/organisms_of_this_type | common/topic/notable_types | nan | american |
| 1 | nan | ['what kind of game is <e> ?', 'what kind of game is <e> ?', 'what kind of game is <e> ?'] | 335 | what kind of game is vision racing driving simulator? | cvg/computer_videogame/cvg_genre | cvg/computer_

### Version: Softmax & Most Facts & Entity Relation Distribution

The hypothesis of version 2 is that we can use the distribution of relations for a particular set of facts to our advantage. Particular entities favor some relations more than others; therefore, we use that probability in a product of experts model.

In [33]:
from tqdm import tqdm_notebook
import random

# Dedicated, seeded generator so that tie-breaking between equally scored
# relations — and therefore the reported metrics — is the same on every run.
rng = random.Random(0)

predicted = []
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    if len(row['candidate_facts']) != 0:
        # Scores are aligned with the key order of `candidate_facts`.
        candidate_relations = list(row['candidate_facts'].keys())
        # Number of candidate facts (over all candidate subjects) per relation;
        # an unnormalised weight, not a probability.
        kg_relation_probability = [sum(len(objects) for objects in row['candidate_facts'][r].values())
                                   for r in candidate_relations]
        # Product of experts: model score x train/KG frequency ratio x fact count.
        scores = [(row['softmax_scores'][i] *
                   transform_probability_from_kg_to_train[r] *
                   kg_relation_probability[i]) for i, r in enumerate(candidate_relations)]
        max_score = max(scores)
        predicted_relations = [r for r, score in zip(candidate_relations, scores) if score == max_score]
        predicted_relation = rng.choice(predicted_relations)

        # We use the `Better than random guessing` from notebook
        # `HYPOTHESIS - Question Refers to Multiple Subjects`:
        # pick the subject with the most objects (first one wins on ties).
        subject_mid, object_mids = max(row['candidate_facts'][predicted_relation].items(),
                                       key=lambda item: len(item[1]))
        predicted.append((predicted_relation, subject_mid, object_mids))
    else:
        predicted.append((None, None, None))

evaluate(predicted)

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))




HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))


Subject & Relation Accuracy (SOTA 78.7%): 0.390134 [4231 of 10845]
Object Accuracy: 0.432918 [4695 of 10845]
Relation Accuracy (SOTA 88.4%): 0.416690 [4519 of 10845]
Subject Accuracy (SOTA 79%): 0.769756 [8348 of 10845]
Correct Interpretation: 0.000184 [2 of 10845]
No Interpretation: 0.021669 [235 of 10845]
Negative Sample:
| Index | Bucket | Closest False Predicate | True Relation Count | Question | True Relation | False Relation | Subject Name | Predicted Subject Name |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | nan | ['name a <e> restaurant .', 'what restaurant serves <e> cuisine', 'which restaurant serves <e> ?'] | 13 | Name an American Thoroughbread racehorse  | biology/organism_classification/organisms_of_this_type | dining/cuisine/restaurant | nan | american |
| 1 | nan | ['what kind of game is <e> ?', 'what kind of game is <e>', 'what kind of gameplay is <e>'] | 335 | what kind of game is vision racing driving simulator? | cvg/computer_videogame/cvg_genre | 

**Numbers:**

- Subject & Relation Accuracy (SOTA 78.7%): 0.781067 [16939 of 21687]
- Object Accuracy: 0.835662 [18123 of 21687]
- Relation Accuracy (SOTA 88.4%): 0.881680 [19121 of 21687]
- Subject Accuracy (SOTA 79%): 0.830451 [18010 of 21687]
- Correct Interpretation: 0.915479 [19854 of 21687]
- No Interpretation: 0.019413 [421 of 21687]

**Discussion:**

Following the error buckets, given that 14/50 of the sampled errors were actually correct, we re-evaluate our model at 0.065108 * (14/50) + 0.915479 = 0.93370924 (93.4%) correct.

Given that 34/50 are incorrect, excluding noise, we find that there exists at most 0.065108 * (34/50) = 4.4\% room for improvement, of which 0.065108 * (16/50) = 0.02083456 (2.1\%) is due to modeling low-shot relations.

**Error Buckets: (1 - (0.019413 + 0.915479) = 0.065108, i.e. 6.5% of Examples)**

**Buckets:**
- Incorrect, Low (16/50): The relation chosen was incorrect and there are fewer than 7 examples in the training data for the true relation.
- Correct (14/50): The model interpreted the question correctly. 
- Incorrect (36/50): The model interpreted the question incorrectly.
- Incorrect, Noise (2/50): The question was interpreted incorrectly and did not make sense.
- Incorrect, Relation (30/50): The model failed to get the correct relation.
- Incorrect, Subject (14/50): The model failed to get the correct subject.


| Index | Bucket | Closest False Predicate | True Relation Count | Question | True Relation | False Relation | Subject Name | Predicted Subject Name |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | Correct | ['what town is in <e>', 'which city is in <e>', 'what town is in <e> ?'] | 39 | which town is in new york city | location/place_with_neighborhoods/neighborhoods | location/location/contains | new york city | new york city |
| 1 | Incorrect, Low, Relation | ['what types of hotel is <e>', 'what type of location is the <e>', 'what accommodation type is <e> ?'] | 1 | who operates the fairfield inn & suites austin northwest/arboretum | travel/hotel/operated_by | travel/accommodation/accommodation_type | fairfield inn & suites austin northwest/arboretum | fairfield inn & suites austin northwest / arboretum |
| 2 | Incorrect, Subject, Relation | ['what is the license for <e>', 'what license does <e> have', 'what license does <e> have'] | 30 | whats the name of a law enforcement agency | organization/organization_sector/organizations_in_this_sector | computer/software/license | law enforcement agency | enforcement |
| 3 | Incorrect, Subject, Relation | ['what is a notable type of <e> ?', 'what is a notable type of a <e> ?', 'what is a notable type about <e> ?'] | 1243 | What is an island located in united states minor outlying islands? | location/location/contains | common/topic/notable_types | united states minor outlying islands | outlying islands |
| 4 | Correct | ['in which country was <e> album released', 'in which country was <e> album released', 'in what country was <e> album released'] | 761 | in which country was the greatest hits album released | music/release/region | music/release/region | greatest hits | the greatest hits |
| 5 | Incorrect, Low, Relation | ["what 's an example of a <e> organization"] | 2 | What's an ad campaign focused on health care | user/micahsaul/advertising/advertised_thing/ad_campaigns | government/government_service/operating_organization | health care | health care |
| 6 | Incorrect | ['what is an album recorded by <e> ?', 'what is an album recorded by <e> ?', 'what is an album recorded by <e> ?'] | 1044 | jackie is an album recorded by what artist? | music/album/artist | music/artist/album | jackie | jackie |
| 7 | Correct | ['what is a live <e> by archer prewitt ?', 'what is an <e> released by a dominican singer in 1998', 'what is an <e> by jetplane landing ?'] | 656 | what is the cover album by Japanese pop singer Ken Hirai | music/artist/album | music/album_release_type/albums | ken hirai | album |
| 8 | Incorrect, Low, Relation | ['what is the area code for <e>', 'what is the location of <e>', 'what is the location of <e>'] | 0 | what is the parent institution for cohn-haddow center for judaic studies | base/jewlib/research_center/parent_institution | location/location/containedby | cohn-haddow center for judaic studies | cohn - haddow center for judaic studies |
| 9 | Incorrect, Low, Relation | ['where released <e>', 'where is <e>', 'where is <e> from'] | 5 | where is produced by closer | theater/theater_production/performed_at | music/release/region | closer | closer |
| 10 | Incorrect, Relation | ['which state is <e> located in', 'which state is <e> located in ?', 'which state is <e> in'] | 2472 | Which country is kenai fjords national park located in | location/location/containedby | base/usnationalparks/us_national_park/state | kenai fjords national park | kenai fjords national park |
| 11 | Incorrect, Relation | ['who was born in the <e> ?', 'who is an author from <e> ?', 'who was born in the <e> ?'] | 37 | What is a color of the st helens rlfc | sports/sports_team/colors | location/location/people_born_here | st helens rfc | st helens |
| 12 | Incorrect, Subject, Relation | ['which city is in <e>', 'what city is in <e> ?', 'what city is in <e> ?'] | 16 | which city is river dee, aberdeenshire in  | geography/river/cities | location/location/contains | river dee | dee , aberdeenshire |
| 13 | Incorrect, Low, Relation | ['what country is <e> in', 'what does <e> pertain to', 'what does the <e> belong to'] | 1 | which city is in bosnian county | base/aareas/schema/administrative_area_type/subdivides_place | base/aareas/schema/administrative_area_type/pertains_to | bosnian county | bosnian county |
| 14 | Correct | ['what classification is <e>', 'what kind of classification is <e> given', 'what is the classification of <e>'] | 159 | what classification is the drug sodium sulfacetamide and sulfur 0.05 kit | medicine/drug_formulation/legal_status | medicine/drug_formulation/drug_category | sodium sulfacetamide and sulfur 0.05 kit | sodium sulfacetamide and sulfur 0.05 kit |
| 15 | Incorrect, Low, Subject, Relation | ["what is <e> 's gender identity", "what is <e> 's gender identity", "what is <e> 's sex ?"] | 0 | what is mandy bright's sexual orientation  | base/adultentertainment/adult_entertainer/sexual_orientation | people/person/gender | mandy | mandy bright |
| 16 | Correct | ['which releases has <e> as a track ?', 'which release has the track <e>', 'which releases has the track <e> ?'] | 646 | which releases have gold as the track? | music/recording/releases | music/release_track/release | gold | gold |
| 17 | Incorrect, Subject, Relation | ['what movie was directed by <e>', 'what movie was directed by <e>', 'what film was directed by <e>'] | 1070 | what thriller was directed by Félix Enríquez Alcalá | film/film_genre/films_in_this_genre | film/director/film | thriller | felix enriquez alcala |
| 18 | Incorrect, Subject, Relation | ['what is the country of origin of the movie <e> ?', 'what is the country of origin of the film <e> ?', 'what is the country of origin for the film <e> ?'] | 19 | what's the first episode of the fifth season of deja vu (the outer limits) | tv/tv_series_episode/next_episode | film/film/country | deja vu | the fifth season of |
| 19 | Incorrect, Relation | ['where did the film <e> originate ?', 'where did the film <e> originate ?', 'where did the film <e> originate ?'] | 441 | Where is the artist the lab originate  | music/artist/origin | film/film/country | the lab | the lab |
| 20 | Incorrect, Low, Relation | ['what type of storage does the <e> support', 'what type of storage card does the <e> use', 'what types of card does <e> need'] | 2 | What type of sensor does the nikon d3 have? | digicams/digital_camera/sensor_type | digicams/digital_camera/supported_storage_types | nikon d3 | nikon d3 |
| 21 | Incorrect, Low, Relation | ['what color is <e>', 'what color is worn by <e> ?', 'what color is <e>'] | 7 | What color of horse is george washington | base/thoroughbredracing/thoroughbred_racehorse/color | education/educational_institution/colors | george washington | george washington |
| 22 | Correct | ['the song <e> is from which album', '<e> is from what album', 'what is a song from the album <e>'] | 676 | die in your arms is the single from which album  | music/release_track/release | music/release_track/recording | die in your arms | die in your arms |
| 23 | Incorrect, Subject, Relation | ['what is the title of a film with the <e> genre ?', 'what is the name of a film where the genre is <e>', 'what is the name of an italian <e> ?'] | 1070 | What is the drama film where black-and-white technique was used? | film/film_genre/films_in_this_genre | film/film_genre/films_in_this_genre | black-and-white | drama film |
| 24 | Incorrect | ['what entity governs the <e>', 'what is the governing body over the <e>', 'who is in charge of the <e>'] | 123 | what American politician gave her name for the eleanor roosevelt national historic site | symbols/namesake/named_after | protected_sites/protected_site/governing_body | eleanor roosevelt national historic site | eleanor roosevelt national historic |
| 25 | Incorrect, Subject | ["what is <e> 's profession ?", "what is <e> 's profession ?", "what is <e> 's profession ?"] | 847 | What is aninuddha roy chowdhury's profession? | people/person/profession | common/topic/notable_types | aniruddha roy chowdhury | chowdhury |
| 26 | Correct | ['what time of media is <e>', 'what type of music is <e>', 'what type of model is <e>'] | 6 | What type of media is lemon | type/content/media_type | common/topic/notable_types | lemon | lemon |
| 27 | Incorrect, Low, Subject, Relation | ['what fires were started as a result of <e>'] | 1 | what scandinavian airlines flight was involved in an accident | aviation/airline/accidents | base/fires/fire_cause/fires_caused_this_way | scandinavian airlines | accident |
| 28 | Correct | ['what mountains are located in the <e>', 'what mountain is located in the <e>', 'what mountain is in the <e> ?'] | 1243 | what mountains are located within white mountains? | location/location/contains | geography/mountain_range/mountains | white mountains | white mountains |
| 29 | Incorrect, Noise, Relation | ['what is a <e> group', 'what is a <e> band', "what 's a <e> band"] | 727 | what is an electro-industrial genre  | music/genre/albums | music/genre/artists | electro-industrial | electro - industrial |
| 30 | Incorrect, Relation | ['what is a mode of <e>', 'what is a mode in <e>', 'what is a feature of <e>'] | 169 | What is an adventure cvg game? | cvg/cvg_genre/games | cvg/computer_videogame/gameplay_modes | adeventure | adventure |
| 31 | Correct | ['name a film that was shown at the <e>', 'what film was shown at the <e>', 'what film was shown at the <e>'] | 19 | What's a movie that was shown at the 2010 fantasia festival | film/film_festival_event/films | film/film_festival_event/films | 2010 fantasia film festival | 2010 fantasia festival |
| 32 | Incorrect, Subject, Relation | ['name an <e> person', 'name a <e> .', 'who is a <e> mayor'] | 1070 | name a 2007 American film parody  | film/film_genre/films_in_this_genre | people/ethnicity/people | parody | american |
| 33 | Incorrect, Low, Relation | ['which german city is <e> from', 'which european city is <e> from', 'which state is <e> from'] | 1 | which department contains caroline bynum | education/academic/departments_old | people/person/place_of_birth | caroline bynum | caroline bynum |
| 34 | Correct | ["what 's one of the songs on the album <e>", "what 's one of the songs on the album <e>", "what 's one of the songs on the release <e>"] | 411 | What is one of the songs on the free album? | music/release_track/recording | music/release/track | free | free |
| 35 | Incorrect, Subject | ['is <e> pop music or folk', 'is <e> rock music or hip - hop', 'is <e> dance - pop or k - pop'] | 1553 | is jane child pop music or j-pop | music/artist/genre | music/artist/genre | jane child | jane |
| 36 | Incorrect, Low, Subject, Relation | ['who is the featured artist on the album <e> ?', 'what pretty girl rock singer is featured on <e>', 'who is a featured artist on the album <e> ?'] | 7 | what is the tittle of an entrepreneur who opens and runs restaurants professionally | people/profession/specializations | music/album/featured_artists | entrepreneur | opens |
| 37 | Incorrect, Low, Relation | ['what is the country code for <e>', 'what is the country code for <e>', 'what are the expected codes of <e> ?'] | 1 | which country locates japanese city | base/aareas/schema/administrative_area_type/iso_country | base/aareas/schema/administrative_area_type/expected_codes | japanese city | japanese city |
| 38 | Incorrect, Low, Relation | ['which biological process is <e> a part of'] | 0 | What kind of gas does a nitrogen fixation produce | base/process/process/output | base/change/process/part_of_process | nitrogen fixation | nitrogen fixation |
| 39 | Correct | ['what type of game is <e>', 'what type of game is <e>', 'what type of game is <e>'] | 226 | what type of game is the ncaa football 07 | cvg/computer_videogame/gameplay_modes | cvg/computer_videogame/cvg_genre | ncaa football 07 | ncaa football 07 |
| 40 | Incorrect, Low, Relation | ['what is the name of a woman born in <e> ?', 'what is the name of a man born in <e> ?', 'who was the scientist born in <e> ?'] | 2 | Who was the father of the famous horse Cincinnati? | biology/organism/parents | location/location/people_born_here | cincinnati | cincinnati |
| 41 | Incorrect, Low, Relation | ['what event took place in <e> in 1993 ?', 'what event took place at <e> in 1943 ?', 'what event took place in <e> ?'] | 0 | what event tournament did uganda participate in | sports/sport_country/multi_event_tournaments_participated_in | location/location/events | uganda | uganda |
| 42 | Correct | ['what is the genre of <e> .', 'what is the genre of <e> ?', 'what is the genre of <e> ?'] | 1553 | what is the genre of trout fishing in america's work | music/artist/genre | book/book/genre | trout fishing in america | trout fishing in america |
| 43 | Incorrect, Relation | ['who is a parent of <e>', 'who is father of <e>', 'who is a parent of <e>'] | 114 | who boradcasts olivia newton-john | broadcast/artist/content | people/person/parents | olivia newton-john | olivia newton - john |
| 44 | Incorrect, Subject | ['where is the movie <e> from', 'where is the movie <e> from ?', 'where is the film <e> from'] | 1374 | where is the movie music for madame from | film/film/country | film/film/country | music for madame | madame |
| 45 | Correct | ['where was <e> from', 'where is <e> from', 'where is <e> from'] | 2720 | where was nat king cole from | people/person/place_of_birth | music/release/region | nat \"king\" cole | nat king cole |
| 46 | Incorrect, Noise | ['what is the ninth track on the album <e>', 'what was a track on the album <e>', 'what was a track on the album <e>'] | 679 | travellin light was the fourth track on this album. | music/release/track_list | music/release/track | travellin' light | travellin light |
| 47 | Incorrect, Relation, Subject | ["what 's a drug with <e> in it", "what 's a drug that uses <e>", 'what drug has <e> as the active ingredient'] | 23 | What's a hydrogen peroxide formula available in liquid | medicine/drug_dosage_form/formulations_available_in_this_form | medicine/drug_ingredient/active_ingredient_of_formulation | liquid | hydrogen peroxide |
| 48 | Incorrect, Low, Relation | ['does <e> follow taoism or buddhism', "what is <e> 's theology", '<e> ascribes to what church ?'] | 1 | does gerald j. rip work at the federal court | base/morelaw/canadian_judge/court | people/person/religion | gerald j. rip | gerald j. rip |
| 49 | Correct | ['<e> is what gender ?', '<e> is what gender ?', '<e> self - identifies as which gender ?'] | 1928 | john binder identifies with what gender? | people/person/gender | people/person/gender | john binden | john binder |

### Version: Softmax & Most Facts & Entity Relation Distribution & Ensemble

In version 3, we try to ensemble 3 similarly performing softmax models.

In [34]:
from tqdm import tqdm_notebook
import random

# For every question, score each candidate relation and keep the best
# (relation, subject, objects) triple; rows with no candidate facts get Nones.
predicted = []
for index, row in tqdm_notebook(df.iterrows(), total=df.shape[0]):
    row_facts = row['candidate_facts']
    if len(row_facts) == 0:
        # No candidates at all: record an empty prediction and move on.
        predicted.append((None, None, None))
        continue

    row_relations = list(row_facts.keys())
    ensemble_scores = row['softmax_ensemble_scores']

    # Per relation: total number of candidate objects across all of its subjects.
    fact_counts = [
        sum(len(row_facts[relation][mid]) for mid in row_facts[relation])
        for relation in row_relations
    ]
    # Per relation: ensemble score * `transform_probability_from_kg_to_train`
    # entry * fact count (multiplied in this order so ties compare identically).
    scores = [
        ensemble_scores[position] *
        transform_probability_from_kg_to_train[relation] *
        fact_counts[position]
        for position, relation in enumerate(row_relations)
    ]

    # Relations sharing the top score are tied; pick one of them at random.
    best_score = max(scores)
    tied_relations = [relation for relation, score in zip(row_relations, scores)
                      if score == best_score]
    predicted_relation = random.choice(tied_relations)

    # `Better than random guessing` heuristic from notebook
    # `HYPOTHESIS - Question Refers to Multiple Subjects`: take the subject
    # with the most objects for the chosen relation (first one wins on ties,
    # because the sort is stable).
    subjects_by_object_count = sorted(row_facts[predicted_relation].items(),
                                      key=lambda item: len(item[1]), reverse=True)
    subject_mid, object_mids = subjects_by_object_count[0]
    predicted.append((predicted_relation, subject_mid, object_mids))

evaluate(predicted)

HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))




HBox(children=(IntProgress(value=0, max=10845), HTML(value='')))


Subject & Relation Accuracy (SOTA 78.7%): 0.390134 [4231 of 10845]
Object Accuracy: 0.432918 [4695 of 10845]
Relation Accuracy (SOTA 88.4%): 0.416690 [4519 of 10845]
Subject Accuracy (SOTA 79%): 0.769756 [8348 of 10845]
Correct Interpretation: 0.000184 [2 of 10845]
No Interpretation: 0.021669 [235 of 10845]
Negative Sample:
| Index | Bucket | Closest False Predicate | True Relation Count | Question | True Relation | False Relation | Subject Name | Predicted Subject Name |
| --- | --- | --- | --- | --- | --- | --- | --- | --- |
| 0 | nan | ['name a <e> restaurant .', 'what restaurant serves <e> cuisine', 'which restaurant serves <e> ?'] | 13 | Name an American Thoroughbread racehorse  | biology/organism_classification/organisms_of_this_type | dining/cuisine/restaurant | nan | american |
| 1 | nan | ['what kind of game is <e> ?', 'what kind of game is <e>', 'what kind of gameplay is <e>'] | 335 | what kind of game is vision racing driving simulator? | cvg/computer_videogame/cvg_genre | 

In [35]:
# Send an explicit ROLLBACK on a fresh cursor, then commit on the connection.
# NOTE(review): presumably this clears a transaction left open or aborted by
# the queries above -- confirm against `lib.utils.get_connection`.
curs = connection.cursor()
try:
    curs.execute("ROLLBACK")
finally:
    # Release the cursor even if the statement fails; it is not used afterwards.
    curs.close()
connection.commit()