In [2]:
import pandas as pd
import json
from sklearn.metrics import jaccard_score
import numpy as np

In [11]:
# Load the pipe-separated, annotated DBpedia question file.
dbpedia_df = pd.read_csv(
    "../datasets/DBpedia-entity-replacements/dbpedia-annotated.csv",
    sep="|",
)

# Keep only the part of each id that precedes the first dash
# (presumably the id of the original, un-annotated question -- TODO confirm).
dbpedia_df["original_id"] = dbpedia_df["id"].map(
    lambda annotated_id: annotated_id.split("-", 1)[0]
)

In [12]:
# The organizers' test set has ca. 4400 examples, so draw the same number of
# rows (fixed seed for reproducibility) and keep their original question ids.
faketest_ids = dbpedia_df.sample(n=4400, random_state=42)["original_id"].values

In [20]:
# Sanity check: one id per sampled row (repeated ids are not removed here).
len(faketest_ids)

4400

In [22]:
# Peek at the first sampled id to see its format.
faketest_ids[0]

'dbpedia_7113'

In [15]:
# Reset first so a failed load cannot leave data from an earlier run behind.
dbpedia_json = None

# Load the DBpedia training questions. JSON is UTF-8 by specification, so the
# encoding is stated explicitly instead of relying on the platform default.
with open('../datasets/DBpedia/smarttask_dbpedia_train.json', encoding='utf-8') as json_file:
    dbpedia_json = json.load(json_file)

In [21]:
# Show the first training record to inspect its fields.
dbpedia_json[0]

{'id': 'dbpedia_1177',
 'question': 'Was Jacqueline Kennedy Onassis a follower of Melkite Greek Catholic Church?',
 'category': 'boolean',
 'type': ['boolean']}

In [16]:
# Build the fake test set: every training question whose id was sampled,
# keeping only the first occurrence of each id, in training-file order.
faketest = list()
added_ids = list()  # ids already taken, in insertion order

# Set-based lookups keep the loop linear; testing membership against the numpy
# array and the growing list scanned both on every iteration.
wanted_ids = set(faketest_ids)
seen_ids = set()

for q in dbpedia_json:
    question_id = q['id']
    if question_id in wanted_ids and question_id not in seen_ids:
        faketest.append(q)
        added_ids.append(question_id)
        seen_ids.add(question_id)

In [17]:
# Number of questions kept; each id is added at most once, so this can be
# smaller than the number of sampled ids.
len(faketest)

1902

In [18]:
# Persist the fake test set as JSON.
with open('../datasets/DBpedia/faketest_dbpedia-annotated.json', mode='w') as out_file:
    json.dump(faketest, out_file)

In [34]:
"dbpedia_8947" in faketest_ids

True

In [20]:
# Reset first so a failed load cannot leave data from an earlier run behind.
faketest_preds_json = None

# Load the predictions made on the fake test set. JSON is UTF-8 by
# specification, so the encoding is stated explicitly instead of relying on
# the platform default.
with open('../evaluation/faketest_dbpedia_pred.json', encoding='utf-8') as json_file:
    faketest_preds_json = json.load(json_file)

In [21]:
# Number of prediction records loaded.
len(faketest_preds_json)

4400

## Overlap and similarity check

In [79]:
def _read_json(path):
    """Parse the UTF-8 encoded JSON file at ``path`` and return its content."""
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(path, encoding='utf-8') as json_file:
        return json.load(json_file)


# Reset first so a failed load cannot leave data from an earlier run behind.
faketest_preds_mult = None
faketest_preds_en = None
faketest = None

# Predictions of the two models being compared
# (presumably "mult" = multilingual and "en" = English-only -- TODO confirm).
faketest_preds_mult = _read_json('../evaluation/faketest_dbpedia_pred_10.json')
faketest_preds_en = _read_json('../evaluation/faketest_dbpedia_pred_8.json')

# Gold questions the predictions are scored against.
# NOTE(review): this rebinds `faketest` from a different path than the file
# written earlier in the notebook -- confirm that is intended.
faketest = _read_json('../evaluation/faketest_dbpedia.json')

In [80]:
def get_value(key, json_list, prop):
    """Return ``prop`` of the first record in ``json_list`` whose 'id' equals ``key``.

    Returns None when no record carries that id. Raises KeyError if the
    matching record has no ``prop`` entry.
    """
    matches = (record[prop] for record in json_list if record['id'] == key)
    return next(matches, None)

def jaccard_similarity(list1, list2):
    """Return the Jaccard index |A ∩ B| / |A ∪ B| of two collections.

    Duplicates are ignored because both inputs are converted to sets.

    Args:
        list1: iterable of hashable items.
        list2: iterable of hashable items.

    Returns:
        A float in [0.0, 1.0]. Two empty inputs are treated as identical and
        score 1.0; this case used to raise ZeroDivisionError because the
        union is empty.
    """
    first, second = set(list1), set(list2)
    union_size = len(first | second)
    if union_size == 0:
        # Both sides are empty: nothing to disagree on.
        return 1.0
    return float(len(first & second)) / union_size

In [84]:
def _index_first_by_id(records):
    """Map each record's 'id' to the first record carrying it.

    Keeping the first occurrence gives the same record a front-to-back scan
    of the list would find when ids repeat.
    """
    index = {}
    for record in records:
        index.setdefault(record['id'], record)
    return index


# Index both prediction lists once instead of scanning them linearly four
# times per gold question.
en_by_id = _index_first_by_id(faketest_preds_en)
mult_by_id = _index_first_by_id(faketest_preds_mult)

ids = list()        # gold ids for which both models produced a category
mult = list()       # True where the "mult" model's category equals the gold one
en = list()         # True where the "en" model's category equals the gold one
mult_jacc = list()  # Jaccard index: gold types vs "mult" predicted types
en_jacc = list()    # Jaccard index: gold types vs "en" predicted types
diff_jacc = list()  # Jaccard index: "mult" types vs "en" types

for gold in faketest:
    id_ = gold['id']

    en_pred = en_by_id.get(id_)
    mult_pred = mult_by_id.get(id_)

    # A missing prediction record yields None for both category and type.
    en_val = en_pred['category'] if en_pred is not None else None
    mult_val = mult_pred['category'] if mult_pred is not None else None
    en_type = en_pred['type'] if en_pred is not None else None
    mult_type = mult_pred['type'] if mult_pred is not None else None

    if en_val and mult_val:
        ids.append(id_)
        mult.append(mult_val == gold['category'])
        en.append(en_val == gold['category'])
        mult_jacc.append(jaccard_similarity(gold['type'], mult_type))
        en_jacc.append(jaccard_similarity(gold['type'], en_type))
        diff_jacc.append(jaccard_similarity(mult_type, en_type))
    else:
        print("No prediction for id", id_)

In [85]:
# Questions on which the two models disagree about category correctness,
# stored as id -> (mult_correct, en_correct).
nonequal = {
    question_id: (mult_correct, en_correct)
    for question_id, mult_correct, en_correct in zip(ids, mult, en)
    if mult_correct != en_correct
}

In [89]:
# Summarize category disagreements and the type-level Jaccard scores.
n_nonequal = len(nonequal)
n_compared = len(ids)
pct_nonequal = n_nonequal / n_compared * 100

mean_mult_jacc = np.mean(mult_jacc)
mean_en_jacc = np.mean(en_jacc)
mean_diff_jacc = np.mean(diff_jacc)

print("There is {0} non-equal answers out of {1}, possible % of increase is {2}".format(n_nonequal, n_compared, pct_nonequal))
print("Average similarity between types predictions of model 1 and model 2: {0} and {1}".format(mean_mult_jacc, mean_en_jacc))
print("Jaccard Index between predictions {0}".format(mean_diff_jacc))

There is 85 non-equal answers out of 4507, possible % of increase is 1.88595518082982
Average similarity between types predictions of model 1 and model 2: 0.8007238299505883 and 0.6289188599039942
Jaccard Index between predictions 0.6512773780468478


In [78]:
# Look up the question text for one of the ids on which the models disagree.
get_value("dbpedia_13246", faketest, 'question')

In [45]:
# Show the disagreements as id -> (mult_correct, en_correct).
nonequal

{'dbpedia_13246': (True, False),
 'dbpedia_18069': (True, False),
 'dbpedia_10266': (True, False),
 'dbpedia_14087': (False, True),
 'dbpedia_13830': (True, False),
 'dbpedia_6882': (True, False),
 'dbpedia_23488': (False, True),
 'dbpedia_4700': (True, False),
 'dbpedia_8346': (True, False),
 'dbpedia_10188': (False, True),
 'dbpedia_6154': (True, False),
 'dbpedia_3413': (True, False),
 'dbpedia_11922': (True, False),
 'dbpedia_12311': (True, False),
 'dbpedia_17978': (True, False),
 'dbpedia_13597': (True, False),
 'dbpedia_17657': (True, False),
 'dbpedia_17009': (False, True),
 'dbpedia_2966': (False, True),
 'dbpedia_1160': (True, False),
 'dbpedia_19579': (False, True),
 'dbpedia_16689': (False, True),
 'dbpedia_14856': (False, True),
 'dbpedia_1261': (False, True),
 'dbpedia_1819': (True, False),
 'dbpedia_8233': (True, False),
 'dbpedia_20667': (True, False),
 'dbpedia_1756': (True, False),
 'dbpedia_6989': (True, False),
 'dbpedia_4380': (True, False),
 'dbpedia_19901': (Fals