### Explore WNED data

In [1]:
import os
rawtext_dataset = "../../../../WNED/wned-datasets/wikipedia/RawText/"
xml_path = "../../../../WNED/wned-datasets/wikipedia/wikipedia.xml"
tsv_path = "../../../../WNED/wned-datasets/wikipedia/wikipedia-name2bracket.tsv"

In [9]:
len(os.listdir(rawtext_dataset))

345

In [11]:
import pandas as pd
xml = pd.read_xml(xml_path)

In [16]:
from xml.etree import ElementTree as ET

xml = ET.parse(xml_path)

In [18]:
root = xml.getroot()
root

<Element 'wikipediaData.entityAnnotation' at 0x7fd0e31b4040>

In [33]:
mention_and_entity = {'mention':[], 'entity':[]}
for child in root:
    for ann in child:
        mention_and_entity['mention'].append(ann[0].text) # mention
        mention_and_entity['entity'].append(ann[1].text)  # wikiname

In [34]:
mention_entity_dataset = pd.DataFrame(data=mention_and_entity, columns=['mention','entity'])

In [36]:
mention_entity_dataset['uri'] = mention_entity_dataset['entity'].apply(
    lambda x: "http://dbpedia.org/resource/"+"_".join(x.split(" "))
    )

In [37]:
mention_entity_dataset

Unnamed: 0,mention,entity,uri
0,Tasmanian Legislative Council,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...
1,upper house,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...
2,Murchison,Electoral division of Murchison,http://dbpedia.org/resource/Electoral_division...
3,Pipers River,"Pipers River, Tasmania","http://dbpedia.org/resource/Pipers_River,_Tasm..."
4,Scottsdale,"Scottsdale, Tasmania","http://dbpedia.org/resource/Scottsdale,_Tasmania"
...,...,...,...
6816,Miko,Miko,http://dbpedia.org/resource/Miko
6817,Rattan,Rattan,http://dbpedia.org/resource/Rattan
6818,Gold leaf,Gold leaf,http://dbpedia.org/resource/Gold_leaf
6819,male,Male,http://dbpedia.org/resource/Male


In [38]:
mention_entity_dataset.to_csv("./wned_wiki_dataset.csv")

### Evaluate methods

In [1]:
import sys
sys.path.append("../../../GSoC23/")

#### Lookup takes around 120 minutes for 6821 entities.

In [47]:
# Lookup
lookup_results = []
from EntityLinking.ELmethods import EL_DBpedia_lookup    

In [53]:
import time
print("Entity linking using lookup")
start = time.time()
for i,ent_mention in enumerate(mention_entity_dataset['mention'].to_list()[122:]):
    result = EL_DBpedia_lookup(ent_mention, max_results=1)
    if len(result)==0:
        lookup_results.append("NaN")
    else:
        lookup_results.append(result[0])
    if (i+1)%50==0 and i!=0:
        end = time.time()
        print(f"Time taken for 50 entities: {end-start} seconds")
        start = time.time()

Entity linking using lookup
Time taken for 50 entities: 52.619186878204346 seconds
Time taken for 50 entities: 50.00544810295105 seconds
Time taken for 50 entities: 53.38618850708008 seconds
Time taken for 50 entities: 52.81750750541687 seconds
Time taken for 50 entities: 49.695708990097046 seconds
Time taken for 50 entities: 51.184513330459595 seconds
Time taken for 50 entities: 54.49206233024597 seconds
Time taken for 50 entities: 49.24910044670105 seconds
Time taken for 50 entities: 50.16962027549744 seconds
Time taken for 50 entities: 47.4178249835968 seconds
Time taken for 50 entities: 53.95074105262756 seconds
Time taken for 50 entities: 49.91064786911011 seconds
Time taken for 50 entities: 51.1007022857666 seconds
Time taken for 50 entities: 53.22743201255798 seconds
Time taken for 50 entities: 49.382572174072266 seconds
Time taken for 50 entities: 50.55260920524597 seconds
Time taken for 50 entities: 59.80045127868652 seconds
Time taken for 50 entities: 52.469740867614746 secon

In [54]:
len(lookup_results)

6821

In [52]:
EL_DBpedia_lookup(mention_entity_dataset['mention'].to_list()[122], max_results=1)

[]

In [55]:
mention_entity_dataset['lookup_results'] = lookup_results

In [56]:
mention_entity_dataset.to_csv("./wned_wiki_dataset.csv")

In [58]:
acc = len(mention_entity_dataset[mention_entity_dataset['uri']==mention_entity_dataset['lookup_results']])/len(mention_entity_dataset)

In [59]:
acc

0.0035185456677906467

In [60]:
acc*len(mention_entity_dataset)

24.0

#### Redis db takes around 1.5 minutes for 6821 entities

In [2]:
# Using the surface-forms redis database
from redis import StrictRedis
from EntityLinking.ELmethods import EL_redis_db    
redis_forms = StrictRedis(host='172.17.0.2', port=7979, db=0)
redis_redir = StrictRedis(host='172.17.0.2', port=7979, db=1)

In [8]:
from EntityLinking.utilities import get_majority_vote

In [19]:
import time
import pandas as pd
mention_entity_dataset = pd.read_csv("./wned_wiki_dataset.csv")
redis_results = []

In [20]:
# Let us take top 2 or 3 for redis db
print("Entity linking using redis surface forms")
start = time.time()
for i,ent_mention in enumerate(mention_entity_dataset['mention'].to_list()):
    result = EL_redis_db(ent_mention, redis_forms, redis_redir).index.values
    if len(result)==0:
        redis_results.append("NaN")
    else:
        redis_results.append(result[0])
    if (i+1)%50==0 and i!=0:
        end = time.time()
        print(f"Time taken for 50 entities: {end-start} seconds")
        start = time.time()

Entity linking using redis surface forms
Time taken for 50 entities: 1.096778154373169 seconds
Time taken for 50 entities: 3.0428106784820557 seconds
Time taken for 50 entities: 1.434004783630371 seconds
Time taken for 50 entities: 2.1390323638916016 seconds
Time taken for 50 entities: 0.533322811126709 seconds
Time taken for 50 entities: 2.5464718341827393 seconds
Time taken for 50 entities: 0.5816860198974609 seconds
Time taken for 50 entities: 0.5372681617736816 seconds
Time taken for 50 entities: 1.1599137783050537 seconds
Time taken for 50 entities: 0.9162073135375977 seconds
Time taken for 50 entities: 0.7069635391235352 seconds
Time taken for 50 entities: 0.798771858215332 seconds
Time taken for 50 entities: 1.9460337162017822 seconds
Time taken for 50 entities: 2.2771129608154297 seconds
Time taken for 50 entities: 1.1284294128417969 seconds
Time taken for 50 entities: 0.5680899620056152 seconds
Time taken for 50 entities: 0.6935007572174072 seconds
Time taken for 50 entities: 

In [21]:
len(redis_results)

6821

In [22]:
mention_entity_dataset['redis_results'] = redis_results

In [26]:
mention_entity_dataset['redis_results'] = mention_entity_dataset['redis_results'].apply(
    lambda x: "http://dbpedia.org/resource/"+x
    )

In [27]:
acc = len(mention_entity_dataset[mention_entity_dataset['uri']==mention_entity_dataset['redis_results']])/len(mention_entity_dataset)

In [28]:
# Much higher than lookup
acc

0.6182377950447149

In [43]:
acc*6821

4217.0

### Some exploration

In [29]:
mention_entity_dataset.to_csv("./wned_wiki_dataset.csv")

In [32]:
mention_entity_dataset[['uri','redis_results','lookup_results']]

Unnamed: 0,uri,redis_results,lookup_results
0,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Members_of_the_Tas...
1,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Upper_house,http://dbpedia.org/resource/Upper_house
2,http://dbpedia.org/resource/Electoral_division...,http://dbpedia.org/resource/Murchison_bioregion,http://dbpedia.org/resource/Roderick_Murchison
3,"http://dbpedia.org/resource/Pipers_River,_Tasm...",http://dbpedia.org/resource/Pipers_River,http://dbpedia.org/resource/Pipers_River
4,"http://dbpedia.org/resource/Scottsdale,_Tasmania","http://dbpedia.org/resource/Scottsdale,_Arizona","http://dbpedia.org/resource/Scottsdale,_Arizona"
...,...,...,...
6816,http://dbpedia.org/resource/Miko,http://dbpedia.org/resource/Miko,['http://dbpedia.org/resource/Kannazuki_no_Miko']
6817,http://dbpedia.org/resource/Rattan,http://dbpedia.org/resource/Rattan,['http://dbpedia.org/resource/Rattan']
6818,http://dbpedia.org/resource/Gold_leaf,http://dbpedia.org/resource/Gold_leaf,['http://dbpedia.org/resource/Team_Lotus']
6819,http://dbpedia.org/resource/Male,http://dbpedia.org/resource/Male,['http://dbpedia.org/resource/Male']


In [50]:
mention_entity_dataset['lookup_results'] = mention_entity_dataset['lookup_results'].apply(
    lambda x: x.strip("[").strip("'").strip("]") if type(x)==str else x
)

In [52]:
mention_entity_dataset['lookup_results'] = mention_entity_dataset['lookup_results'].apply(
    lambda x: x.strip("'") if type(x)==str else x
)

In [53]:
mention_entity_dataset

Unnamed: 0.1,Unnamed: 0,mention,entity,uri,lookup_results,redis_results
0,0,Tasmanian Legislative Council,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Members_of_the_Tas...,http://dbpedia.org/resource/Tasmanian_Legislat...
1,1,upper house,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Upper_house,http://dbpedia.org/resource/Upper_house
2,2,Murchison,Electoral division of Murchison,http://dbpedia.org/resource/Electoral_division...,http://dbpedia.org/resource/Roderick_Murchison,http://dbpedia.org/resource/Murchison_bioregion
3,3,Pipers River,"Pipers River, Tasmania","http://dbpedia.org/resource/Pipers_River,_Tasm...",http://dbpedia.org/resource/Pipers_River,http://dbpedia.org/resource/Pipers_River
4,4,Scottsdale,"Scottsdale, Tasmania","http://dbpedia.org/resource/Scottsdale,_Tasmania","http://dbpedia.org/resource/Scottsdale,_Arizona","http://dbpedia.org/resource/Scottsdale,_Arizona"
...,...,...,...,...,...,...
6816,6816,Miko,Miko,http://dbpedia.org/resource/Miko,http://dbpedia.org/resource/Kannazuki_no_Miko,http://dbpedia.org/resource/Miko
6817,6817,Rattan,Rattan,http://dbpedia.org/resource/Rattan,http://dbpedia.org/resource/Rattan,http://dbpedia.org/resource/Rattan
6818,6818,Gold leaf,Gold leaf,http://dbpedia.org/resource/Gold_leaf,http://dbpedia.org/resource/Team_Lotus,http://dbpedia.org/resource/Gold_leaf
6819,6819,male,Male,http://dbpedia.org/resource/Male,http://dbpedia.org/resource/Male,http://dbpedia.org/resource/Male


Corrected

In [54]:
mention_entity_dataset.to_csv("./wned_wiki_dataset.csv")

In [55]:
comparison = mention_entity_dataset[['uri','redis_results','lookup_results']]
comparison['uri'] = comparison['uri'].apply(lambda x: x[28:])
comparison['redis_results'] = comparison['redis_results'].apply(lambda x:x[28:])
comparison['lookup_results'] = comparison['lookup_results'].apply(
    lambda x: x[28:] if type(x)==str else x
    )

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comparison['uri'] = comparison['uri'].apply(lambda x: x[28:])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comparison['redis_results'] = comparison['redis_results'].apply(lambda x:x[28:])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  comparison['lookup_results'] = comparison['lookup_results'].ap

In [56]:
comparison

Unnamed: 0,uri,redis_results,lookup_results
0,Tasmanian_Legislative_Council,Tasmanian_Legislative_Council,"Members_of_the_Tasmanian_Legislative_Council,_..."
1,Tasmanian_Legislative_Council,Upper_house,Upper_house
2,Electoral_division_of_Murchison,Murchison_bioregion,Roderick_Murchison
3,"Pipers_River,_Tasmania",Pipers_River,Pipers_River
4,"Scottsdale,_Tasmania","Scottsdale,_Arizona","Scottsdale,_Arizona"
...,...,...,...
6816,Miko,Miko,Kannazuki_no_Miko
6817,Rattan,Rattan,Rattan
6818,Gold_leaf,Gold_leaf,Team_Lotus
6819,Male,Male,Male


In [57]:
lookup_acc = len(comparison[comparison['uri']==comparison['lookup_results']])/len(comparison)
redis_acc = len(comparison[comparison['uri']==comparison['redis_results']])/len(comparison)

In [58]:
lookup_acc, redis_acc

(0.4392317841958657, 0.6182377950447149)

In [61]:
redis_correct = comparison[comparison['uri']==comparison['redis_results']]
lookup_correct = comparison[comparison['uri']==comparison['lookup_results']]

In [64]:
both_redis_lookup_correct = comparison[
    (comparison['uri']==comparison['redis_results']) & (comparison['uri']==comparison['lookup_results'])
     ]

In [59]:
redis_correct_lookup_not = comparison[(comparison['uri']==comparison['redis_results']) & (comparison['uri']!=comparison['lookup_results'])]

In [66]:
lookup_correct_redis_not = comparison[(comparison['uri']!=comparison['redis_results']) & (comparison['uri']==comparison['lookup_results'])]

In [69]:
len(redis_correct_lookup_not)/6821, len(lookup_correct_redis_not)/6821

(0.2058349215657528, 0.02682891071690368)

In [71]:
len(redis_correct)/6821, len(lookup_correct)/6821, len(both_redis_lookup_correct)/6821

(0.6182377950447149, 0.4392317841958657, 0.41240287347896204)

In [70]:
len(comparison)

6821

In [72]:
mention_entity_dataset_check = pd.read_csv("./wned_wiki_dataset.csv")

In [73]:
mention_entity_dataset_check

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,mention,entity,uri,lookup_results,redis_results
0,0,0,Tasmanian Legislative Council,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Members_of_the_Tas...,http://dbpedia.org/resource/Tasmanian_Legislat...
1,1,1,upper house,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Upper_house,http://dbpedia.org/resource/Upper_house
2,2,2,Murchison,Electoral division of Murchison,http://dbpedia.org/resource/Electoral_division...,http://dbpedia.org/resource/Roderick_Murchison,http://dbpedia.org/resource/Murchison_bioregion
3,3,3,Pipers River,"Pipers River, Tasmania","http://dbpedia.org/resource/Pipers_River,_Tasm...",http://dbpedia.org/resource/Pipers_River,http://dbpedia.org/resource/Pipers_River
4,4,4,Scottsdale,"Scottsdale, Tasmania","http://dbpedia.org/resource/Scottsdale,_Tasmania","http://dbpedia.org/resource/Scottsdale,_Arizona","http://dbpedia.org/resource/Scottsdale,_Arizona"
...,...,...,...,...,...,...,...
6816,6816,6816,Miko,Miko,http://dbpedia.org/resource/Miko,http://dbpedia.org/resource/Kannazuki_no_Miko,http://dbpedia.org/resource/Miko
6817,6817,6817,Rattan,Rattan,http://dbpedia.org/resource/Rattan,http://dbpedia.org/resource/Rattan,http://dbpedia.org/resource/Rattan
6818,6818,6818,Gold leaf,Gold leaf,http://dbpedia.org/resource/Gold_leaf,http://dbpedia.org/resource/Team_Lotus,http://dbpedia.org/resource/Gold_leaf
6819,6819,6819,male,Male,http://dbpedia.org/resource/Male,http://dbpedia.org/resource/Male,http://dbpedia.org/resource/Male


### Evaluating GENRE on WNED

In [2]:
import pandas as pd
genre_predictions_2600 = pd.read_csv("./result_df_2600.csv")

In [4]:
genre_predictions_1300_5521 = pd.read_csv("./result_df_batch_27_onwards5521.csv")

In [7]:
genre_predictions_2600[1300:]

Unnamed: 0.1,Unnamed: 0,prediction
1300,0,Drainage basin
1301,1,Mississippi River
1302,2,Allegheny Plateau
1303,3,"Belmont County, Ohio"
1304,4,"Harrison County, Ohio"
...,...,...
2595,95,Princes Highway
2596,96,Dual carriageway
2597,97,"Bass Highway, Western Australia"
2598,98,"Cranbourne, Victoria"


In [6]:
genre_predictions_1300_5521

Unnamed: 0.1,Unnamed: 0,prediction
0,0,Drainage basin
1,1,Mississippi River
2,2,Allegheny Plateau
3,3,"Belmont County, Ohio"
4,4,"Harrison County, Ohio"
...,...,...
5516,16,Miko
5517,17,Rattan
5518,18,Gold leaf
5519,19,Male


In [17]:
len(genre_predictions_1300_5521), len(genre_predictions_2600)

(5521, 2600)

In [18]:
6821-2600

4221

In [19]:
5521-4221

1300

Because the first file had entries from 0 to 2600 and the second file had entries from 1300 to 6821

In [20]:
genre_predictions = pd.concat([genre_predictions_2600, genre_predictions_1300_5521[1300:]], ignore_index=True)

In [21]:
len(genre_predictions)

6821

In [23]:
genre_predictions.drop('Unnamed: 0', axis=1, inplace=True)

In [25]:
genre_predictions.to_csv("genre_predictions.csv")

In [31]:
wned = pd.read_csv("./wned_wiki_dataset.csv")

In [32]:
wned['genre_results'] = genre_predictions['prediction']

In [33]:
acc_genre = len(wned[wned['entity']==wned['genre_results']])/len(wned)

In [34]:
acc_genre

0.7880076235156136

In [36]:
wned.drop(['Unnamed: 0.1','Unnamed: 0'], axis=1, inplace=True)

In [37]:
wned

Unnamed: 0,mention,entity,uri,lookup_results,redis_results,genre_results
0,Tasmanian Legislative Council,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Members_of_the_Tas...,http://dbpedia.org/resource/Tasmanian_Legislat...,Tasmanian Legislative Council
1,upper house,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Upper_house,http://dbpedia.org/resource/Upper_house,Western Australian Legislative Council
2,Murchison,Electoral division of Murchison,http://dbpedia.org/resource/Electoral_division...,http://dbpedia.org/resource/Roderick_Murchison,http://dbpedia.org/resource/Murchison_bioregion,Electoral district of Murchison
3,Pipers River,"Pipers River, Tasmania","http://dbpedia.org/resource/Pipers_River,_Tasm...",http://dbpedia.org/resource/Pipers_River,http://dbpedia.org/resource/Pipers_River,"Pipers River, Tasmania"
4,Scottsdale,"Scottsdale, Tasmania","http://dbpedia.org/resource/Scottsdale,_Tasmania","http://dbpedia.org/resource/Scottsdale,_Arizona","http://dbpedia.org/resource/Scottsdale,_Arizona","Scottsdale, Tasmania"
...,...,...,...,...,...,...
6816,Miko,Miko,http://dbpedia.org/resource/Miko,http://dbpedia.org/resource/Kannazuki_no_Miko,http://dbpedia.org/resource/Miko,Miko
6817,Rattan,Rattan,http://dbpedia.org/resource/Rattan,http://dbpedia.org/resource/Rattan,http://dbpedia.org/resource/Rattan,Rattan
6818,Gold leaf,Gold leaf,http://dbpedia.org/resource/Gold_leaf,http://dbpedia.org/resource/Team_Lotus,http://dbpedia.org/resource/Gold_leaf,Gold leaf
6819,male,Male,http://dbpedia.org/resource/Male,http://dbpedia.org/resource/Male,http://dbpedia.org/resource/Male,Male


In [38]:
wned.to_csv("./wned_wiki_dataset_results.csv")

### Exploring results from all methods

In [18]:
import pandas as pd
wned_data = pd.read_csv("./wned_wiki_dataset_results.csv")
wned_sentence_data = pd.read_csv("./genre_tagged_wned_dataset.csv")
wned_data['tagged_sentence'] = wned_sentence_data['tagged_sentence']
wned_data.to_csv("./wned_wiki_dataset_results.csv")

In [19]:
wned_data.head()

Unnamed: 0.1,Unnamed: 0,mention,entity,uri,lookup_results,redis_results,genre_results,tagged_sentence
0,0,Tasmanian Legislative Council,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Members_of_the_Tas...,http://dbpedia.org/resource/Tasmanian_Legislat...,Tasmanian Legislative Council,The Electoral division of Apsley is one of the...
1,1,upper house,Tasmanian Legislative Council,http://dbpedia.org/resource/Tasmanian_Legislat...,http://dbpedia.org/resource/Upper_house,http://dbpedia.org/resource/Upper_house,Western Australian Legislative Council,It is the second-largest [START_ENT] upper hou...
2,2,Murchison,Electoral division of Murchison,http://dbpedia.org/resource/Electoral_division...,http://dbpedia.org/resource/Roderick_Murchison,http://dbpedia.org/resource/Murchison_bioregion,Electoral district of Murchison,It is the second-largest upper house electorat...
3,3,Pipers River,"Pipers River, Tasmania","http://dbpedia.org/resource/Pipers_River,_Tasm...",http://dbpedia.org/resource/Pipers_River,http://dbpedia.org/resource/Pipers_River,"Pipers River, Tasmania",Towns within the division include: [START_ENT]...
4,4,Scottsdale,"Scottsdale, Tasmania","http://dbpedia.org/resource/Scottsdale,_Tasmania","http://dbpedia.org/resource/Scottsdale,_Arizona","http://dbpedia.org/resource/Scottsdale,_Arizona","Scottsdale, Tasmania",Towns within the division include: Pipers Rive...


In [20]:
# As GENRE had the highest accuracy, let us see some cases where
# GEBRE was wrong but other two (or either of them) could be correct
genre_was_wrong = wned_data[
    (wned_data['genre_results']!=wned_data['entity']) & 
    ((wned_data['lookup_results']==wned_data['uri']) | (wned_data['redis_results']==wned_data['uri']))
    ]

In [21]:
only_lookup_was_right = wned_data[
    (wned_data['lookup_results']==wned_data['uri']) &
    (
        (wned_data['genre_results']!=wned_data['entity']) & (wned_data['redis_results']!=wned_data['uri'])
    )
]
len(only_lookup_was_right)

37

In [70]:
only_redis_was_right = wned_data[
    (wned_data['redis_results']==wned_data['uri']) &
    (
        (wned_data['genre_results']!=wned_data['entity']) & (wned_data['lookup_results']!=wned_data['uri'])
    )
]
len(only_redis_was_right)

168

In [22]:
only_genre_was_right = wned_data[
    (wned_data['genre_results']==wned_data['entity']) &
    (
        (wned_data['lookup_results']!=wned_data['uri']) & (wned_data['redis_results']!=wned_data['uri'])
    )
]
len(only_genre_was_right)

1314

In [75]:
all_were_wrong = wned_data[
    (wned_data['genre_results']!=wned_data['entity']) &
    (wned_data['lookup_results']!=wned_data['uri']) & 
    (wned_data['redis_results']!=wned_data['uri'])
]
len(all_were_wrong)

1107

In [77]:
all_correct_simultaneously = wned_data[
    (wned_data['genre_results']==wned_data['entity']) &
    (wned_data['lookup_results']==wned_data['uri']) & 
    (wned_data['redis_results']==wned_data['uri'])
]
len(all_correct_simultaneously)

2679

In [79]:
at_least_one_of_them_correct = wned_data[
    (wned_data['genre_results']==wned_data['entity']) |
    (wned_data['lookup_results']==wned_data['uri']) |
    (wned_data['redis_results']==wned_data['uri'])
]
len(at_least_one_of_them_correct)

5714

In [80]:
5714/6821

0.8377070810731564

In [44]:
def rowinfo(i, df):
    print("Mention:",df.iloc[i]['mention'])
    print("Entity:",df.iloc[i]['entity'])
    print("URI:",df.iloc[i]['uri'])
    print()
    print("Lookup:",df.iloc[i]['lookup_results'])
    print("Redis:",df.iloc[i]['redis_results'])
    print("GENRE:",df.iloc[i]['genre_results'])
    print("Sentence:",df.iloc[i]['tagged_sentence'])


In [29]:
rowinfo(50,only_genre_was_right)

Mention: Wales
Entity: Wales national rugby union team
URI: http://dbpedia.org/resource/Wales_national_rugby_union_team

Lookup: http://dbpedia.org/resource/Wales
Redis: http://dbpedia.org/resource/Wales
GENRE: Wales national rugby union team
Sentence: He then played against Scotland on 26 February and [START_ENT] Wales [END_ENT]  on 12 March.


In [61]:
rowinfo(600, only_genre_was_right)

Mention: Asian Five Nations
Entity: Asian Five Nations
URI: http://dbpedia.org/resource/Asian_Five_Nations

Lookup: http://dbpedia.org/resource/2008_Asian_Five_Nations
Redis: http://dbpedia.org/resource/Asia_Rugby_Championship
GENRE: Asian Five Nations
Sentence: The flagship tournament for promoting the sport in Asia is the [START_ENT] Asian Five Nations [END_ENT] , which launched in 2008, and which most recently in 2011 saw the national teams of Hong Kong (national team), Japan (national team), Kazakhstan (national team), Sri Lanka (national team) and the UAE, compete in the main tournament.


In [71]:
rowinfo(30, only_redis_was_right)

Mention: Fountains
Entity: Fountains Abbey
URI: http://dbpedia.org/resource/Fountains_Abbey

Lookup: http://dbpedia.org/resource/Fountains_of_Wayne
Redis: http://dbpedia.org/resource/Fountains_Abbey
GENRE: Fountains
Sentence: This part of Abbey Park consists of Bisham Drive, Dale Close, Newstead Drive,  Drive, [START_ENT] Fountains [END_ENT]  Close,  Close, and  Drive.
