In [5]:
%load_ext autoreload

In [21]:
%autoreload
from ELmethods import (
    EL_DBpedia_lookup, 
    EL_DBpedia_spotlight,
    EL_redis_db
    )

In [3]:
import spacy
import spacy_dbpedia_spotlight
# Build the pipeline with spacy_dbpedia_spotlight.create('en') rather than loading a
# stock spaCy model (the earlier attempt was spacy.load('en_core_web_sm')).
# The pipe_names check in the following cell lists 'dbpedia_spotlight' as its only component.
nlp = spacy_dbpedia_spotlight.create('en')

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# No explicit nlp.add_pipe('dbpedia_spotlight') is needed here: the pipeline
# returned by create() already lists that component (see the output of this cell).
nlp.pipe_names

['dbpedia_spotlight']

In [5]:
import os

from redis import StrictRedis

# Connection settings for the Redis server used by EL_redis_db.
# The defaults reproduce the values this notebook was written with; set the
# REDIS_HOST / REDIS_PORT environment variables to point at another instance
# without editing the cell.
REDIS_HOST = os.environ.get('REDIS_HOST', '172.17.0.2')
REDIS_PORT = int(os.environ.get('REDIS_PORT', '7979'))

# Two logical databases on the same server: db 0 and db 1.
# NOTE(review): judging by the variable names, db 0 holds surface forms and
# db 1 holds redirects — confirm against EL_redis_db.
redis_forms = StrictRedis(host=REDIS_HOST, port=REDIS_PORT, db=0)
redis_redir = StrictRedis(host=REDIS_HOST, port=REDIS_PORT, db=1)

In [15]:
# Connectivity check on the db-0 client only; redis_redir is not pinged here.
redis_forms.ping()

True

In [6]:
# Sample call: query DBpedia Lookup with a single mention, asking for at most
# 2 results. The call returns a list of DBpedia resource URIs.
EL_DBpedia_lookup("USA", max_results=2)

['http://dbpedia.org/resource/United_States',
 'http://dbpedia.org/resource/Democratic_Party_(United_States)']

In [30]:
# Sample call: Spotlight receives the whole sentence and returns a pair
# (detected mention texts, {mention: [resource URI, ...]}).
texts, uris = EL_DBpedia_spotlight("England won the cricket world cup", nlp)
uris

{'England': ['http://dbpedia.org/resource/England']}

In [23]:
# Sample call: query the Redis-backed index for one mention, passing both clients.
redires = EL_redis_db("England", redis_forms, redis_redir)

In [37]:
# The candidate resource names are carried in the result's index
# (bare names, without the http://dbpedia.org/resource/ prefix).
list(redires.index.values)

['England',
 'England_national_football_team',
 'England_cricket_team',
 'England_national_rugby_union_team',
 'The_Football_Association']

#### Flow:
1. We have a piece of text.
2. We identify the entities in it (manually for now; later with the NER module).
3. We pass each entity as a query to DBpedia Lookup or to the Redis database.
4. Spotlight works differently: it takes the entire text, identifies the entities itself, and returns the DBpedia resource linked to each one.

In [31]:
# Three hand-written test sentences. The mentions to be linked (7 in total)
# are listed by hand in `entities`.
sent1 = "Lionel Messi is a football player from Argentina"
sent2 = "Colorado is in USA"
sent3 = "Hitler was the ruler of Germany during World-War-2"

In [32]:
# Hand-picked mentions, grouped by the sentence they come from.
entities = [
    "Lionel Messi", "Argentina",            # sent1
    "Colorado", "USA",                      # sent2
    "Hitler", "Germany", "World-War-2",     # sent3
]

In [34]:
# One fresh, independent candidate list per mention; the cells that follow append to these.
candidate_mapping = {e: [] for e in entities}

In [35]:
# Candidate source 1: DBpedia Lookup, at most 3 URIs per mention.
# NOTE: this appends to candidate_mapping, so re-running the cell without
# re-initialising candidate_mapping duplicates entries and skews the
# frequency count used to pick the final URI.
for e in entities:
    candidate_mapping[e].extend(EL_DBpedia_lookup(e, max_results=3))

In [36]:
# Inspect the candidates collected so far (Lookup results only at this point).
candidate_mapping

{'Lionel Messi': ['http://dbpedia.org/resource/Lionel_Messi',
  'http://dbpedia.org/resource/Ronaldo–Messi_rivalry',
  'http://dbpedia.org/resource/List_of_career_achievements_by_Lionel_Messi'],
 'Argentina': ['http://dbpedia.org/resource/Argentina',
  'http://dbpedia.org/resource/Argentina_national_football_team',
  'http://dbpedia.org/resource/Time_in_Argentina'],
 'Colorado': ['http://dbpedia.org/resource/Colorado',
  'http://dbpedia.org/resource/Colorado_Rapids',
  'http://dbpedia.org/resource/Colorado_Springs,_Colorado'],
 'USA': ['http://dbpedia.org/resource/United_States',
  'http://dbpedia.org/resource/Democratic_Party_(United_States)',
  'http://dbpedia.org/resource/California'],
 'Hitler': ['http://dbpedia.org/resource/Adolf_Hitler',
  'http://dbpedia.org/resource/Nazi_Germany',
  'http://dbpedia.org/resource/Hitler_family'],
 'World-War-2': ['http://dbpedia.org/resource/World_War_II',
  'http://dbpedia.org/resource/Nazi_Germany',
  'http://dbpedia.org/resource/Eastern_Front_

In [33]:
# Spot-check Spotlight on sent3. In the recorded output it detects 'Hitler'
# and 'Germany' but returns nothing for 'World-War-2'.
EL_DBpedia_spotlight(sent3, nlp)

(['Hitler', 'Germany'],
 {'Hitler': ['http://dbpedia.org/resource/Adolf_Hitler'],
  'Germany': ['http://dbpedia.org/resource/Germany']})

In [38]:
# Candidate source 2: the Redis-backed index.
# The result's index holds bare resource names, so each one is prefixed to
# make it comparable with the full URIs already stored from Lookup.
# NOTE: like the Lookup cell, this appends in place and is not idempotent.
for e in entities:
    redis_res = [
        'http://dbpedia.org/resource/' + name
        for name in EL_redis_db(e, redis_forms, redis_redir).index.values
    ]
    candidate_mapping[e].extend(redis_res)

In [39]:
# Inspect the combined candidates: Lookup URIs first, then the Redis-derived URIs.
candidate_mapping

{'Lionel Messi': ['http://dbpedia.org/resource/Lionel_Messi',
  'http://dbpedia.org/resource/Ronaldo–Messi_rivalry',
  'http://dbpedia.org/resource/List_of_career_achievements_by_Lionel_Messi',
  'http://dbpedia.org/resource/Lionel_Messi'],
 'Argentina': ['http://dbpedia.org/resource/Argentina',
  'http://dbpedia.org/resource/Argentina_national_football_team',
  'http://dbpedia.org/resource/Time_in_Argentina',
  'http://dbpedia.org/resource/Argentina',
  'http://dbpedia.org/resource/Argentina_national_football_team',
  'http://dbpedia.org/resource/Argentine_Football_Association',
  'http://dbpedia.org/resource/Argentina_national_rugby_union_team'],
 'Colorado': ['http://dbpedia.org/resource/Colorado',
  'http://dbpedia.org/resource/Colorado_Rapids',
  'http://dbpedia.org/resource/Colorado_Springs,_Colorado',
  'http://dbpedia.org/resource/Colorado',
  'http://dbpedia.org/resource/Colorado_Buffaloes_football',
  "http://dbpedia.org/resource/Colorado_Buffaloes_men's_basketball",
  'http:

In [40]:
from collections import Counter

# Pick, for every mention, the URI proposed most often across all sources.
#
# Counter.most_common(1) resolves ties in first-seen order, so a tie goes to
# the candidate that was appended earliest. The previous max(set(...)) form
# resolved ties by set iteration order, which for strings can change from one
# kernel start to the next.
# A mention with no candidates at all maps to None instead of raising.
final_mapping = {}
for mention, candidates in candidate_mapping.items():
    top = Counter(candidates).most_common(1)
    final_mapping[mention] = top[0][0] if top else None

In [41]:
# Show the chosen URI per mention.
# NOTE(review): the recorded output has no 'Germany' key although `entities`
# lists it, so the saved output looks stale. Re-run from a fresh kernel to confirm.
final_mapping

{'Lionel Messi': 'http://dbpedia.org/resource/Lionel_Messi',
 'Argentina': 'http://dbpedia.org/resource/Argentina',
 'Colorado': 'http://dbpedia.org/resource/Colorado',
 'USA': 'http://dbpedia.org/resource/United_States',
 'Hitler': 'http://dbpedia.org/resource/Adolf_Hitler',
 'World-War-2': 'http://dbpedia.org/resource/World_War_II'}

In [42]:
# Run Spotlight over each full sentence and keep its two return values in
# parallel lists, one entry per sentence.
sent_texts, sent_uris = [], []
for s in (sent1, sent2, sent3):
    texts, uris = EL_DBpedia_spotlight(s, nlp)
    sent_texts.append(texts)
    sent_uris.append(uris)

In [43]:
# Per-sentence Spotlight links. In the recorded output, 'Argentina' in sent1
# is linked to the national football team rather than the country, and
# 'football' is picked up as an extra mention that is not in `entities`.
sent_uris

[{'Lionel Messi': ['http://dbpedia.org/resource/Lionel_Messi'],
  'football': ['http://dbpedia.org/resource/Association_football'],
  'Argentina': ['http://dbpedia.org/resource/Argentina_national_football_team']},
 {'Colorado': ['http://dbpedia.org/resource/Colorado'],
  'USA': ['http://dbpedia.org/resource/United_States']},
 {'Hitler': ['http://dbpedia.org/resource/Adolf_Hitler'],
  'Germany': ['http://dbpedia.org/resource/Germany']}]