<a href="https://colab.research.google.com/github/carloscastillo10/kbs-scientific-publications/blob/development/notebooks/linked-articles-triples.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Import prefixes of models that I will reuse

In [65]:
# pip install rdflib
from rdflib import URIRef, Namespace
from rdflib.namespace import RDF as rdf, OWL as owl, DCTERMS as dct

# Vocabulary namespaces reused when reading and writing triples
# (for example dbo.Country, bibo.Article, schema.mentions).
schema = Namespace('http://schema.org/')
bibo = Namespace('http://purl.org/ontology/bibo/')
dbo = Namespace('http://dbpedia.org/ontology/')
# Plain string prefixes (not Namespace objects): they are concatenated with
# resource and category names to build full DBpedia URIs.
dbr = 'http://dbpedia.org/resource/'
dbc = 'http://dbpedia.org/resource/Category:'

### Get candidates with Spotlight

#### Candidate method
Returns a list of all matching candidate entities rather than only the top candidate.

In [66]:
# pip install pyspotlight 
import spotlight 

In [67]:
def set_candidate(candidate_name, resource):
  """Flatten one Spotlight resource into a candidate record.

  `candidate_name` is stored under 'name'. The resource's 'uri' value is
  prefixed with `dbr`, and its 'types' value is exposed as 'categories'.
  """
  record = {'name': candidate_name}
  record['label'] = resource['label']
  record['uri'] = dbr + resource['uri']
  record['contextualScore'] = resource['contextualScore']
  record['finalScore'] = resource['finalScore']
  record['categories'] = resource['types']
  return record

In [68]:
def get_candidates(text, filters, confidence=0.4, support=20):
  """Query the DBpedia Spotlight candidates endpoint and flatten the result.

  Args:
    text: Text to send to Spotlight.
    filters: Filter dict forwarded to Spotlight (e.g. {'types': 'DBpedia:Place'}).
    confidence: Confidence threshold forwarded to Spotlight.
    support: Support threshold forwarded to Spotlight.

  Returns:
    A list with one record (built by set_candidate) per candidate resource.
    The list is empty when the lookup fails for any reason, so callers can
    always iterate over or truth-test the result.
  """
  import logging  # local import so this cell does not depend on another cell's imports

  spotlight_uri = 'https://api.dbpedia-spotlight.org/en'
  try:
    candidates = spotlight.candidates(f'{spotlight_uri}/candidates', text, confidence=confidence, support=support, filters=filters)
    list_candidates = []
    for candidate in candidates:
      resources = candidate['resource']
      # 'resource' may hold a single resource or a list of them; normalise
      # to a list so both shapes go through the same path.
      if not isinstance(resources, list):
        resources = [resources]
      for resource in resources:
        list_candidates.append(set_candidate(candidate['name'], resource))
    return list_candidates
  except Exception as e:
    # Still best-effort (one failed lookup must not abort the whole run),
    # but the failure is reported instead of being silently discarded.
    logging.getLogger(__name__).warning('Spotlight lookup failed for %r: %s', text, e)
    return []

#### Method to set country relationships

In [69]:
def set_country_relationships(country_uri, candidates, graph):
  """Link a country to every candidate with owl:sameAs and return the graph.

  Each candidate's 'uri' value is wrapped in a URIRef and added to `graph`
  as the object of (country_uri, owl:sameAs, ...).
  """
  # Lazy generator: each URIRef is built right before its triple is added.
  targets = (URIRef(entry['uri']) for entry in candidates)
  for target in targets:
    graph.add((country_uri, owl.sameAs, target))
  return graph

#### Method to set city relationships

In [70]:
def set_city_relationships(city_uri, candidates, graph):
  """Assert owl:sameAs between a city and each of its candidates.

  Adds one (city_uri, owl:sameAs, URIRef(candidate['uri'])) triple per
  candidate to `graph`, then returns that same graph.
  """
  for entry in candidates:
    linked_resource = URIRef(entry['uri'])
    same_as_triple = (city_uri, owl.sameAs, linked_resource)
    graph.add(same_as_triple)
  return graph

#### Load data from rdf file

In [71]:
from rdflib import Graph

# Create an empty graph and load the article triples from articles.ttl.
# No explicit format is passed to parse(). Every later cell reads from and
# adds triples to this same `graph` object.
graph = Graph()
graph.parse('articles.ttl')

<Graph identifier=N2e18db24c645478ca9f434f09f805393 (<class 'rdflib.graph.Graph'>)>

#### List of country candidates as a DataFrame

In [72]:
import pandas as pd

# Restrict Spotlight candidates to DBpedia places.
place_filter = {
  'types': 'DBpedia:Place'
}

# Snapshot the country subjects first: the loop below adds owl:sameAs triples
# to the same graph, so it must not iterate a live view of that graph.
country_subjects = list(graph.subjects(rdf['type'], dbo.Country))

# Collect the candidate records in a plain list and build the DataFrame once.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
country_records = []
for subject in country_subjects:
  candidates = get_candidates(subject, place_filter)
  if candidates:
    graph = set_country_relationships(subject, candidates, graph)
    country_records.extend(candidates)

country_results = pd.DataFrame(country_records)

In [73]:
country_results.head(5)

Unnamed: 0,name,label,uri,contextualScore,finalScore,categories
0,Bahrain,Bahrain,http://dbpedia.org/resource/Bahrain,0.010974,0.97341,"Wikidata:Q6256, Schema:Place, Schema:Country, ..."
1,Estonia,Estonia,http://dbpedia.org/resource/Estonia,0.003276,0.976137,"Wikidata:Q6256, Schema:Place, Schema:Country, ..."
2,Gabon,Gabon,http://dbpedia.org/resource/Gabon,0.004754,0.958515,"Wikidata:Q6256, Schema:Place, Schema:Country, ..."
3,Greece,Greece,http://dbpedia.org/resource/Greece,0.001351,0.676972,"Wikidata:Q6256, Schema:Place, Schema:Country, ..."
4,Hungary,Hungary,http://dbpedia.org/resource/Hungary,0.004736,0.917794,"Wikidata:Q6256, Schema:Place, Schema:Country, ..."


#### List of city candidates as a DataFrame

In [74]:
import pandas as pd

# Restrict Spotlight candidates to DBpedia places.
place_filter = {
  'types': 'DBpedia:Place'
}

# Snapshot the city subjects first: the loop below adds owl:sameAs triples
# to the same graph, so it must not iterate a live view of that graph.
city_subjects = list(graph.subjects(rdf['type'], dbo.City))

# Collect the candidate records in a plain list and build the DataFrame once.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call.
city_records = []
for subject in city_subjects:
  candidates = get_candidates(subject, place_filter)
  if candidates:
    graph = set_city_relationships(subject, candidates, graph)
    city_records.extend(candidates)

city_results = pd.DataFrame(city_records)

In [75]:
city_results.head(5)

Unnamed: 0,name,label,uri,contextualScore,finalScore,categories
0,Ajman,Ajman,http://dbpedia.org/resource/Ajman,0.99999,1.0,"Wikidata:Q515, Wikidata:Q486972, Schema:Place,..."
1,Alcorcon,Alcorcón,http://dbpedia.org/resource/Alcorcón,0.5,0.991632,"Wikidata:Q486972, Schema:Place, DBpedia:Popula..."
2,Alexandria,Alexandria,http://dbpedia.org/resource/Alexandria,0.004837,0.975198,"Wikidata:Q515, Wikidata:Q486972, Schema:Place,..."
3,Almada,Almada,http://dbpedia.org/resource/Almada,0.999909,1.0,"Wikidata:Q486972, Schema:Place, DBpedia:Popula..."
4,Annapolis,Annapolis Royal,http://dbpedia.org/resource/Annapolis_Royal,0.950715,0.649334,"Wikidata:Q486972, Wikidata:Q3957, Schema:Place..."


### Get annotations with Tagme API

In [76]:
import os
from getpass import getpass

import requests 

tagme_uri = 'https://tagme.d4science.org/tagme/tag'
# The gcube token is a credential, so it is read from the TAGME_TOKEN
# environment variable (or prompted for) instead of being stored in the
# notebook. The token that used to be hardcoded here was committed with the
# notebook and should be treated as compromised and revoked.
tagme_token = os.environ.get('TAGME_TOKEN') or getpass('TagMe gcube-token: ')
# Query parameters shared by every TagMe request; the text to annotate is
# added per request.
tagme_payload = {
  'lang': 'en',
  'include_abstract': 'true',
  'include_categories': 'true',
  'gcube-token': tagme_token
}

# HTTP headers sent with every TagMe request.
tagme_headers = {
  'user-agent': 'Mozilla/5.0',
  'accept': 'application/json', 
  'content-type': 'application/json'
}


In [77]:
def _dbpedia_local_name(text):
  """Turn a title into a DBpedia local name, preserving its inner casing."""
  name = text.replace(' ', '_')
  # Upper-case only the first character. str.capitalize() would also
  # lower-case the rest ("New York" -> "New york"), and DBpedia URIs are
  # case-sensitive.
  return name[:1].upper() + name[1:]


def set_annotation_category(article_id, annotation, category):
  """Build one (article, annotation, category) record from a TagMe annotation.

  Args:
    article_id: Identifier of the article the annotation was found in.
    annotation: TagMe annotation dict; 'rho' and 'link_probability' are
      copied through, and 'title' (or 'spot' when no title is present) names
      the DBpedia resource.
    category: DBpedia category name attached to the annotation.

  Returns:
    A dict with keys 'article_id', 'annotation' (DBpedia resource URI),
    'rho', 'link_probability' and 'category' (DBpedia category URI).
  """
  # Prefer the linked page title over the surface text found in the
  # abstract: the spot is whatever wording the author used, which is not
  # necessarily the name of the resource.
  resource_name = annotation.get('title') or annotation['spot']
  return {
    'article_id': article_id,
    'annotation': dbr + _dbpedia_local_name(resource_name),
    'rho': annotation['rho'],
    'link_probability': annotation['link_probability'],
    'category': dbc + _dbpedia_local_name(category)
  }

In [78]:
def get_annotations(article_id, abstract, min_rho=0.1, min_link_probability=0.1, timeout=30):
  """Annotate an abstract with TagMe and return one record per category.

  Args:
    article_id: Identifier stored in every returned record.
    abstract: Text sent to TagMe.
    min_rho: Annotations must have 'rho' strictly above this value.
    min_link_probability: Annotations must have 'link_probability' strictly
      above this value.
    timeout: Seconds to wait for TagMe before requests raises a timeout
      error instead of blocking indefinitely.

  Returns:
    A list of records built by set_annotation_category, one per DBpedia
    category of each annotation that passes both thresholds. Empty when the
    response status is not 200 or nothing qualifies.
  """
  # Build per-call parameters instead of writing 'text' into the shared
  # module-level payload, so one call never leaves its abstract behind.
  params = dict(tagme_payload, text=abstract)
  response = requests.get(tagme_uri, params=params, headers=tagme_headers, timeout=timeout)
  if response.status_code != 200:
    return []

  categories = []
  for annotation in response.json().get('annotations', []):
    if annotation.get('rho', 0) <= min_rho:
      continue
    if annotation.get('link_probability', 0) <= min_link_probability:
      continue
    # Annotations without categories contribute no records.
    for category in annotation.get('dbpedia_categories', []):
      categories.append(set_annotation_category(article_id, annotation, category))
  return categories


#### Method to set relationships between annotation and category

In [79]:
def set_annotation_category_relationship(annotation_uri, category, graph):
  """Add (annotation_uri, dcterms:subject, URIRef(category)) and return the graph."""
  graph.add((annotation_uri, dct.subject, URIRef(category)))
  return graph

#### Method to set relationships between article and annotation

In [80]:
def set_article_annotation_relationship(article_uri, annotations, graph):
  """Attach every annotation record to its article and return the graph.

  For each record, the annotation is first linked to its category through
  set_annotation_category_relationship, then the article is linked to the
  annotation with schema:mentions.
  """
  for record in annotations:
    mentioned = URIRef(record['annotation'])
    graph = set_annotation_category_relationship(mentioned, record['category'], graph)
    mention_triple = (article_uri, schema.mentions, mentioned)
    graph.add(mention_triple)
  return graph

In [81]:
# Snapshot the articles before looping: the loop body adds triples to the
# same graph, so it must not iterate a live view of that graph.
articles = list(graph.subjects(rdf['type'], bibo.Article))
for article in articles:
  abstract = graph.value(article, bibo.abstract) # Get abstract
  # Articles without an abstract have nothing to annotate.
  if abstract:
    annotations = get_annotations(article, abstract)
    graph = set_article_annotation_relationship(article, annotations, graph)


In [83]:
# Pass the format explicitly: serialize() does not infer it from the file
# extension, and the default differs between rdflib versions (RDF/XML before
# 6.0), which would put non-Turtle content into a .ttl file.
graph.serialize(destination='linkedArticles.ttl', format='turtle')

<Graph identifier=N2e18db24c645478ca9f434f09f805393 (<class 'rdflib.graph.Graph'>)>