<a href="https://colab.research.google.com/github/carloscastillo10/kbs-scientific-publications/blob/development/notebooks/rdf-triplets.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# RDF Data Creation

In [None]:
from rdflib import Graph, Literal, URIRef

## 1. Declare the namespace prefixes of the vocabularies to reuse

In [None]:
from rdflib import Namespace
from rdflib.namespace import FOAF as foaf, RDF as rdf, RDFS as rdfs, \
  DCTERMS as dct, SKOS as skos, XSD as xsd

# Third-party vocabularies reused for classes and properties.
schema = Namespace('http://schema.org/')
bibo = Namespace('http://purl.org/ontology/bibo/')
prism = Namespace('http://prismstandard.org/namespaces/basic/2.0/')
# DBpedia: `dbo` supplies classes/properties; `dbr` is the base under which
# organization, city and country IRIs are built further down.
dbo = Namespace('http://dbpedia.org/ontology/')
dbr = Namespace('http://dbpedia.org/resource/')
# Project-local namespaces: `data` is the base for minted instance IRIs
# (articles, authors, subjects, citations, languages) and `vocab` holds the
# custom properties (openAccess, articleNumber, publicationScore, ...).
data = Namespace('http://example.org/data/')
vocab = Namespace('http://example.org/vocabulary/')

## 2. Load the article data from a JSON dataset

In [None]:
import requests

In [None]:
# Download the articles dataset. Without a timeout `requests` waits forever on
# a stalled connection, which would leave the cell hanging.
response = requests.get(
  "https://raw.githubusercontent.com/carloscastillo10/kbs-scientific-publications/development/data/articles.json",
  timeout=30,
)

In [None]:
# Fail loudly when the download did not succeed. Silently skipping the
# assignment would leave `articles` undefined and surface later as a confusing
# NameError in an unrelated cell.
if response.status_code != 200:
  raise RuntimeError(f"Could not download the articles dataset (HTTP {response.status_code})")

# The article records live under the top-level 'content' key of the payload.
articles = response.json()['content']

## 3. Add Triples to a graph

#### Normalize string

In [None]:
def normalize_string(value):
  """Reduce a label to an alphanumeric token suitable for building IRIs.

  Steps, in order:
    1. remove every occurrence of the HTML entity prefix ``&amp``;
    2. keep only alphanumeric characters (spaces, punctuation and symbols
       are dropped);
    3. replace a fixed set of accented letters with unaccented counterparts.
       Accented letters outside that set are kept unchanged.

  Args:
    value: the text to normalize.

  Returns:
    The normalized string ('' when nothing alphanumeric remains).
  """
  # Unaccented replacement -> accented letters it stands in for.
  replacements = {
    'A': 'ÅÃ',
    'a': 'áàãâäăå',
    'e': 'éèê',
    'i': 'í',
    'O': 'Ö',
    'o': 'óòö',
    'u': 'úü',
    'c': 'ćç',
    'n': 'ñń',
    'b': 'ß',
    's': 'š',
    'Z': 'Ž',
    'z': 'ž',
  }
  table = str.maketrans({
    accented: plain
    for plain, group in replacements.items()
    for accented in group
  })

  # The entity has to be stripped BEFORE the alphanumeric filter: once '&' has
  # been filtered out, '&amp' can no longer match and 'amp' would leak into
  # the result (e.g. 'R&amp;D' -> 'RampD').
  without_entity = value.replace('&amp', '')
  cleaned = ''.join(char for char in without_entity if char.isalnum())

  return cleaned.translate(table)

#### Normalize date

In [None]:
def normalize_date(date):
  """Return `date` with every hyphen removed, e.g. '2021-03-04' -> '20210304'."""
  pieces = date.split('-')
  return ''.join(pieces)

#### Add article literals to a graph

In [None]:
def create_article_literals(article, graph, article_uri=None):
  """Add the type triple and the literal-valued properties of one article.

  Args:
    article: dict describing the article. Keys read here: 'identifier',
      'title', 'doi', 'url', 'publication_name', 'open_access' (always
      emitted) and 'article_number', 'created', 'deposited', 'abstract',
      'issn', 'volume', 'score' (emitted only when truthy).
    graph: rdflib Graph receiving the triples; it is mutated in place.
    article_uri: subject IRI of the article. When omitted it is derived from
      ``article['identifier']`` under the `data` namespace, so the function
      does not depend on a global `article_uri` left behind by another cell.

  Returns:
    The same `graph` object.
  """
  if article_uri is None:
    article_uri = URIRef(data + article['identifier'])

  # Properties emitted for every article.
  graph.add((article_uri, rdf.type, bibo.Article))
  graph.add((article_uri, dct.identifier, Literal(article['identifier'])))
  graph.add((article_uri, dct.title, Literal(article['title'])))
  graph.add((article_uri, bibo.doi, Literal(article['doi'])))
  graph.add((article_uri, prism.url, Literal(article['url'], datatype=xsd.anyURI)))
  graph.add((article_uri, prism.publicationName, Literal(article['publication_name'])))
  graph.add((article_uri, vocab.openAccess, Literal(article['open_access'])))

  # Properties emitted only when the source value is truthy, as
  # (source key, predicate, datatype). A datatype of None leaves the literal
  # typing to rdflib, exactly like calling Literal(value).
  optional_literals = (
    ('article_number', vocab.articleNumber, None),
    ('created', dct.created, xsd.date),
    ('deposited', schema.datePublished, xsd.date),
    ('abstract', bibo.abstract, None),
    ('issn', bibo.issn, None),
    ('volume', bibo.volume, None),
    ('score', vocab.publicationScore, xsd.int),
  )
  for key, predicate, datatype in optional_literals:
    value = article[key]
    if value:
      graph.add((article_uri, predicate, Literal(value, datatype=datatype)))

  return graph


#### Add article language relationship to a graph

In [None]:
def create_article_language_relationship(article_uri, language, graph):
  """Describe the article's language and link the article to it.

  The language IRI is built under the `data` namespace from the literal
  prefix 'language' followed by the normalized, upper-cased acronym.

  Args:
    article_uri: IRI of the article.
    language: dict whose 'acronym' entry identifies the language.
    graph: rdflib Graph receiving the triples; it is mutated in place.

  Returns:
    The same `graph` object.
  """
  acronym = language['acronym']
  local_name = 'language' + normalize_string(acronym.upper())
  language_node = URIRef(data + local_name)

  graph.add((language_node, rdf.type, dbo.Language))
  graph.add((language_node, dbo.acronym, Literal(acronym)))
  # Link the article to its language resource.
  graph.add((article_uri, dct.language, language_node))

  return graph

#### Add article journal relationship to a graph

In [None]:
def create_article_journal_relationship(article_uri, journal, graph):
  """Describe the journal an article appeared in and link the article to it.

  Args:
    article_uri: IRI of the article.
    journal: dict whose 'name' entry holds the journal name.
    graph: rdflib Graph receiving the triples; it is mutated in place.

  Returns:
    The same `graph` object.
  """
  name = journal['name']
  # NOTE(review): the journal IRI is minted under the `bibo` ontology namespace
  # ('.../bibo/Journal/<name>') and typed as dbo.Organization -- confirm this
  # modelling is intentional.
  local_name = 'Journal/' + normalize_string(name)
  journal_node = URIRef(bibo + local_name)

  graph.add((journal_node, rdf.type, dbo.Organization))
  graph.add((journal_node, schema.name, Literal(name)))
  graph.add((article_uri, schema.publication, journal_node))

  return graph

#### Add article citation relationship to a graph

In [None]:
def create_article_citation_relationship(article_uri, article_identifier, citations, graph):
  """Add one citation resource per entry of `citations` and attach it to the article.

  Each citation IRI is built under the `data` namespace as
  'Citation' + article identifier + normalized(organization name + date
  without hyphens). The reporting organization is described through
  `create_organization_triples`.

  Args:
    article_uri: IRI of the cited article.
    article_identifier: identifier string of the article, embedded in the
      citation IRI.
    citations: iterable of dicts with 'organization', 'date' and 'number'.
    graph: rdflib Graph receiving the triples.

  Returns:
    The graph after all citations were added.
  """
  for entry in citations:
    source = entry['organization']
    source_name = source['name']
    extraction_date = entry['date']

    key = normalize_string(source_name + normalize_date(extraction_date))
    citation_node = URIRef(data + 'Citation' + article_identifier + key)

    graph.add((citation_node, rdf.type, data.Citation))
    graph.add((citation_node, vocab.extractionDate, Literal(extraction_date, datatype=xsd.date)))
    graph.add((citation_node, dbo.number, Literal(entry['number'], datatype=xsd.int)))

    # Describe the organization that reported the count, then wire it in.
    graph, source_node = create_organization_triples(source, graph)
    graph.add((citation_node, dct.source, source_node))
    graph.add((article_uri, vocab.citedBy, citation_node))

  return graph

#### Add organization city relationship to a graph

In [None]:
def create_organization_city_relationship(organization_uri, city, graph):
  """Describe a city and link the organization to it via dbo:city.

  Args:
    organization_uri: IRI of the organization located in the city.
    city: dict whose 'name' entry holds the city name.
    graph: rdflib Graph receiving the triples; it is mutated in place.

  Returns:
    The same `graph` object.
  """
  name = city['name']
  # The city IRI lives under the `dbr` namespace, keyed by the normalized name.
  city_node = URIRef(dbr + normalize_string(name))

  graph.add((city_node, rdf.type, dbo.City))
  graph.add((city_node, foaf.name, Literal(name)))
  graph.add((organization_uri, dbo.city, city_node))

  return graph

#### Add organization country relationship to a graph

In [None]:
def create_organization_country_relationship(organization_uri, country, graph):
  """Describe a country and link the organization to it via dbo:country.

  Args:
    organization_uri: IRI of the organization located in the country.
    country: dict whose 'name' entry holds the country name.
    graph: rdflib Graph receiving the triples; it is mutated in place.

  Returns:
    The same `graph` object.
  """
  name = country['name']
  # The country IRI lives under the `dbr` namespace, keyed by the normalized name.
  country_node = URIRef(dbr + normalize_string(name))

  graph.add((country_node, rdf.type, dbo.Country))
  graph.add((country_node, foaf.name, Literal(name)))
  graph.add((organization_uri, dbo.country, country_node))

  return graph

#### Add article organization relationship to a graph

In [None]:
def create_organization_triples(organization, graph):
  """Describe an organization and, when available, its city and country.

  Args:
    organization: dict with a mandatory 'name' and optional 'description',
      'city' and 'country' entries ('city' / 'country' are dicts carrying a
      'name'). Missing, null or empty optional entries are skipped.
    graph: rdflib Graph receiving the triples.

  Returns:
    A ``(graph, organization_uri)`` tuple: the updated graph and the IRI of
    the organization, built under the `dbr` namespace from its normalized name.
  """
  organization_uri = URIRef(dbr + normalize_string(organization['name']))
  graph.add((organization_uri, rdf.type, dbo.Organization))
  graph.add((organization_uri, foaf.name, Literal(organization['name'])))

  # Only emit a comment for a real description: a key that is present but
  # null/empty would otherwise produce a meaningless literal.
  description = organization.get('description')
  if description:
    graph.add((organization_uri, rdfs.comment, Literal(description)))

  # `or {}` tolerates both an absent key and an explicit null, so an
  # organization without location data no longer raises KeyError/TypeError.
  city = organization.get('city') or {}
  if city.get('name'):
    graph = create_organization_city_relationship(organization_uri, city, graph)

  country = organization.get('country') or {}
  if country.get('name'):
    graph = create_organization_country_relationship(organization_uri, country, graph)

  return graph, organization_uri

In [None]:
def create_article_organization_relationship(article_uri, organizations, graph):
  """Describe every organization in `organizations` and affiliate the article with it.

  Args:
    article_uri: IRI of the article.
    organizations: iterable of organization dicts, each passed on to
      `create_organization_triples`.
    graph: rdflib Graph receiving the triples.

  Returns:
    The graph after all organizations were added.
  """
  for entry in organizations:
    described = create_organization_triples(entry, graph)
    graph, organization_node = described
    graph.add((article_uri, schema.affiliation, organization_node))

  return graph

In [None]:
def create_author_organization_relationship(author_uri, organizations, graph):
  """Describe every organization in `organizations` and affiliate the author with it.

  Args:
    author_uri: IRI of the author.
    organizations: iterable of organization dicts, each passed on to
      `create_organization_triples`.
    graph: rdflib Graph receiving the triples.

  Returns:
    The graph after all organizations were added.
  """
  for entry in organizations:
    described = create_organization_triples(entry, graph)
    graph, organization_node = described
    graph.add((author_uri, schema.affiliation, organization_node))

  return graph

#### Add article author relationship to a graph

In [None]:
def create_article_author_relationship(article_uri, authors, graph):
  """Describe each author as a foaf:Person and link the article to them.

  The author IRI is built under the `data` namespace from the normalized
  concatenation of given and family name.

  Args:
    article_uri: IRI of the article.
    authors: iterable of dicts with 'given_name', 'family_name' and
      'organization' (the author's affiliations; skipped when falsy).
    graph: rdflib Graph receiving the triples.

  Returns:
    The graph after all authors were added.
  """
  for person in authors:
    given = person['given_name']
    family = person['family_name']
    person_node = URIRef(data + normalize_string(given + family))

    graph.add((person_node, rdf.type, foaf.Person))
    graph.add((person_node, foaf.givenName, Literal(given)))
    graph.add((person_node, foaf.familyName, Literal(family)))

    affiliations = person['organization']
    if affiliations:
      graph = create_author_organization_relationship(person_node, affiliations, graph)

    # Finally attach the author to the article.
    graph.add((article_uri, dct.creator, person_node))

  return graph

#### Add triples

In [None]:
# Sanity check: how many entries were loaded into `articles`.
len(articles)

1028

In [None]:
graph = Graph()
# (identifier, exception) for every article that could not be fully converted.
failed_articles = []

for article in articles:
  # One try per article: a malformed record is reported and skipped instead of
  # silently aborting the conversion of every article that follows it.
  # Triples added for a record before it failed stay in the graph.
  try:
    article_uri = URIRef(data + article['identifier'])
    graph = create_article_literals(article, graph)

    if article['language']: graph = create_article_language_relationship(article_uri, article['language'], graph)
    if article['journal']: graph = create_article_journal_relationship(article_uri, article['journal'], graph)
    if article['citations']: graph = create_article_citation_relationship(article_uri, article['identifier'], article['citations'], graph)
    if article['author']: graph = create_article_author_relationship(article_uri, article['author'], graph)
    if article['organization']: graph = create_article_organization_relationship(article_uri, article['organization'], graph)
    if article['subject']:
      for subject in article['subject']:
        # Subjects become skos:Concept resources under the `data` namespace.
        subject_uri = URIRef(data + normalize_string(subject['name']))
        graph.add((subject_uri, rdf.type, skos.Concept))
        graph.add((subject_uri, foaf.name, Literal(subject['name'])))

        graph.add((article_uri, dct.subject, subject_uri))
  except Exception as e:
    identifier = article.get('identifier') if isinstance(article, dict) else None
    failed_articles.append((identifier, e))
    print(f"Skipping article {identifier!r}: {type(e).__name__}: {e}")

if failed_articles:
  print(f"{len(failed_articles)} of {len(articles)} articles could not be fully converted")

## 4. Saving RDF

In [None]:
# Bind the declared prefixes so the Turtle output uses readable names instead
# of auto-generated ones (ns1, ns2, ...). The triples themselves are unchanged.
for prefix, namespace in (
  ('schema', schema),
  ('bibo', bibo),
  ('prism', prism),
  ('dbo', dbo),
  ('dbr', dbr),
  ('data', data),
  ('vocab', vocab),
):
  graph.bind(prefix, namespace)

# State the format explicitly: the default serialization differs between rdflib
# releases, and the '.ttl' file must contain Turtle regardless of the version.
graph.serialize(destination='articles.ttl', format='turtle')

<Graph identifier=Nbf19631a1d7540f6bd33824ac8018d2e (<class 'rdflib.graph.Graph'>)>