# Creating a graph for entities found in the NEKG dataset

First, load essential libraries.

In [1]:
# Code Based On: https://github.com/r4isstatic/csv-to-ttl/blob/master/uber.py
import csv
from rdflib import URIRef, Literal, Namespace, Graph
from rdflib.namespace import RDF, RDFS, XSD
import uuid

Now we create a graph and bind the namespaces it uses.

In [2]:
# Graph that receives the namespace bindings and all triples added in the
# cells below, and that is serialized to Turtle at the end.
bnode_g = Graph()

In [3]:
# namespaces
def create_namespace(graph, namespace, prefix):
    """Register ``prefix`` for ``namespace`` on ``graph`` and return the namespace.

    Args:
        graph: Graph whose namespace manager receives the prefix binding.
        namespace: Base IRI of the namespace (the calls in this file pass
            plain strings).
        prefix: Short prefix to associate with ``namespace``.

    Returns:
        A ``Namespace`` object built from ``namespace``.
    """
    bound_ns = Namespace(namespace)
    manager = graph.namespace_manager
    manager.bind(prefix, namespace)
    return bound_ns


# Prefix bindings used when building the triples and serializing the graph.
hyp_namespace = create_namespace(bnode_g, "http://example.org/hypothesis_ontology/", 'hyp')
oa_namespace = create_namespace(bnode_g, "http://www.w3.org/ns/oa#", 'oa')
covidpr_namespace = create_namespace(bnode_g, "http://ns.inria.fr/covid19/property/", 'covidpr')
dct_namespace = create_namespace(bnode_g, "http://purl.org/dc/terms/", 'dct')
covid_namespace = create_namespace(bnode_g, "http://ns.inria.fr/covid19/", 'covid')
# PROV-O terms are defined under http://www.w3.org/ns/prov#. The
# http://www.w3.org/TR/prov-o/ address is the specification document, so using
# it as the namespace would make prov:Entity resolve to a non-standard IRI.
prov_namespace = create_namespace(bnode_g, 'http://www.w3.org/ns/prov#', 'prov')

Now we load the data obtained from the NEKG dataset by running a SPARQL query. The query result contains only the entities connected to the abstracts that we work with.

In [4]:
# Open the CSV export of the SPARQL query result.
# newline='' is what the csv module requires so that line breaks inside quoted
# fields are handled by the parser itself. The encoding is pinned to UTF-8 (the
# charset of the SPARQL CSV results format) so decoding does not depend on the
# platform default.
# The handle stays open on purpose: it is consumed and closed in the next cell.
ifile = open('../sparql-queries/bnode-instances.csv', newline='', encoding='utf-8')
reader = csv.reader(ifile)

In [5]:
# Turn every data row of the CSV into one annotation resource in bnode_g,
# then write the graph to disk as Turtle.
try:
    # The first row is the CSV header. The default argument keeps an empty
    # file from raising StopIteration.
    next(reader, None)

    for row in reader:
        if not row:
            # csv.reader yields an empty list for blank lines; skip them
            # instead of failing on the column lookups below.
            continue

        # A fresh random identifier per row: every run mints new IRIs.
        unique_id = str(uuid.uuid4())
        unique_NE_uri = URIRef(hyp_namespace + unique_id + '#namedEntity')

        # Columns are read by position; row[0] is not used here.
        bnode_g.add((unique_NE_uri, oa_namespace.hasSource, URIRef(row[3])))
        bnode_g.add((unique_NE_uri, RDF.type, prov_namespace.Entity))
        bnode_g.add((unique_NE_uri, RDF.type, oa_namespace.Annotation))
        bnode_g.add((unique_NE_uri, oa_namespace.exact, Literal(row[1])))
        bnode_g.add((unique_NE_uri, oa_namespace.hasBody, URIRef(row[5])))
        bnode_g.add((unique_NE_uri, covidpr_namespace.confidence, Literal(row[4], datatype=XSD.decimal)))

        # row[2] holds a comma-separated list. Trim each entry and drop empty
        # ones so that an empty cell or a trailing comma does not produce an
        # empty or whitespace-padded dct:subject literal.
        for subj in row[2].split(','):
            subj = subj.strip()
            if subj:
                bnode_g.add((unique_NE_uri, dct_namespace.subject, Literal(subj)))
finally:
    # Release the file handle even when a malformed row aborts the loop.
    ifile.close()

# Only reached when every row was processed successfully.
# NOTE(review): g is not used in the visible cells; the assignment is kept in
# case a later cell relies on it.
g = bnode_g.serialize('./outputs/bnode_graph.ttl', format='turtle')