# Creating a graph for keyword instances found by our pattern matching

First, we load the essential libraries.

In [1]:
# Code Based On: https://github.com/r4isstatic/csv-to-ttl/blob/master/uber.py
import csv
from rdflib import URIRef, Literal, Namespace, Graph
from rdflib.namespace import RDF, RDFS
import uuid

Now we create a graph and prepare the namespaces we use.

In [2]:
# RDF graph that accumulates the keyword triples added further down and is
# serialized to Turtle at the end of the notebook.
datasetGraph = Graph()

In [3]:
# Namespaces used to build predicates for the triples in this graph.
# hyp: the hypothesis ontology (example.org base IRI).
hyp = Namespace("http://example.org/hypothesis_ontology/")
# oa: the W3C Web Annotation vocabulary.
oa = Namespace("http://www.w3.org/ns/oa#")

In [4]:
# Register the short prefixes on the graph so the serialized output can
# abbreviate IRIs as hyp:... and oa:... instead of writing them in full.
datasetGraph.bind('hyp', hyp)
datasetGraph.bind('oa', oa)

Finally, we load the hypothesis entities found by our NLP algorithm. For each of them we create a set of triples that allows us to identify it:
1. We create a unique URI for each keyword.
2. We link the keyword with its hypothesis.
3. As the value of this keyword, we store the string representing the keyword.
4. As the source for this entity, we use the abstract of the paper.

In [5]:
# Open the CSV of extracted hypothesis entities and wrap it in a csv reader.
# newline='' is required by the csv module: it lets the reader itself handle
# line endings, so line breaks embedded in quoted fields are parsed correctly
# instead of being translated by the text layer first.
# NOTE: ifile must stay open while `reader` is being consumed; it is closed
# explicitly at the end of the notebook.
ifile = open('../Extracting-keywords/paper_hyp_entity_data.csv', 'rt', encoding="utf8", newline='')
reader = csv.reader(ifile)

In [6]:
# Turn every data row of the CSV into triples in datasetGraph.
#
# Columns read from each row (0-based), exactly as the code uses them:
#   row[2]     subject URI the keywords are attached to
#              (presumably the hypothesis URI - TODO confirm against the CSV)
#   row[4]     URI stored as oa:hasSource of that subject
#   row[6]     text stored as rdf:value of that subject
#   row[8]     comma-separated list; only its item count is used
#   row[9 + i] text of the i-th keyword
# NOTE(review): a row with fewer than 9 + <item count of row[8]> columns
# raises IndexError - confirm the CSV producer always writes that many.

# The first row is the header; consume it before the data loop.
next(reader, None)

for row in reader:
    # csv.reader yields an empty list for a blank line; there is nothing to add.
    if not row:
        continue

    num_of_keywords = len(row[8].split(','))
    hypothesis_uri = URIRef(row[2])

    # These two triples depend only on the row, not on the keyword, so they
    # are added once per row (the graph stores each distinct triple once anyway).
    datasetGraph.add((hypothesis_uri, RDF.value, Literal(row[6])))
    # Possibly redundant: the same triple may already be in HypothesisInstances.ttl.
    datasetGraph.add((hypothesis_uri, oa.hasSource, URIRef(row[4])))

    for i in range(num_of_keywords):
        # A fresh random UUID gives every keyword occurrence its own URI.
        unique_keyword_uri = URIRef('http://example.org/hypothesis_ontology/' + str(uuid.uuid4()) + '#keyword')
        datasetGraph.add((hypothesis_uri, hyp.contains, unique_keyword_uri))
        datasetGraph.add((unique_keyword_uri, oa.hasTarget, Literal(row[9 + i])))

# Close the input CSV before writing the output: the two steps are
# independent, and closing first means the file handle is not left open if
# serialization fails (for example when ./outputs does not exist).
ifile.close()

# Write the whole graph to disk in Turtle syntax.
# NOTE(review): what serialize() returns when a destination is given differs
# between rdflib versions - confirm before relying on `h`.
h = datasetGraph.serialize(destination='./outputs/hypothesis-keywords-graph.ttl', format='turtle')