In [62]:
import csv
import re
import spacy
from spacy import displacy

from owlready2 import *
import rdflib

import re

# pre-processing
def Filter(senSet):
    #remove content between [ ]
    print("Pre-processing...")
    for index in range(len(senSet)):
        while senSet[index].find('[')>=0:
            i_start = senSet[index].find('[')
            i_end = senSet[index].find(']')
            s = senSet[index][i_start:i_end+2]
            senSet[index] = senSet[index].replace(s, "")
            
# stopwords from parsing the whole sentence
def RemoveStopword1(phrase, doc, chunkStart, chunkEnd, stopList):
    result = phrase
    i_stop=0
    #start = chunk.start# to eliminate the condition when the first word of chunk is stop word
    for i_sen in range(chunkStart, chunkEnd):
        while i_stop < len(stopList) and stopList[i_stop] < i_sen-1:
            #print(str(stopList[i_stop]) + ' ' + str(i_sen))
            i_stop = i_stop+1
        # there is no stop word in current chunk
        if i_stop >= len(stopList):
            break;
        #print(i_sen)
        # finish going through the chunk
        if stopList[i_stop] > chunk.end-1:
            break
        # find the stop word and remove it
        if stopList[i_stop] == i_sen-1:
            #print(doc[i_sen-1])
            if i_sen-1 == chunkStart:
                result = result.replace(doc[i_sen-1].text + ' ', '')
                chunkStart = chunkStart+1
            else:
                result = result.replace(' ' + doc[i_sen-1].text, '')
    return result

# stopwords from parsing triple separately
def RemoveStopword2(inputPhrase):
    result = ''
    doc_phrase = nlp(str(inputPhrase))
    for token in doc_phrase:
        #print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
        #       token.shape_, token.is_alpha, token.is_stop)
        if not token.is_stop:
            result = result + token.text + ' '
        #else:
        #    print(token.text + ', ', end = '')    
    return result

# transfer a phrase to a URI form
def FormatURI(phrase):
    #print('Before formatting:  ' + phrase)
    chars = list(phrase)
    if len(chars) > 0:
        chars[0] = chars[0].upper()
    for i in range(len(chars)):
        if chars[i] == ' ' and i+1 < len(chars):
            chars[i+1] = chars[i+1].upper()
    phrase = ''.join(chars)
    phrase = phrase.replace(' ', '')
    phrase = re.sub(r'[^a-zA-Z0-9\s]', '', phrase)
    print('After formatting:  ' + phrase)
    return phrase

#load data
file = open("shortdataset.csv", "r")
#file = open("newdataset_formatted.csv", "r")
reader = csv.reader(file)

senSet = []
for item in reader:
    #format sentences in item as string
    fullP = "".join(item)
    splitP = fullP.split(";", 3);
    splitS = splitP[3][1:len(splitP[3])].split(".");
    #print(splitS)
    for sen in splitS:
        senSet.append(sen)#store the sentence into an array

file.close()
print("Total sentences: " + str(len(senSet)))

#pre-processing
Filter(senSet)

'''for sentence in senSet:
    print(senSet)'''

nlp = spacy.load('en_core_web_sm')

for index in range(len(senSet)):
    #index = 13

    # parse sentence
    doc = nlp(str(senSet[index]))
    print('\n' + str(index) + ': ' + senSet[index])

    # process result
    sub = ""
    pred = ""
    obj = ""
    stopList = []

    print('stop words: ', end='')
    for token in doc:
        #print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
        #      token.shape_, token.is_alpha, token.is_stop)

        # record the index of stop words
        if token.is_stop:
            print(token.text + ', ', end='')
            stopList.append(token.i)
        if re.match('nsubj', token.dep_):   
            subj = token.text
        if re.match('ROOT', token.dep_): 
            pred = token.lemma_
            pred_orig = token.text
        if re.match('dobj', token.dep_): 
            obj = token.text
            '''#an earlier solution that I find not necessary
            obj = token.lemma_
            # to avoid cases like "-PRON-"
            if obj[0] == '-':
                obj = token.text'''
    print('\n')

    subj_1 = subj
    obj_1 = obj
    # using chunk to update subject and object
    for chunk in doc.noun_chunks:
        if chunk.root.head.text == pred_orig and re.match('nsubj', chunk.root.dep_):
            subj = chunk.text
            # remove stop words
            subj_1 = RemoveStopword1(subj, doc, chunk.start, chunk.end, stopList)

        if chunk.root.head.text == pred_orig and re.match('dobj|attr', chunk.root.dep_):
            obj = chunk.text
            # remove stop words
            obj_1 = RemoveStopword1(obj, doc, chunk.start, chunk.end, stopList)
        #print(chunk.text + ' ' + str(chunk.start))
        #print(chunk.text, chunk.root.text, chunk.root.dep_, chunk.root.head.text)

    print('Before : ' + subj + ' - ' + pred + ' - ' + obj)
    print('Method1: ' + subj_1 + ' - ' + pred + ' - ' + obj_1)

    # second method to remove stop words
    subj_2 = RemoveStopword2(subj)
    obj_2 = RemoveStopword2(obj)
    print('Method2: ' + subj_2 + '- ' + pred + ' - ' + obj_2 + '\n')

    ## visualize the semantic tree
    #options = {'compact': True, 'color': 'blue'}
    #displacy.serve(doc, style='dep', options=options)
    #displacy.serve(doc, style='dep')

    # query the triples in dbpd with SPARQL
    m_world = World()# Owlready2 stores every triples in a ‘World’ object
    onto = m_world.get_ontology("dbpedia.owl").load()

    graph = m_world.as_rdflib_graph()

    prefix = """
    PREFIX rdf:<http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbpd:<http://dbpedia.org/ontology/>
    """
    subj = "provinceLink"
    #pred = "range"
    #obj = "Province"

    qSelect = prefix + """
    SELECT ?sub WHERE {
      ?sub rdf:""" + FormatURI(pred) + """ dbpd:""" + FormatURI(obj) + """.
    }"""

    qAsk = prefix + """
    ASK {
        dbpd:""" + FormatURI(subj) + """ rdf:""" + FormatURI(pred) + """ dbpd:""" + FormatURI(obj) + """.
    }"""
    
    r = list(graph.query(qAsk))

    print(r)

Total sentences: 46
Pre-processing...

0: Sure um start off - I'm a Purdue grad
stop words: off, a, 

Before : I - be - a Purdue grad
Method1: I - be - Purdue grad
Method2: I - be - Purdue grad 





After formatting:  Be
After formatting:  APurdueGrad
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  APurdueGrad
[False]

1:   Um my title is director of landscape architecture so um  for the ah staff of people
stop words: my, is, of, so, for, the, of, 

Before : my title - be - director
Method1: title - be - director
Method2: title - be - director 





After formatting:  Be
After formatting:  Director
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  Director
[False]

2:   Um we manage all the construction and innovation projects for the university across the state
stop words: we, all, the, and, for, the, across, the, 

Before : we - manage - all the construction and innovation projects
Method1: we - manage - construction innovation projects
Method2: - manage - construction innovation projects 





After formatting:  Manage
After formatting:  AllTheConstructionAndInnovationProjects
After formatting:  ProvinceLink
After formatting:  Manage
After formatting:  AllTheConstructionAndInnovationProjects
[False]

3:    Some projects we handle in-house we design these others we're managing and consulting on your architecture 
stop words: we, in, we, these, others, we, and, on, your, 

Before : we - design - these others
Method1: we - design - others
Method2: - design - 





After formatting:  Design
After formatting:  TheseOthers
After formatting:  ProvinceLink
After formatting:  Design
After formatting:  TheseOthers
[False]

4:   Um and we've done quite a bit in here in the last five years or so including 
stop words: and, we, done, quite, a, in, here, in, the, last, five, or, so, 

Before : we - do - 
Method1: we - do - 
Method2: - do - 





After formatting:  Do
After formatting:  
After formatting:  ProvinceLink
After formatting:  Do
After formatting:  
[False]

5:   Um typically this camp will contact me and say 'We have this project we want to do
stop words: this, will, me, and, say, have, this, we, to, do, 

Before : this camp - contact - me
Method1: camp - contact - me
Method2: camp - contact - 





After formatting:  Contact
After formatting:  Me
After formatting:  ProvinceLink
After formatting:  Contact
After formatting:  Me
[False]

6:   How do we go about getting it accomplished?'  Um that's when I decide our staff will handle this do the -
stop words: do, we, go, about, it, that, when, our, will, this, do, the, 

Before : staff - be - the
Method1: staff - be - the
Method2: staff - be - 





After formatting:  Be
After formatting:  The
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  The
[False]

7: - design in-house or if it's big enough that we need some outside help we call up XYZ consultants and bring them in to work on it
stop words: in, or, if, it, enough, that, we, some, we, call, up, and, them, in, to, on, it, 

Before : we - design - them
Method1: we - design - them
Method2: - design - 





After formatting:  Design
After formatting:  Them
After formatting:  ProvinceLink
After formatting:  Design
After formatting:  Them
[False]

8:   Um and then I either oversee my own staff's production plans or development plans or code compliance um design compliance um pretty high aesthetic standard
stop words: and, then, either, my, own, or, or, 

Before : I - oversee - my own staff's production plans
Method1: I - oversee - staff's production plans
Method2: I - oversee - staff 's production plans 





After formatting:  Oversee
After formatting:  MyOwnStaffsProductionPlans
After formatting:  ProvinceLink
After formatting:  Oversee
After formatting:  MyOwnStaffsProductionPlans
[False]

9:   Um it may vary at different campuses
stop words: it, may, at, 

Before : it - vary - 
Method1: it - vary - 
Method2: - vary - 





After formatting:  Vary
After formatting:  
After formatting:  ProvinceLink
After formatting:  Vary
After formatting:  
[False]

10:   It varies here
stop words: here, 

Before :   It - vary - 
Method1:   It - vary - 
Method2:    It - vary - 





After formatting:  Vary
After formatting:  
After formatting:  ProvinceLink
After formatting:  Vary
After formatting:  
[False]

11:   Um camp location and we want to feel like we are in the woods
stop words: and, we, to, we, are, in, the, 

Before : we - location - 
Method1: we - location - 
Method2: - location - 





After formatting:  Location
After formatting:  
After formatting:  ProvinceLink
After formatting:  Location
After formatting:  
[False]

12:   Um we construct things differently here
stop words: we, here, 

Before : we - construct - things
Method1: we - construct - things
Method2: - construct - things 





After formatting:  Construct
After formatting:  Things
After formatting:  ProvinceLink
After formatting:  Construct
After formatting:  Things
[False]

13:   Um tree protection is critically important to us um in whatever we're doing on any of our campuses but especially here in the woods
stop words: is, to, us, in, whatever, we, doing, on, any, of, our, but, here, in, the, 

Before : tree protection - be - whatever
Method1: tree protection - be - whatever
Method2: tree protection - be - 





After formatting:  Be
After formatting:  Whatever
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  Whatever
[False]

14:   Um we're very selective about tree removal
stop words: we, very, about, 

Before : we - be - 
Method1: we - be - 
Method2: - be - 





After formatting:  Be
After formatting:  
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  
[False]

15:   Um so just a minute ago we were talking about selectively removing some trees um that would be very selectively   we wouldn't think - have too much reservation about removing ah -
stop words: so, just, a, we, were, about, some, that, would, be, very, we, would, have, too, much, about, 

Before : we - have - too much reservation
Method1: we - have - reservation
Method2: - have - reservation 





After formatting:  Have
After formatting:  TooMuchReservation
After formatting:  ProvinceLink
After formatting:  Have
After formatting:  TooMuchReservation
[False]

16: undergrowth   some scrub growth at this time of year especially if it blocks your views down to the water flow
stop words: some, at, this, of, if, it, your, down, to, the, 

Before : it - undergrowth - some scrub growth
Method1: it - undergrowth - scrub growth
Method2: - undergrowth - scrub growth 





After formatting:  Undergrowth
After formatting:  SomeScrubGrowth
After formatting:  ProvinceLink
After formatting:  Undergrowth
After formatting:  SomeScrubGrowth
[False]

17:   But we start talking about 10- 12- and 30-inch trees
stop words: we, about, and, 

Before : we - start - 
Method1: we - start - 
Method2: - start - 





After formatting:  Start
After formatting:  
After formatting:  ProvinceLink
After formatting:  Start
After formatting:  
[False]

18:  um probably gonna look to another solution and find a better better place where we can get a view
stop words: to, another, and, a, where, we, can, get, a, 

Before : we - go - view
Method1: we - go - view
Method2: - go - view 





After formatting:  Go
After formatting:  View
After formatting:  ProvinceLink
After formatting:  Go
After formatting:  View
[False]

19:   Um but I will say um there are really striking views there and there are some great possibilities there that give the sense of almost ah an overlook similar to some of these others that you have up on the bluff
stop words: but, will, say, there, are, really, there, and, there, are, some, there, that, give, the, of, almost, an, to, some, of, these, others, that, you, have, up, on, the, 

Before : I - say - um there are really striking views
Method1: I - say - um striking views
Method2: I - say - um striking views 





After formatting:  Say
After formatting:  UmThereAreReallyStrikingViews
After formatting:  ProvinceLink
After formatting:  Say
After formatting:  UmThereAreReallyStrikingViews
[False]

20:   Um its its variance um with good design you may be able to integrate that with a resting interval uh get up halfway and then you push out um and then you go back up to go on up to what would be the the larger tree house space
stop words: its, its, with, you, may, be, to, that, with, a, get, up, and, then, you, out, and, then, you, go, back, up, to, go, on, up, to, what, would, be, the, the, 

Before : you - go - that
Method1: you - go - that
Method2: - go - 





After formatting:  Go
After formatting:  That
After formatting:  ProvinceLink
After formatting:  Go
After formatting:  That
[False]

21:   Um this is what I do day in and day out
stop words: this, is, what, do, in, and, out, 

Before : I - be - what
Method1: I - be - what
Method2: I - be - 





After formatting:  Be
After formatting:  What
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  What
[False]

22:   Um talk to designers and dream up solutions whether it's huge or um sports facilities or it's-
stop words: to, and, up, whether, it, or, or, 

Before : it - talk - solutions
Method1: it - talk - solutions
Method2: - talk - solutions 





After formatting:  Talk
After formatting:  Solutions
After formatting:  ProvinceLink
After formatting:  Talk
After formatting:  Solutions
[False]

23: a lotta fun to come to into the woods and change gears and do things differently
stop words: a, to, to, into, the, and, and, do, 

Before : provinceLink - fun - things
Method1: provinceLink - fun - things
Method2: provinceLink - fun - things 





After formatting:  Fun
After formatting:  Things
After formatting:  ProvinceLink
After formatting:  Fun
After formatting:  Things
[False]

24:   If it's site design and it goes through my office 
stop words: it, and, it, through, my, 

Before : it - be - site design
Method1: it - be - site design
Method2: - be - site design 





After formatting:  Be
After formatting:  SiteDesign
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  SiteDesign
[False]

25: 
stop words: 

Before : provinceLink -  - 
Method1: provinceLink -  - 
Method2: provinceLink -  - 





After formatting:  
After formatting:  
After formatting:  ProvinceLink
After formatting:  
After formatting:  
[False]

26: So we really envisioned the tree house to be a multipurpose but almost a loading area for the zip line
stop words: we, really, the, to, be, a, but, almost, a, for, the, 

Before : we - envision - the tree house
Method1: we - envision - tree house
Method2: - envision - tree house 





After formatting:  Envision
After formatting:  TheTreeHouse
After formatting:  ProvinceLink
After formatting:  Envision
After formatting:  TheTreeHouse
[False]

27:  It's kind of where that decision was made with the plan zip line you have seen the zip line platform standing on that platform and hope to launch to your right across the valley is what our hope is that will then end up in the 
stop words: of, where, that, was, made, with, the, you, have, the, on, that, and, to, to, your, across, the, is, what, our, is, that, will, then, up, in, the, 

Before : our hope - be - what
Method1: hope - be - what
Method2: hope - be - 





After formatting:  Be
After formatting:  What
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  What
[False]

28: 
stop words: 

Before : provinceLink -  - 
Method1: provinceLink -  - 
Method2: provinceLink -  - 





After formatting:  
After formatting:  
After formatting:  ProvinceLink
After formatting:  
After formatting:  
[False]

29: 
stop words: 

Before : provinceLink -  - 
Method1: provinceLink -  - 
Method2: provinceLink -  - 





After formatting:  
After formatting:  
After formatting:  ProvinceLink
After formatting:  
After formatting:  
[False]

30: 
stop words: 

Before : provinceLink -  - 
Method1: provinceLink -  - 
Method2: provinceLink -  - 





After formatting:  
After formatting:  
After formatting:  ProvinceLink
After formatting:  
After formatting:  
[False]

31: 
stop words: 

Before : provinceLink -  - 
Method1: provinceLink -  - 
Method2: provinceLink -  - 





After formatting:  
After formatting:  
After formatting:  ProvinceLink
After formatting:  
After formatting:  
[False]

32:   Um so really incorporating the tree house as a staging area one but coupled zip line out of the tree house
stop words: so, really, the, as, a, one, but, out, of, the, 

Before : provinceLink - incorporate - the tree house
Method1: provinceLink - incorporate - tree house
Method2: provinceLink - incorporate - tree house 





After formatting:  Incorporate
After formatting:  TheTreeHouse
After formatting:  ProvinceLink
After formatting:  Incorporate
After formatting:  TheTreeHouse
[False]

33:   That's -
stop words: 

Before : That - be - 
Method1: That - be - 
Method2: That - be - 





After formatting:  Be
After formatting:  
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  
[False]

34: that's really the vision to make it a multi multipurpose tree house as well
stop words: that, really, the, to, make, it, a, as, well, 

Before : it - be - the vision
Method1: it - be - vision
Method2: - be - vision 





After formatting:  Be
After formatting:  TheVision
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  TheVision
[False]

35:   Um so that can really be envisioned and that that's gonna take some work to get through that valley
stop words: so, that, can, really, be, and, that, that, take, some, to, get, through, that, 

Before : that - envision - work
Method1: that - envision - work
Method2: - envision - work 





After formatting:  Envision
After formatting:  Work
After formatting:  ProvinceLink
After formatting:  Envision
After formatting:  Work
[False]

36:   
stop words: 

Before : provinceLink -    - 
Method1: provinceLink -    - 
Method2: provinceLink -    - 





After formatting:  
After formatting:  
After formatting:  ProvinceLink
After formatting:  
After formatting:  
[False]

37:   That's what we're hoping for
stop words: what, we, for, 

Before : we - be - 
Method1: we - be - 
Method2: - be - 





After formatting:  Be
After formatting:  
After formatting:  ProvinceLink
After formatting:  Be
After formatting:  
[False]

38:   And that would be you know those - a certain amount of just functionality with the zip line um in terms of you know it's gotta be open
stop words: that, would, be, you, those, a, amount, of, just, with, the, in, of, you, it, be, 

Before : it - get - those
Method1: it - get - those
Method2: - get - 





After formatting:  Get
After formatting:  Those
After formatting:  ProvinceLink
After formatting:  Get
After formatting:  Those
[False]

39:   You've got the cable above you
stop words: the, above, you, 

Before :   You - get - the cable
Method1:   You - get - cable
Method2:    You - get - cable 





After formatting:  Get
After formatting:  TheCable
After formatting:  ProvinceLink
After formatting:  Get
After formatting:  TheCable
[False]

40:   Um basically if you're in a chair or what we call like bag or sling um you've got the open switch gears and we're not running the zip line and we want it to be a tree house then there have got to be some type of security like a gate something like that that closes which also secures us
stop words: if, you, in, a, or, what, we, call, or, you, the, and, we, not, the, and, we, it, to, be, a, then, there, have, to, be, some, of, a, something, that, that, which, also, us, 

Before : you - get - the open switch gears
Method1: you - get - open switch gears
Method2: - get - open switch gears 





After formatting:  Get
After formatting:  TheOpenSwitchGears
After formatting:  ProvinceLink
After formatting:  Get
After formatting:  TheOpenSwitchGears
[False]

41:   Um if you guys have seen some of our challenge courses we have to have access prevention devices um which keeps people from just coming in off the street and climbing on our tower or getting up to a
stop words: if, you, have, some, of, our, we, have, to, have, which, from, just, in, off, the, and, on, our, or, up, to, a, 

Before : which - um - people
Method1: which - um - people
Method2: - um - people 





After formatting:  Um
After formatting:  People
After formatting:  ProvinceLink
After formatting:  Um
After formatting:  People
[False]

42: zip line and things like that
stop words: and, that, 

Before : provinceLink - line - 
Method1: provinceLink - line - 
Method2: provinceLink - line - 





After formatting:  Line
After formatting:  
After formatting:  ProvinceLink
After formatting:  Line
After formatting:  
[False]

43:  So for something like this where we want the tree house to be open and accessible we would have to be able to - for one safety purposes for for kids that are up there close off that opening when we're not running it um but two also for that access prevention
stop words: for, something, this, where, we, the, to, be, and, we, would, have, to, be, to, for, one, for, for, that, are, up, there, off, that, when, we, not, it, but, two, also, for, that, 

Before : we - have - it
Method1: we - have - it
Method2: - have - 





After formatting:  Have
After formatting:  It
After formatting:  ProvinceLink
After formatting:  Have
After formatting:  It
[False]

44: 
stop words: 

Before : provinceLink -  - 
Method1: provinceLink -  - 
Method2: provinceLink -  - 





After formatting:  
After formatting:  
After formatting:  ProvinceLink
After formatting:  
After formatting:  
[False]

45: Are you guys planning on doing the tree house design in your office or or 
stop words: you, on, doing, the, in, your, or, or, 

Before : you guys - plan - design
Method1: guys - plan - design
Method2: guys - plan - design 





After formatting:  Plan
After formatting:  Design
After formatting:  ProvinceLink
After formatting:  Plan
After formatting:  Design
[False]




In [49]:
# to remove WARNINGs from Owlready2
from IPython.display import HTML
HTML('''<script>
code_show_err=false; 
function code_toggle_err() {
 if (code_show_err){
 $('div.output_stderr').hide();
 } else {
 $('div.output_stderr').show();
 }
 code_show_err = !code_show_err
} 
$( document ).ready(code_toggle_err);
</script>
To toggle on/off output_stderr, click <a href="javascript:code_toggle_err()">here</a>.''')


In [59]:
# reference: https://pythonhosted.org/Owlready2/world.html
from owlready2 import *
import rdflib

my_world = World()# Owlready2 stores every triples in a ‘World’ object
onto = my_world.get_ontology("dbpedia.owl").load()

graph = my_world.as_rdflib_graph()
print(len(graph))

prefix = """
PREFIX rdf:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX dbpd:<http://dbpedia.org/ontology/>
"""

'''r = list(graph.query(prefix + """
SELECT ?sub WHERE {
  ?sub rdf:range  dbpd:Province.
}"""))'''

'''r = list(graph.query(prefix + """
ASK {
  dbpd:provinceLink rdf:range  dbpd:Province.
}"""))'''

r = list(graph.query(prefix + """
DESCRIBE ?sub WHERE {
  ?sub rdf:range  dbpd:Province.
}"""))

print(r)



31050




Exception: DESCRIBE not implemented