In [20]:
import csv
import re
import spacy
from spacy import displacy

from owlready2 import *
import rdflib

import re

# pre-processing
def PreProcess(senSet):
    #remove content between [ ]
    print("Pre-processing...")
    for index in range(len(senSet)):
        while senSet[index].find('[')>=0:
            i_start = senSet[index].find('[')
            i_end = senSet[index].find(']')
            s = senSet[index][i_start:i_end+2]
            senSet[index] = senSet[index].replace(s, "")
            
# stopwords from parsing the whole sentence
def RemoveStopword1(phrase, doc, chunkStart, chunkEnd, stopList):
    result = phrase
    i_stop=0
    #start = chunk.start# to eliminate the condition when the first word of chunk is stop word
    for i_sen in range(chunkStart, chunkEnd):
        while i_stop < len(stopList) and stopList[i_stop] < i_sen-1:
            #print(str(stopList[i_stop]) + ' ' + str(i_sen))
            i_stop = i_stop+1
        # there is no stop word in current chunk
        if i_stop >= len(stopList):
            break;
        #print(i_sen)
        # finish going through the chunk
        if stopList[i_stop] > chunkEnd-1:
            break
        # find the stop word and remove it
        if stopList[i_stop] == i_sen-1:
            #print(doc[i_sen-1])
            if i_sen-1 == chunkStart:
                result = result.replace(doc[i_sen-1].text + ' ', '')
                chunkStart = chunkStart+1
            else:
                result = result.replace(' ' + doc[i_sen-1].text, '')
    return result

# stopwords from parsing triple separately
def RemoveStopword2(inputPhrase):
    result = ''
    doc_phrase = nlp(str(inputPhrase))
    for token in doc_phrase:
        #print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
        #       token.shape_, token.is_alpha, token.is_stop)
        if not token.is_stop:
            result = result + token.text + ' '
        #else:
        #    print(token.text + ', ', end = '')    
    return result


# extract one triple from given sentence
def ExtractTriple(sen):
    # initialize the triple and stop word list
    subj = ""
    pred = ""
    obj = ""
    stopList = []
    
    # parse sentence
    doc = nlp(str(sen))
    print('\n' + str(index) + ': ' + senSet[index])
    
    ## visualize the semantic tree
    #options = {'compact': True, 'color': 'blue'}
    #displacy.serve(doc, style='dep', options=options)
    #displacy.serve(doc, style='dep')

    print('stop words: ', end='')
    for token in doc:
        #print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
        #      token.shape_, token.is_alpha, token.is_stop)

        # record the index of stop words
        if token.is_stop:
            print(token.text + ', ', end='')
            stopList.append(token.i)
        if re.match('nsubj', token.dep_):   
            subj = token.text
        if re.match('ROOT', token.dep_): 
            pred = token.lemma_
            pred_orig = token.text
        if re.match('dobj', token.dep_): 
            obj = token.text
            '''#an earlier solution that I find not necessary
            obj = token.lemma_
            # to avoid cases like "-PRON-"
            if obj[0] == '-':
                obj = token.text'''
    print('\n')

    subj_1 = subj
    obj_1 = obj
    # using chunk to update subject and object
    for chunk in doc.noun_chunks:
        if chunk.root.head.text == pred_orig and re.match('nsubj', chunk.root.dep_):
            subj = chunk.text
            # remove stop words
            subj_1 = RemoveStopword1(subj, doc, chunk.start, chunk.end, stopList)

        if chunk.root.head.text == pred_orig and re.match('dobj|attr', chunk.root.dep_):
            obj = chunk.text
            # remove stop words
            obj_1 = RemoveStopword1(obj, doc, chunk.start, chunk.end, stopList)
        #print(chunk.text + ' ' + str(chunk.start))
        #print(chunk.text, chunk.root.text, chunk.root.dep_, chunk.root.head.text)

    #print('Before : ' + subj + ' - ' + pred + ' - ' + obj)
    #print('Method1: ' + subj_1 + ' - ' + pred + ' - ' + obj_1)

    # second method to remove stop words
    subj_2 = RemoveStopword2(subj)
    obj_2 = RemoveStopword2(obj)
    #print('Method2: ' + subj_2 + '- ' + pred + ' - ' + obj_2 + '\n')

    return [subj, pred, obj]

# transfer a phrase to a URI form
def FormatURI(phrase, isPred = False):
    #print('Before formatting:  ' + phrase)
    chars = list(phrase)
    if len(chars) > 0 and not isPred:
        chars[0] = chars[0].upper()
    for i in range(len(chars)):
        if chars[i] == ' ' and i+1 < len(chars):
            chars[i+1] = chars[i+1].upper()
    phrase = ''.join(chars)
    phrase = phrase.replace(' ', '')
    phrase = re.sub(r'[^a-zA-Z0-9\s]', '', phrase)
    #print('After formatting:  ' + phrase)
    return phrase

# query the given triple in the ontology with SPARQL
# return true/false as result
def QueryTriple(subj, pred, obj):
    prefix = """
    PREFIX rdf:<http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbpd:<http://dbpedia.org/ontology/>
    """
    #subj = "provinceLink"
    pred = "range"
    #obj = "Province"
    qSelect = prefix + """
    SELECT ?sub WHERE {
      ?sub rdf:""" + FormatURI(pred) + """ dbpd:""" + FormatURI(obj) + """.
    }"""
    qAsk = prefix + """
    ASK {
        dbpd:""" + FormatURI(subj) + """ rdf:""" + FormatURI(pred) + """ dbpd:""" + FormatURI(obj) + """.
    }"""
    
    r = list(m_graph.query(qAsk))
    return r

def ComponentQuery(subj, pred, obj):
    prefix = """
    PREFIX rdf:<http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbpd:<http://dbpedia.org/ontology/>
    """
    #subj = "provinceLink"
    #pred = "range"
    #obj = "province"
    
    qAsk = prefix + """
    ASK {
        dbpd:""" + FormatURI(subj) + """ rdf:""" + FormatURI(pred, True) + """ dbpd:""" + FormatURI(obj) + """.
    }"""
    
    qSelect_S = prefix + """
    SELECT ?sub WHERE {
      ?sub rdf:""" + FormatURI(pred, True) + """ dbpd:""" + FormatURI(obj) + """.
    }"""
    
    qSelect_P = prefix + """
    SELECT ?pred WHERE {
      dbpd:""" + FormatURI(subj) + """ ?pred dbpd:""" + FormatURI(obj) + """.
    }"""
    
    qSelect_O = prefix + """
    SELECT ?obj WHERE {
      dbpd:""" + FormatURI(subj) + """ rdf:""" + FormatURI(pred, True) + """ ?obj.
    }"""
    
    qSelect_S_P = prefix + """
    SELECT ?sub ?pred WHERE {
      ?sub ?pred dbpd:""" + FormatURI(obj) + """.
    }"""
    
    qSelect_S_O = prefix + """
    SELECT ?sub ?obj WHERE {
      ?sub rdf:""" + FormatURI(pred, True) + """ ?obj.
    }"""
    
    #print(qSelect_S_P)
    r = list(m_graph.query(qSelect_S_P))
    if r!=[]:
        print(r)
    return r

def PartialQuery(subj, pred, obj):
    prefix = """
    PREFIX rdf:<http://www.w3.org/2000/01/rdf-schema#>
    PREFIX dbpd:<http://dbpedia.org/ontology/>
    """
    
    doc = nlp(str(obj))
    for token in doc:
        print(token.text, token.lemma_, token.pos_, token.tag_, token.dep_,
              token.shape_, token.is_alpha, token.is_stop)
        #qAsk = prefix + """
        #ASK {
        #    dbpd:""" + FormatURI(subj) + """ rdf:""" + FormatURI(pred, True) + """ dbpd:""" + FormatURI(token.lemma) + """.
        #}"""
        #print(qAsk)
        r = ""
    
    return r

# load Spacy NLP dictionary
nlp = spacy.load('en_core_web_sm')

# load DBPD ontology and construct graph for query
m_world = World()# Owlready2 stores every triples in a ‘World’ object
m_onto = m_world.get_ontology("dbpedia.owl").load()
m_graph = m_world.as_rdflib_graph()

# load data
#file = open("shortdataset.csv", "r")
file = open("newdataset_formatted.csv", "r")
reader = csv.reader(file)
senSet = []
for item in reader:
    #format sentences in item as string
    fullP = "".join(item)
    splitP = fullP.split(";", 3);
    splitS = splitP[3][1:len(splitP[3])].split(".");
    #print(splitS)
    for sen in splitS:
        senSet.append(sen)#store the sentence into an array
file.close()
print("Total sentences: " + str(len(senSet)))

# pre-processing
PreProcess(senSet)

# parse and query each sentence
for index in range(len(senSet)):
    #index = 2

    # extract triple from current sentence
    [subj, pred, obj] = ExtractTriple(senSet[index])
    print('Triple for Query   : ' + subj + ' - ' + pred + ' - ' + obj)

    # query the triple in dbpd with SPARQL
    queryResult = QueryTriple(subj, pred, obj)
    # print('Triple Query Result: ' + str(queryResult))

    # query with only a part of the triple
    ComponentQuery(subj, pred, obj)
    #PartialQuery(subj, pred, obj)



Total sentences: 534
Pre-processing...

0: Yeah full detail with everyone here
stop words: full, with, everyone, here, 

Triple for Query   :  - detail - 

1:   So um you guys I'll just do introductions again
stop words: you, just, do, again, 

Triple for Query   : I - do - introductions

2:   Um you guys have met me
stop words: you, have, me, 

Triple for Query   : you guys - meet - me

3:   I'm Tyler
stop words: 

Triple for Query   :   I - be - Tyler

4:   Um I have worked here and I do the recruits department
stop words: have, here, and, do, the, 

Triple for Query   : I - do - the recruits department

5:   Um I also do some property management stuff for our south campus um I'm on call for emergencies and working with maintenance and stuff like that
stop words: also, do, some, for, our, on, call, for, and, with, and, that, 

Triple for Query   : I - be - stuff

6:   Um this Daniel and Janice from Champ Camp 
stop words: this, and, from, 

Triple for Query   :  - um - this Daniel

7


43:   A lot of the times since these kids when you ask them whether they wanna do things inside or outside more times than not they're gonna say they wanna do things outside because they're inside
stop words: of, the, since, these, when, you, them, whether, they, do, or, more, than, not, they, say, they, do, because, they, 

Triple for Query   : they - lot - things

44: 
stop words: 

Triple for Query   :  -  - 

45: And that's their life
stop words: that, their, 

Triple for Query   : that - be - their life

46:   So we want it to be different
stop words: we, it, to, be, 

Triple for Query   : we - want - 

47:   Um so when we talk to kids at camp our camp about what they want um what they hope to do
stop words: so, when, we, to, at, our, about, what, they, what, they, to, do, 

Triple for Query   : they - um - what

48:   They want to push beyond the limit 
stop words: to, beyond, the, 

Triple for Query   :   They - want - 

49: 
stop words: 

Triple for Query   :  -  - 

50: Sorry

Triple for Query   : he - go - thoughts

88: 
stop words: 

Triple for Query   :  -  - 

89: Um and and that's that's enabled Champ Camp to become what it is today because we don't put limitations on what these kids 
stop words: and, and, that, that, to, become, what, it, is, because, we, do, put, on, what, these, 

Triple for Query   : we - be - what

90:   Um people put limitations on 
stop words: put, on, 

Triple for Query   : people - put - limitations

91:   I try to I try to move around those limitation as best we can and that's what you're here 
stop words: to, to, move, around, those, as, we, can, and, that, what, you, here, 

Triple for Query   : you - be - what

92: 
stop words: 

Triple for Query   :  -  - 

93: Video cuts out briefly and comes back in 0:08:20 - different angle due to camera failure]
stop words: out, and, back, in, due, to, 

Triple for Query   : Video - cut - 

94: Yeah there's a lot to be seen there
stop words: there, a, to, be, there, 

Triple for Query 

Triple for Query   :  -  - 

144: 
stop words: 

Triple for Query   :  -  - 

145: 
stop words: 

Triple for Query   :  -  - 

146:   Um so really incorporating the tree house as a staging area one but coupled zip line out of the tree house
stop words: so, really, the, as, a, one, but, out, of, the, 

Triple for Query   :  - incorporate - the tree house

147:   That's -
stop words: 

Triple for Query   : That - be - 

148: that's really the vision to make it a multi multipurpose tree house as well
stop words: that, really, the, to, make, it, a, as, well, 

Triple for Query   : it - be - the vision

149:   Um so that can really be envisioned and that that's gonna take some work to get through that valley
stop words: so, that, can, really, be, and, that, that, take, some, to, get, through, that, 

Triple for Query   : that - envision - work
[(rdflib.term.URIRef('http://dbpedia.org/ontology/chiefEditor'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#domain')), (rdflib.term.URI

Triple for Query   : it - get - those

153:   You've got the cable above you
stop words: the, above, you, 

Triple for Query   :   You - get - the cable

154:   Um basically if you're in a chair or what we call like bag or sling um you've got the open switch gears and we're not running the zip line and we want it to be a tree house then there have got to be some type of security like a gate something like that that closes which also secures us
stop words: if, you, in, a, or, what, we, call, or, you, the, and, we, not, the, and, we, it, to, be, a, then, there, have, to, be, some, of, a, something, that, that, which, also, us, 

Triple for Query   : you - get - the open switch gears

155:   Um if you guys have seen some of our challenge courses we have to have access prevention devices um which keeps people from just coming in off the street and climbing on our tower or getting up to a
stop words: if, you, have, some, of, our, we, have, to, have, which, from, just, in, off, the, and, on,

Triple for Query   : kids - be - a lot

207:   Typically it's pretty common at least in our activities where we do ah some type of arts or arts and crafts
stop words: it, at, least, in, our, where, we, do, some, of, or, and, 

Triple for Query   : it - be - type
[(rdflib.term.URIRef('http://dbpedia.org/ontology/GovernmentType'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf')), (rdflib.term.URIRef('http://dbpedia.org/ontology/DocumentType'), rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#subClassOf'))]

208:   So we we modify it everywhere so it's either ah -
stop words: we, we, it, everywhere, so, it, either, 

Triple for Query   : it - be - it

209: from making making mobile art or some areas of wheelchair art and there's a whole whole litany of 
stop words: from, or, some, of, and, there, a, whole, whole, of, 

Triple for Query   :  - from - art

210:   As long as we can incorporate it so they need see some type of activity like that
stop words: as, w


261: 
stop words: 

Triple for Query   :  -  - 

262: - yeah
stop words: 

Triple for Query   :  - yeah - 

263:   You know obviously there's there's some sense of you know not doing the finished design as Matt was talking about obviously there's weight considerations structural stuff that needs to be gone through
stop words: there, there, some, of, you, not, doing, the, as, was, about, there, that, to, be, through, 

Triple for Query   : that - be - weight considerations

264:   Um by all means rough designs or designs plural that incorporate some of the ideas I was hoping you guys could add and in terms of the kids have had
stop words: by, all, or, that, some, of, the, was, you, could, and, in, of, the, have, had, 

Triple for Query   : guys - have - some

265:  Some of the kids have had are awesome
stop words: of, the, have, had, are, 

Triple for Query   : Some - be - 

266: 
stop words: 

Triple for Query   :  -  - 

267: This is a big great space so design elements whether parti

Triple for Query   : I - think - 

326:   It's gonna be really long
stop words: be, really, 

Triple for Query   :   It - go - 

327:   So -
stop words: 

Triple for Query   :  - so - 

328: we have this really long entryway
stop words: we, have, this, really, 

Triple for Query   : we - have - this really long entryway

329:   In terms of safety is there a concern if the child would need to be evacuated from the tree house quickly - there needed to be quick route out that's not that really long ramp?
stop words: of, is, there, a, if, the, would, to, be, from, the, there, to, be, out, that, not, that, really, 

Triple for Query   : that - need - 

330: The zip line
stop words: 

Triple for Query   :  - line - 

331: 
stop words: 

Triple for Query   :  -  - 

332: Is that - - I can't make a call if that's acceptable
stop words: that, ca, make, a, call, if, that, 

Triple for Query   : that - be - call

333:   
stop words: 

Triple for Query   :  -    - 

334: I think the first thing wi


391: 
stop words: 

Triple for Query   :  -  - 

392: That could that could be possible
stop words: could, that, could, be, 

Triple for Query   : that - be - 

393:   You'd have a you'd have to have a one -
stop words: have, a, you, have, to, have, a, one, 

Triple for Query   : you - have - one

394: Video cuts out and comes back in 0:31:06]
stop words: out, and, back, in, 

Triple for Query   : Video - cut - 

395: Would we be able to see the treehouse and all of this when it is completed?
stop words: we, be, to, see, the, and, all, of, this, when, it, is, 

Triple for Query   : we - be - treehouse

396: Would you be able to?
stop words: you, be, to, 

Triple for Query   : you - be - 

397: Would we be able to like come back
stop words: we, be, to, back, 

Triple for Query   : we - be - 

398: 
stop words: 

Triple for Query   :  -  - 

399: Oh yeah
stop words: 

Triple for Query   :  - oh - 

400: 
stop words: 

Triple for Query   :  -  - 

401: Absolutely
stop words: 

Triple for

Triple for Query   : what - be - fireplace

458:   Some place where there can just be a a fire ring like you have in in a number of places
stop words: where, there, can, just, be, a, a, you, have, in, in, a, of, 

Triple for Query   : you - place - 

459:   It's very expensive but in this case since this - would that be a really exciting feature
stop words: very, but, in, this, since, this, would, that, be, a, really, 

Triple for Query   : that - in - 

460:   And then is that worth the number of dollars it would take for the masonry and construction ah involved
stop words: then, is, that, the, of, it, would, take, for, the, and, 

Triple for Query   : it - be - 

461:   Um always balancing those two things
stop words: always, those, two, 

Triple for Query   :  - balance - those two things

462:   But ah just thinking in terms of wow factors
stop words: just, in, of, 

Triple for Query   :  - think - 

463:   What's up there to make these kids you know experience something they aren'

Triple for Query   : I - be - adaptations

515:   You all can sit in it and make it work
stop words: all, can, in, it, and, make, it, 

Triple for Query   :   You - sit - 

516:    So as as an executive of my team my next question would be and so when can expect something back?
stop words: as, as, an, of, my, my, next, would, be, and, so, when, can, something, back, 

Triple for Query   : my next question - be - something

517: Good question
stop words: 

Triple for Query   :  - question - 

518: 
stop words: 

Triple for Query   :  -  - 

519: What kind of timeframe are you looking at on creating these ideas?
stop words: of, are, you, at, on, these, 

Triple for Query   : you - look - ideas

520: As far as our class goes I think we have until Friday Friday
stop words: as, our, we, have, until, 

Triple for Query   : I - think - 

521:   So Friday 
stop words: 

Triple for Query   :  - so - 

522:   
stop words: 

Triple for Query   :  -    - 

523: We have design review presentation t

In [4]:
# to remove WARNINGs from Owlready2
from IPython.display import HTML
HTML('''<script>
code_show_err=false; 
function code_toggle_err() {
 if (code_show_err){
 $('div.output_stderr').hide();
 } else {
 $('div.output_stderr').show();
 }
 code_show_err = !code_show_err
} 
$( document ).ready(code_toggle_err);
</script>
To toggle on/off output_stderr, click <a href="javascript:code_toggle_err()">here</a>.''')


In [59]:
# reference: https://pythonhosted.org/Owlready2/world.html
from owlready2 import *
import rdflib

my_world = World()# Owlready2 stores every triples in a ‘World’ object
onto = my_world.get_ontology("dbpedia.owl").load()

graph = my_world.as_rdflib_graph()
print(len(graph))

prefix = """
PREFIX rdf:<http://www.w3.org/2000/01/rdf-schema#>
PREFIX dbpd:<http://dbpedia.org/ontology/>
"""

'''r = list(graph.query(prefix + """
SELECT ?sub WHERE {
  ?sub rdf:range  dbpd:Province.
}"""))'''

'''r = list(graph.query(prefix + """
ASK {
  dbpd:provinceLink rdf:range  dbpd:Province.
}"""))'''

r = list(graph.query(prefix + """
DESCRIBE ?sub WHERE {
  ?sub rdf:range  dbpd:Province.
}"""))

print(r)



31050




Exception: DESCRIBE not implemented