In [1]:
#Task: Required Installations 

# !pip3 uninstall owlready2 
# !pip3 install -U sentence-transformers
# !pip3 install neo4j-driver
# !pip3 install simpledbf
# !pip3 install pysimplegui

In [6]:
#Task: Import Required Libraries

import re
from owlready2 import *
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch
import pprint
import types
from neo4j import GraphDatabase
from simpledbf import Dbf5
import PySimpleGUI as sg
import sqlite3



PyTables is not installed. No support for HDF output.


## Load Windows (File and Ontology Specification)

In [7]:
#Task: Upload Structured Data: CSV or SQL File and convert into Dataset
#Reference: https://www.pysimplegui.org/en/latest/
try:
    sg.theme('DarkAmber')  
    filepath = [sg.FileBrowse()]
    uri = [sg.InputText()]
    layout = [  [sg.Text('Ontology URI'), uri],
                [sg.Text('Data Source file path'), filepath],
                [sg.Button('Save')] ]

    #Task: Create the Window
    window = sg.Window('Ontology Mapping', layout)
    while True:
        event, values = window.read()
        if event == sg.WIN_CLOSED or event == 'Save': 
            break

    window.close()

    #Task: Upload Structured Data: CSV or SQL File and convert into Dataset
    filename = values["Browse"]
    df = pd.read_csv(filename, sep=r";", engine="python") if "csv" in filename else pd.read_sql(filename) 

    #Task: Load ontology
    onto = get_ontology(values[0])
    onto.load()

except Exception as e:
        print("Failed to create GUI:", e)

Failed to create GUI: argument of type 'NoneType' is not iterable


## Functions

In [11]:
def preprocessingCSV():
    #Task: Preprocessing the uploaded file. Clean up Column Names
    keywordsList =[]

    #Check if unique column exists. If not, create one
    if df.iloc[:,0].is_unique == False:
        tempstring = df.columns[0]+" "+df.columns[1]
        df[str(tempstring)] = df.iloc[:,0].astype(str) +" "+  df.iloc[:,1].astype(str) 
        first_col = df.pop(tempstring)
        df.insert(0, str(tempstring), first_col)
        df = df.drop([str(df.columns[1]),str(df.columns[2])], axis=1)
        for i in range(len(df)):
            df[str(tempstring)][i] = str(i)+" "+df[str(tempstring)][i]

    keywordsList = [re.sub("[:,.;'><-=]*","",clUpperCase) for clUpperCase in df.columns]
    df.columns = keywordsList
    tempstring = df.columns[0]

    return keywordsList

def removespaces(var):
    var.replace(" ","")
    return var

def fetchOntologyClasses():
    #Task: Selects All Classes and human readable labels in ontology
    classes = list(onto.classes())
    print(classes)
    newClasses = []
    for i in range(len(classes)):newClasses.append(str(classes[i]))

    return newClasses

def SBert(keywordsList):
    
    newClasses = fetchOntologyClasses()
    
    #Task: Use S-Bert to match keywords to class names
    #Reference: https://www.sbert.net/docs/usage/semantic_textual_similarity.html
    model = SentenceTransformer('all-MiniLM-L6-v2') #preTrained model

    #Compute embedding for both lists
    embeddings1 = model.encode(keywordsList, convert_to_tensor=True)
    embeddings2 = model.encode(newClasses, convert_to_tensor=True)

    #Compute cosine-similarities
    cosine_scores = util.cos_sim(embeddings1, embeddings2)

    #Task: Pick Top Scoring label that matches with keyword as per the cos_sim function, set a threshold and create new class if threshold is not met. 
    result = dict() #Dictionary with Result for SBert
    createClass = dict()
    choices = []

    k = torch.topk(cosine_scores, 5, dim=1, largest=True, sorted=True)
    for i in range(len(keywordsList)):
        for index in range(5):
    #         print("Key Word:{} \tSuggested Class(label):{} \tScore:{:.4f}".format(keywordsList[i], newClasses[k.indices[i][index]], k.values[i][index]))    
            createClass.update({keywordsList[i]:keywordsList[i].replace(" ","")}) if k.values[i][0] < 0.4 else choices.append([keywordsList[i], newClasses[k.indices[i][index]], k.values[i][index]])

    return result, createClass, choices

def rightClassMapping(choices):
    tempdictionary = {}
    layout=[]
    if any(v is not None for v in choices):
        for i in range(len(choices)):
            tempkeyword = choices[i][0]
            if tempkeyword in tempdictionary:
                tempdictionary[tempkeyword].append(choices[i][1])
            else:
                tempdictionary[tempkeyword] = [choices[i][1]]

        layout.append([sg.Text('Confirm if the matched Keyword-Class pair is right',font=(12))])

        for i in tempdictionary:
            layout.append([[sg.Text(i, p=(0, 0 or (5, 0)))],
                          [sg.Combo([tempdictionary[i][0],tempdictionary[i][1],tempdictionary[i][2],
                                    tempdictionary[i][3],tempdictionary[i][4]], default_value=tempdictionary[i][0],key=i,
                                    size=(20,20))]])

        if any(v is not None for v in createClass.keys()):
            layout.append([sg.Text('New classes made for keyword/class pairings:',font=(12),p=(0, 0 or (5, 5)))])
            for newval in createClass:
                layout.append([sg.Text(newval)])

        layout.append([sg.Button('SAVE', font=(12))])

        #Define Window
        win =sg.Window('Customise your Journey',layout)
        e,value=win.read()
        win.close()

    else:
        layout.append([sg.Text('New classes made for keyword/class pairings:',font=(12),p=(0, 0 or (5, 5)))])
        for newval in createClass:
            layout.append([sg.Text(newval)])
        layout.append([sg.Button('SAVE', font=(12))])
        win =sg.Window('Customise your Journey',layout)
        e,value=win.read()
        win.close()

    for i in value:
        result.update({i:value[i]})
    
    return tempdictionary

def createNewClass(createClass):
    # To do: Create New Class
    y = list(createClass.values())
    SuperClass =  onto.Thing #static value (root class)
    for index, val in enumerate(createClass):
        with onto:
            yvalue = y[index]
            NewClass = types.new_class(yvalue, (Thing,))

def specifyDomainAndRange(result, createClass):
    #To do: Add relationship to class: Specify Domain and Range. Everything should be related to the unique column in the database
    combinedDicitonary = {**result, **createClass}
    targetdomain = removespaces(tempstring)
    # m = re.sub("^[^.]*.","",combinedDicitonary[targetdomain])
    targetvalue = combinedDicitonary[targetdomain]
    targetdomainclass = onto.search_one(iri="*"+(targetdomain.replace(".","#")))
    excludecolumn = df.columns[0]

    for index, val in enumerate(createClass):
        if val is not excludecolumn:  
            with onto:
                val = removespaces(str(val))
                class has(targetdomainclass >> onto[str(val)]): 
                    pass
    for index2, val2 in enumerate(result):
        if val2 is not excludecolumn:
            rangeclass = onto.search_one(iri="*"+result[val2])
            if rangeclass is None:
                rangeclass = onto.search_one(is_a=onto[str(re.sub("^[^.]*.","",result[val2]))])
                with onto:
                    class has(targetdomainclass >> rangeclass): 
                        pass

                    
def addInstancesToClasses(result):
    #To do: Add instances to class (get classes, match right key word to the class and add instance to it)
    objectProperties = list(onto.object_properties())
    for index, val in enumerate(result):
        temp = re.sub("^[^.]*.","",result[val])
        rangeclass = onto.search_one(iri="*"+result[val])
        if rangeclass is None:
            rangeclass = onto.search_one(is_a=onto[str(re.sub("^[^.]*.","",result[val]))])
        for col in df:
            if col == val:
                for i, row_value in df[col].iteritems():
                    if (row_value is not None) and (row_value != 0) and (row_value != 0.0):
                        insertvalue = rangeclass(str(row_value))
                        insertvalue.label = str(insertvalue)
                        if col is not excludecolumn:
                            targetdomainclass.instances()[i].has.append(insertvalue)
                            for indexObjectProperty in range(len(objectProperties)):
                                x = objectProperties[indexObjectProperty].domain
                                y = objectProperties[indexObjectProperty].range
                                if (x is not None) and (len(x) == 1) and (x[0] == rangeclass):
                                    indexPosition.append([x[0],insertvalue,i])

def addInstancesToNewClasses(createClass):
    # To do: Add instances to the new classes (get classes, match right key word to the class and add instance to it)
    if bool(createClass) == True:
        for index, val in enumerate(createClass):
              for col in df:
                if col == val:
                    for i, row_value in df[col].iteritems():
                        if (row_value is not None) and (row_value != 0) and (row_value != 0.0):
                            val = removespaces(val)
                            x = onto[str(val)]
                            temp =  x(str(row_value))
                            temp.label = str(temp)
                            if col is not excludecolumn:
                                targetdomainclass.instances()[i].has.append(temp)

In [None]:
def CSVtoOntology():
    keywordsList = preprocessingCSV()
    result, createClass, choices = SBert(onto,keywordsList)
    tempdictionary = rightClassMapping(choices)
    createNewClass(createClass)
    specifyDomainAndRange(result, createClass)
    indexPosition = []
    if targetdomainclass == list(result.values())[0]:
        addInstancesToClasses(result)
        addInstancesToNewClasses(createClass)
    else:
        addInstancesToNewClasses(createClass)
        addInstancesToClasses(result)

In [None]:
CSVtoOntology()

## Save Ontology

In [17]:
# #To do:Save ontology
try:
    onto.save(file = "./ontology_template.rdf", format = "rdfxml")
except ValueError:
    print(ValueError)

## Upload to Neo4j

In [20]:
#Upload ontology to Neo4j for graphing 
class Neo4j_connection: #neo4jconnection #used to be COnnect2Neo4j

    def __init__(self, uri, user, pwd, database_name="neo4j"): #connect to the server; create constraint if not exist; default database is "Neo4j"
        self.__uri = uri
        self.__user = user
        self.__pwd = pwd
        self.__driver = None
        self.__database_name = database_name

        try:
            self.__driver = GraphDatabase.driver(self.__uri, auth=(self.__user, self.__pwd))
        except Exception as e:
            print("Failed to create driver:", e)

        # check if the neccessary config already existed. if not, create.
        query_string = '''
        CALL db.constraints()
                 '''
        a = self.query(query_string, db= self.__database_name)
        constr = str(a)
        try:
            b = constr[constr.find("description='")+13:constr.find(' ON')]
        except Exception as e:
            pass
        if b == 'CONSTRAINT':
            print("Constraint already existed")
        else:
            try:
                query_string = '''
                    CREATE CONSTRAINT n10s_unique_uri FOR (r:Resource) REQUIRE r.uri IS UNIQUE  
                            '''
                self.query(query_string, db = self.__database_name)
            except Exception as e:
                print("Failed to create constraint - check again", e)

        # check if the neccessary config already existed. if not, create.
        query_string = '''
        match(n) return count(n)
        '''
        a = self.query(query_string, db= self.__database_name)
        strin = str(a[0])
        config_check = int(strin[strin.find("=")+1:strin.find(">")])
        if config_check != 0:
            print("Config already existed")
        else:
            try: #setting up neccessary config for neosemantics
                query_string = '''
                    CALL n10s.graphconfig.init()
                                '''
                self.query(query_string, db= self.__database_name)

            except Exception as e:
                print("Failed to initiate graphconfig: ", e)


    def uploading_orx(self, address, db="neo4j"): #uploading OWL, RDF, and XML #i am trying to
        query_string = (
                "call n10s.rdf.import.fetch('"+address+"','RDF/XML',"+"{"+"verifyUriSyntax: false"+"}"+")")
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            response = list(session.run(query_string))
            print("Sucessfully uploaded XML/RDF/OWL file ")
        except Exception as e:
            print("Upload failed:", e)
        finally:
            if session is not None:
                session.close()
        return response


    def uploading_ttl(self, address, db=None): #uploading Turtle files
        query_string = (
                "call n10s.rdf.import.fetch(" + f'{address}'+  ',"Turtle",{verifyUriSyntax: false})')
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            response = list(session.run(query_string))
            print("Sucessfully uploaded TTL file ")
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response


    def close(self):
        if self.__driver is not None:
            self.__driver.close()

    def query(self, query, db=None): #query commands of choice.
        assert self.__driver is not None, "Driver not initialized!"
        session = None
        response = None
        try:
            session = self.__driver.session(database=db) if db is not None else self.__driver.session()
            response = list(session.run(query))
        except Exception as e:
            print("Query failed:", e)
        finally:
            if session is not None:
                session.close()
        return response

a = Neo4j_connection("bolt://localhost:7687", "neo4j", "1234567")
a.uploading_orx("file:///Users/jhealynsamson/AP1/ontology_template.rdf","neo4j")

Constraint already existed
Sucessfully uploaded XML/RDF/OWL file 


[<Record terminationStatus='OK' triplesLoaded=2732 triplesParsed=2732 namespaces={'owl': 'http://www.w3.org/2002/07/owl#', 'ns0': 'http://www.semanticweb.org/ontologies/2011/9/Ontology1318785573683.owl#', 'rdfs': 'http://www.w3.org/2000/01/rdf-schema#'} extraInfo='' callParams={'verifyUriSyntax': False}>]