PLP PM - Conversational natural language query of relational and non-relational databases.

Notebook for text2GQL inference

# Load Graph DB

## Load modules

In [None]:
#!pip install neo4j

In [None]:
import logging

import spacy
from neo4j import GraphDatabase
from neo4j.exceptions import ServiceUnavailable

# Record the spaCy version this run was executed with.
print(spacy.__version__)

3.4.0


## Connect to DB

Start the Neo4j graph database first (e.g. using Neo4j Desktop), then connect the notebook to it as follows:

In [None]:
import os

# Connection settings are read from the environment so that real credentials
# never have to be saved inside the notebook. The fallback values reproduce the
# original local-development setup.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://localhost:7687")
NEO4J_USERNAME = os.environ.get("NEO4J_USERNAME", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "password")

# Driver object shared by every later cell that opens a session.
graph = GraphDatabase.driver(
    NEO4J_URI,
    auth=(NEO4J_USERNAME, NEO4J_PASSWORD)
)

In [None]:
# Reset the DB: delete every node together with all of its relationships.
# DETACH DELETE removes a node's relationships along with the node, so no
# separate OPTIONAL MATCH over the relationships is needed, and the query no
# longer depends on clauses being glued together without whitespace.
query = "MATCH (all_nodes) DETACH DELETE all_nodes"
with graph.session() as session:
    result = session.run(query)

## Insert data into DB

In [None]:
import pandas as pd

# Spreadsheet holding the annotated rows that are inserted into the graph below.
QAREL_XLSX = r'QArelview 26Oct v2.xlsx'
df = pd.read_excel(QAREL_XLSX)

In [None]:
# Drop rows flagged for removal (Remove == 1) and renumber the remaining rows.
# drop=True discards the old index instead of inserting it as a new column, so
# this cell can be re-run without piling up 'index' / 'level_0' columns (which
# eventually makes reset_index raise).
df = df[df.Remove != 1].reset_index(drop=True)

# One plain dict per remaining row, restricted to the fields used downstream.
QAREL_FIELDS = ['Sentence', 'Question', 'Relation', 'Headspan', 'Tailspan']
QArel = df[QAREL_FIELDS].to_dict('records')
print(QArel[0])

{'Sentence': 'IRANIAN SUPPORT \nIsrael sees an outside catalyst for the violence - Iran, which both Hamas and its sometime ally Islamic Jihad say has pledged unlimited assistance for them as the Syrian civil war, where Tehran deployed reinforcements for Damascus, winds down. \n', 'Question': 'What did Tehran deploy reinforcements for?', 'Relation': 'twinned administrative body', 'Headspan': 'Tehran', 'Tailspan': 'Damascus'}


In [None]:
# Relation classes that are loaded into the graph; rows with any other
# relation are skipped by the insert loop.
topkrel = [
    'founded by',
    'inception',
    'parent organization',
    'employer',
    'headquarters location',
    'located in the administrative territorial entity',
]

In [None]:
# Upsert one (head)-[relation]->(tail) triple for every row whose relation is
# in topkrel. A single session is reused for the whole load rather than opening
# three sessions per row, and both nodes plus the relationship are merged in
# one query.
with graph.session() as session:
    for v in QArel:
        if v['Relation'] not in topkrel:
            continue

        # Relationship types cannot be passed as query parameters, so the type
        # is interpolated into the query text. That is safe here because only
        # values from the fixed topkrel list reach this point; the backticks
        # keep the type a single Cypher identifier.
        rel_type = v['Relation'].replace(' ', '_')
        query = (
            "MERGE (n1:Entity {name: $name1}) "
            "MERGE (n2:Entity {name: $name2}) "
            "MERGE (n1)-[r:`" + rel_type + "`]->(n2) "
            "RETURN n1, n2, r"
        )
        result = session.run(query, name1=v['Headspan'], name2=v['Tailspan'])



# Inference

## Load modules and model files

In [None]:
#!pip install tensorflow==2.6.0
#!pip install keras==2.6

In [None]:
#!python -m spacy download en_core_web_sm

In [None]:
import tensorflow as tf
from tensorflow import keras
import spacy

# English spaCy pipeline; the inference cells read named entities from it.
SPACY_PIPELINE = "en_core_web_sm"
nlp = spacy.load(SPACY_PIPELINE)

# Record the TensorFlow version this run was executed with.
print(tf.__version__)

2.6.0


In [None]:
# Saved Keras model whose predictions are mapped to relation labels at
# inference time.
GQL_MODEL_PATH = "gqlmodel.h5"
gql_model = keras.models.load_model(GQL_MODEL_PATH)

In [None]:
import pickle

# Encoder whose `classes_` attribute maps predicted class indices back to
# relation names (presumably a fitted scikit-learn LabelEncoder -- confirm).
# NOTE: unpickling can execute arbitrary code; only load a trusted file here.
ENCODER_PATH = 'enc_class.pkl'
with open(ENCODER_PATH, 'rb') as encoder_file:
    enc_class = pickle.load(encoder_file)



In [None]:
import numpy as np
from transformers import AutoTokenizer, AutoModel, pipeline

# The encoder weights and the tokenizer are taken from the same checkpoint.
BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = AutoTokenizer.from_pretrained(BERT_CHECKPOINT)
model = AutoModel.from_pretrained(BERT_CHECKPOINT)

# Feature-extraction pipeline applied to the input question further down.
fe = pipeline('feature-extraction', model=model, tokenizer=tokenizer)

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


## Input Question and run inference

Input natural language question

In [None]:
# Choose ONE question to run. Exactly one assignment must be active, otherwise
# the cells below fail with a NameError on a fresh kernel.
# input_question = ['When was Facebook founded?']
# input_question = ['What company does Google belong to?']
input_question = ['Who is the founder of Tesla?']

# The model input is a zero-padded array of shape (1, maxlen[0], maxlen[1]).
maxlen = [50,768]
input_vec = np.zeros([1,maxlen[0],maxlen[1]])

# After squeezing, features is assumed to be (n_tokens, maxlen[1]) -- one
# vector per token of the single question.
features = fe(input_question)
features = np.squeeze(features)

# Copy at most maxlen[0] rows: a question with more tokens than that would
# otherwise fail with a shape-mismatch error on assignment.
n_tokens = min(features.shape[0], maxlen[0])
input_vec[0,0:n_tokens,:] = features[:n_tokens]

Extraction of Node1 and prediction of relation

In [None]:
# Node1: the first named entity spaCy detects in the question.
ques_ent = nlp(input_question[0])
if not ques_ent.ents:
    # Fail with an explicit message instead of a bare IndexError on ents[0].
    raise ValueError(
        "No named entity found in question: " + repr(input_question[0])
    )
ent_pred = ques_ent.ents[0].text
print(ent_pred)

# Relation: index of the highest-scoring class, mapped back to its label
# through the encoder's classes_ array.
y_prediction = gql_model.predict(input_vec)
y_prediction = np.argmax(y_prediction, axis = 1)
rel_pred = enc_class.classes_[y_prediction[0]]
print(rel_pred)

Tesla
founded by


Graph query for Node2 as answer

In [None]:
# Relationship types cannot be passed as query parameters, so the predicted
# relation is interpolated into the query text with spaces turned into
# underscores. Backticks (with embedded backticks doubled) keep it a single
# valid Cypher identifier even if a label contains punctuation.
rel_type = rel_pred.replace(" ", "_").replace("`", "``")
query = (
    "MATCH (n1:Entity {name: $ent1})-[rel:`" + rel_type + "`]->(n2:Entity) "
    "RETURN n1, n2, rel"
)

# Node2 of every matching relationship is an answer. Records are read inside
# the session because the result is only valid while the session is open.
answer = None
with graph.session() as session:
    results = session.run(query, ent1=ent_pred)
    for result in results:
        answer = result['n2']['name']
        print(answer)

# Make an empty result visible instead of silently printing nothing.
if answer is None:
    print("No answer found in the graph for (" + ent_pred + ", " + rel_pred + ")")



Elon Musk
