# This notebook uses the NEO4J graph database to associate acts and regulations 
### The purpose of using a graph database is to understand how graphs can be connected for better retrieval. 
#### There are a few advantages of using a graph database over traditional databases, and this notebook tries to explore them further:
- It is easier to grow the data in this database without any complex migration scripts or ORM
- Much easier to link different data

This notebook is inspired by content from these sources:
- Deep learning Knowledge graph for RAG - https://www.deeplearning.ai/short-courses/knowledge-graphs-rag/
- https://python.langchain.com/docs/integrations/providers/neo4j
- https://neo4j.com/developer-blog/advanced-rag-strategies-neo4j/
- Take a step back: https://arxiv.org/pdf/2310.06117.pdf
- Open AI blog - https://cookbook.openai.com/examples/rag_with_graph_db

In [None]:
!pip install langchain
!pip install neo4j
!pip install bs4
!pip install llama-index
#!pip uninstall -y trulens_eval
!pip install trulens-eval==0.25.1
!pip install llmlingua

In [None]:
import os
from bs4 import BeautifulSoup
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain.text_splitter import RecursiveCharacterTextSplitter, SentenceTransformersTokenTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
import warnings
from langchain_community.document_loaders import DirectoryLoader
from llama_index.core import SimpleDirectoryReader, StorageContext
warnings.filterwarnings("ignore")
from langchain_community.embeddings import HuggingFaceEmbeddings
from sentence_transformers import CrossEncoder
import numpy as np
from trulens_eval import Tru
from trulens_eval.tru_custom_app import instrument
from llmlingua import PromptCompressor
#from llama_index.indices.postprocessor import SentenceTransformerRerank

In [None]:
def word_wrap(string, n_chars=72):
    """Wrap ``string`` into lines by breaking at the last space within each ``n_chars`` window.

    Args:
        string: Text to wrap.
        n_chars: Window width; must be at least 1.

    Returns:
        The text with ``'\\n'`` inserted in place of the space chosen as break point.
        A window that contains no space is broken hard after ``n_chars`` characters
        without losing any character.

    Raises:
        ValueError: If ``n_chars`` is smaller than 1 (the text could never be consumed).
    """
    if n_chars < 1:
        raise ValueError("n_chars must be at least 1")

    lines = []
    remaining = string
    # Iterative on purpose: very long inputs no longer risk hitting the recursion limit.
    while len(remaining) >= n_chars:
        window = remaining[:n_chars]
        cut = window.rfind(' ')
        if cut == -1:
            # No space to break on: hard-break and keep every character.
            lines.append(window)
            remaining = remaining[n_chars:]
        else:
            # Break at the space and drop exactly that space.
            lines.append(window[:cut])
            remaining = remaining[cut + 1:]
    lines.append(remaining)
    return '\n'.join(lines)

In [None]:
# Module-level placeholder.
# NOTE(review): nothing visible in this notebook reads it; get_definitions()
# assigns a local variable of the same name, not this global.
definition = None

In [None]:
# Neo4j connection settings, read from the environment.
# NEO4J_HOST falls back to 'localhost' so that an unset variable no longer
# aborts the cell with a TypeError while concatenating str and None.
NEO4J_URI = 'bolt://' + os.getenv('NEO4J_HOST', 'localhost') + ':7687'
NEO4J_USERNAME = os.getenv('NEO4J_USER')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
NEO4J_DATABASE = 'neo4j' #os.getenv('NEO4J_DB')
print(NEO4J_URI)
print(NEO4J_DATABASE)

In [None]:
# Open the Neo4j connection from the NEO4J_* settings; `kg` is the graph handle
# that every Cypher query in this notebook goes through.
kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

In [None]:
from urllib.parse import quote_plus

from trulens_eval import Tru
from trulens_eval.tru_custom_app import instrument

# TruLens stores its records in PostgreSQL; all settings come from the environment.
TRULENS_USER = os.getenv('TRULENS_USER')
TRULENS_PASSWORD = os.getenv('TRULENS_PASSWORD')
TRULENS_DB = os.getenv('TRULENS_DB')
TRULENS_PORT = os.getenv('TRULENS_PORT')
TRULENS_HOST = os.getenv('TRULENS_HOST')

# URL-encode the credentials: characters such as '@', ':' or '/' in a raw
# user name or password would otherwise be parsed as URL delimiters.
_trulens_user = quote_plus(TRULENS_USER or '')
_trulens_password = quote_plus(TRULENS_PASSWORD or '')

TRULENS_CONNECTION_STRING = f'postgresql+psycopg2://{_trulens_user}:{_trulens_password}@{TRULENS_HOST}:{TRULENS_PORT}/{TRULENS_DB}'
tru = Tru(database_url=TRULENS_CONNECTION_STRING)

In [None]:
# Cypher query that counts every node currently stored in the graph.
cypher = """
  MATCH (n) 
  RETURN count(n)
  """

In [None]:
# Run the node-count query; the bare `result` on the last line displays the rows.
result = kg.query(cypher)
result

In [None]:
# Load every HTML file of both corpora, tagging each document with its file name.
def file_metadata(x):
    """Build the per-file metadata dict handed to SimpleDirectoryReader."""
    return {"filename": x}


Acts_documents = SimpleDirectoryReader(
    "./HTML_Acts", file_metadata=file_metadata
).load_data()
Regulations_documents = SimpleDirectoryReader(
    "./HTML_Regulations", file_metadata=file_metadata
).load_data()

# Report how many documents were loaded from each directory.
print(len(Acts_documents))
print(len(Regulations_documents))

In the next section we try to loop through all the Acts

In [None]:
# Spot-check the loaded regulations: file name of one document, raw text of another.
# NOTE(review): the file name comes from index 1 but the text from index 100, and
# index 100 raises IndexError when fewer than 101 regulations were loaded - confirm
# whether both lines were meant to inspect the same document.
print(Regulations_documents[1].metadata['filename'])
#for key in Regulations_documents[100]:
#   print ((key))
print(Regulations_documents[100].get_text())

In [None]:
def get_title(soup):
    """Return the stripped text of the first ``<h2>`` element found in ``soup``.

    Raises:
        IndexError: If ``soup`` contains no ``<h2>`` element.
    """
    first_heading = soup.find_all("h2")[0]
    return first_heading.get_text().strip()

In [None]:
def get_definitions(sections):
    """Return the first section whose ``<h4>`` heading text contains ``'Definition'``.

    Args:
        sections: Iterable of parsed elements exposing ``find("h4")``.

    Returns:
        The matching section, or ``None`` when no section qualifies.

    Side effects:
        Prints the heading text of every headed section inspected before the match.
    """
    for section in sections:
        heading = section.find("h4")
        if heading is None:
            # Sections without an <h4> used to crash with AttributeError; skip them.
            continue
        heading_text = heading.get_text()
        print(heading_text)
        if 'Definition' in heading_text:
            return section
    return None

In [None]:
def get_preamble(soup):
    """Print the text of the first ``<div class="preamble">`` in ``soup``, if there is one.

    Nothing is printed when no such element exists. Always returns ``None``.
    """
    preamble_divs = soup.find_all("div", class_='preamble')
    if not preamble_divs:
        return
    print(preamble_divs[0].get_text())

In [None]:
def split_data_from_file(file, soup):
    """Split the text of one Act into token-bounded chunks with graph metadata.

    The text is first split with the notebook-level ``text_splitter`` and each
    resulting piece is split again with a ``SentenceTransformersTokenTextSplitter``
    (256 tokens per chunk, overlap 20). The Act title comes from ``get_title(soup)``.

    Args:
        file: Plain text of the Act (despite the name, not a path).
        soup: Parsed document the title is read from.

    Returns:
        A list of dicts with keys ``text``, ``chunkSeqId`` (0-based position),
        ``chunkId`` (``"<title>-chunkNNNN"``) and ``ActId`` (the title).
    """
    coarse_chunks = text_splitter.split_text(file)
    token_splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=20, tokens_per_chunk=256)
    token_split_texts = [
        piece
        for coarse_chunk in coarse_chunks
        for piece in token_splitter.split_text(coarse_chunk)
    ]

    title = get_title(soup)
    print(f"\nTitle:{title} Total chunks:{len(token_split_texts)}")

    # One record per chunk; every chunk is kept.
    return [
        {
            'text': piece,
            'chunkSeqId': seq_id,
            'chunkId': f'{title}-chunk{seq_id:04d}',
            'ActId': f'{title}',
        }
        for seq_id, piece in enumerate(token_split_texts)
    ]

In [None]:
# Cypher template for Act chunks: MERGE a :Chunk node keyed on chunkId.
# The remaining properties are written only when the node is newly created
# (ON CREATE SET), so re-running does not overwrite an existing node.
# Expects a `chunkParam` map with chunkId, chunkSeqId, text and ActId.
merge_chunk_node_query = """
MERGE(mergedChunk:Chunk {chunkId: $chunkParam.chunkId})
    ON CREATE SET 
        mergedChunk.chunkSeqId = $chunkParam.chunkSeqId, 
        mergedChunk.text = $chunkParam.text,
        mergedChunk.ActId = $chunkParam.ActId
RETURN mergedChunk
"""

In [None]:
# Sentence-embedding model used for both chunk text and search questions.
# The `Acts_chunks` vector index is declared with 384 dimensions, so the
# model chosen here has to produce vectors of that size.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [None]:
# Character-level splitter; split_data_from_file() reads it as a notebook global.
# Its configuration does not depend on the document, so it is built once.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
    separators=["\n\n", "\n", ". ", " ", ""],
)

# Schema objects are created once up front; both statements are idempotent
# (IF NOT EXISTS), so re-running the cell is safe.
kg.query("""
CREATE CONSTRAINT unique_chunk IF NOT EXISTS
    FOR (c:Chunk) REQUIRE c.chunkId IS UNIQUE
""")
kg.query("""
CREATE VECTOR INDEX `Acts_chunks` IF NOT EXISTS
  FOR (c:Chunk) ON (c.textEmbedding)
  OPTIONS { indexConfig: {
    `vector.dimensions`: 384,
    `vector.similarity_function`: 'cosine'
  }}
""")

# Finds the chunk only while it still lacks an embedding.
pending_embedding_query = """
MATCH (chunk:Chunk) WHERE
    chunk.textEmbedding IS NULL
    AND chunk.chunkId = $chunkId
    AND chunk.chunkSeqId = $chunkSeqId
RETURN chunk.chunkId AS chunkId
"""

# Stores the embedding on the chunk, again only if none is present yet.
set_embedding_query = """
MATCH (chunk:Chunk) WHERE
    chunk.textEmbedding IS NULL
    AND chunk.chunkSeqId = $chunkSeqId
    AND chunk.chunkId = $chunkId
CALL db.create.setNodeVectorProperty(chunk, "textEmbedding", $vector)
"""

for Acts in Acts_documents:
    soup = BeautifulSoup(Acts.get_text(), 'html.parser')
    item1_text = soup.get_text()
    first_file_chunks = split_data_from_file(item1_text, soup)

    # Create (or keep) one :Chunk node per chunk. An empty chunk list is simply a no-op.
    for chunk in first_file_chunks:
        kg.query(merge_chunk_node_query, params={'chunkParam': chunk})

    # Embed only the chunks that do not have a vector yet: the write below is a
    # no-op for already-embedded nodes, so computing their embedding is wasted work.
    for chunk in first_file_chunks:
        chunk_key = {"chunkSeqId": chunk['chunkSeqId'], "chunkId": chunk['chunkId']}
        if not kg.query(pending_embedding_query, params=chunk_key):
            continue
        query_result = embeddings.embed_query(chunk['text'])
        kg.query(set_embedding_query, params={**chunk_key, "vector": query_result})

In [None]:
# Re-read the graph schema after ingestion and print it for inspection.
kg.refresh_schema()
print(kg.schema)

In [None]:
def neo4j_vector_search(question, top_k=10, index_name='Acts_chunks'):
  """Search for similar nodes using the Neo4j vector index.

  Args:
      question: Natural-language question; it is embedded with the
          notebook-level ``embeddings`` model before querying.
      top_k: Number of nearest nodes requested from the index (default 10).
      index_name: Name of the vector index to query (default ``'Acts_chunks'``).

  Returns:
      The rows returned by ``kg.query``; each row has the keys ``score``,
      ``node.ActId``, ``node.RegId`` and ``text``.
  """
  query_embedding = embeddings.embed_query(question)
  # Note: the Cypher parameter is called $question but carries the embedding vector.
  vector_search_query = """
    CALL db.index.vector.queryNodes($index_name, $top_k, $question) yield node, score
    RETURN score, node.ActId, node.RegId, node.text AS text
  """
  return kg.query(
      vector_search_query,
      params={
          'question': query_embedding,
          'index_name': index_name,
          'top_k': top_k,
      },
  )

In [None]:
# Sample question used by the exploratory cells below.
# NOTE: rerank() further down also reads this notebook-level `query` variable.
query = 'When an employee is fired what needs to be done next?'
search_results = neo4j_vector_search(query)

### Doing a rerank using cross-encoder

In [None]:
# Show which Act the top vector-search hit belongs to (raises IndexError if there were no hits).
search_results[0]['node.ActId']

In [None]:
# Cross-encoder used to re-score (question, chunk text) pairs after the vector search.
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')

In [None]:
# BAAI/bge-reranker-base
# link: https://huggingface.co/BAAI/bge-reranker-base
#rerank = SentenceTransformerRerank(
#    top_n=2, model="BAAI/bge-reranker-base"
#)

In [None]:
# Pair the sample question with every retrieved chunk and let the cross-encoder score each pair.
pairs = [[query, doc['text']] for doc in search_results]
#print(pairs)
scores = cross_encoder.predict(pairs)
print("Scores:")
for score in scores:
    print(score)

In [None]:
# np.argsort sorts ascending, so [::-1] puts the highest score first.
print("New Ordering:")
print(np.argsort(scores)[::-1][0])
# Only the best hit is shown: the loop stops after its first iteration.
for o in np.argsort(scores)[::-1]:
    print(o)
    print(search_results[o]['text'])
    break


# Get the LLM Model

In [None]:
import openllm

# Local generator model served through OpenLLM; main() streams completions from it.
llm = openllm.LLM(
    "google/flan-t5-base",
    temperature=0.1,
    repetition_penalty=1.2,
    backend='pt'
)

In [None]:
import asyncio

import nest_asyncio



nest_asyncio.apply()


async def main(query):
  """Stream a completion for ``query`` from the notebook-level ``llm``.

  Each generated piece is printed as soon as it arrives (no newline, flushed);
  the concatenation of all pieces is returned.
  """
  pieces = []
  async for gen in llm.generate_iterator(query, max_new_tokens=1024):
      piece = gen.outputs[0].text
      print(piece, flush=True, end='')
      pieces.append(piece)
  return ''.join(pieces)


# Smoke test: ask the bare model a question, without any retrieved context.
asyncio.run(main("What age can we drink in B.C?"))

In [None]:
def retrieval(query_str):
    """Return the vector-search hits for ``query_str`` (delegates to ``neo4j_vector_search``)."""
    return neo4j_vector_search(query_str)

In [None]:
def rerank(search_results):
    pairs = [[query, doc['text']] for doc in search_results]
    scores = cross_encoder.predict(pairs)
    print("New Ordering:")
    for o in np.argsort(scores)[::-1]:
        print(o)   
        #print(search_results[o])
    return "( " + search_results[np.argsort(scores)[::-1][0]]['node.ActId']  + ')\n' + search_results[np.argsort(scores)[::-1][0]]['text'] + "\n\n( " + search_results[np.argsort(scores)[::-1][1]]['node.ActId']  + ')\n ' + search_results[np.argsort(scores)[::-1][1]]['text'] + "\n\n( " + search_results[np.argsort(scores)[::-1][2]]['node.ActId']  + ' )\n' + search_results[np.argsort(scores)[::-1][2]]['text']

In [None]:
## Or use LLMLingua-2-small model
llm_lingua = PromptCompressor(
    model_name="microsoft/llmlingua-2-bert-base-multilingual-cased-meetingbank",
    use_llmlingua2=True, # Whether to use llmlingua-2,
    device_map="cpu"
)

In [None]:
class RAG_from_scratch:
    @instrument
    def retrieve(self, query: str) -> list:
        """
        Retrieve relevant text from vector store.
        """
        return retrieval(query)

    @instrument
    def reranked(self, search_results) -> str:
        return rerank(search_results)

    def genprompt(self, query: str, context_str: list) -> str:
        """
        Generate answer from context.
        """
        messages=f"""
Use the following pieces of information to answer the user's question.
Laws and Acts can be used interchangeably.
If the answer is not in the documents, just say that you don't know. 
Don't try to make up an answer.

Context: 

{context_str}

Question: 

{query}
Only return the helpful answer below and nothing else.
                    """
        return messages
        
    @instrument 
    def promptcompression(self, prompt, query) ->str:
        compressed_prompt = llm_lingua.compress_prompt(prompt, instruction="", question="", target_token=200)
        return compressed_prompt['compressed_prompt']

    @instrument
    def generate_completion(self, compressed_prompt:str) -> str:
        print(compressed_prompt)
        completion = asyncio.run(main(compressed_prompt))
        return completion

    @instrument
    def query(self, query: str) -> str:
        context_str = self.retrieve(query)
        rerank = self.reranked(context_str)
        prompt = self.genprompt(query, rerank)
        print(prompt)
        compressed_prompt = self.promptcompression(prompt, query)
        completion = self.generate_completion(compressed_prompt)
        return completion

rag = RAG_from_scratch()

In [None]:
# Run the full pipeline once, outside of any TruLens recording.
rag.query("I’m looking to dispute a will, which laws are applied?")

In [None]:
from trulens_eval import TruCustomApp
# Wrap the pipeline for TruLens under the app id 'NEO_4J'.
# No automatic feedback functions are attached (the list below is commented out).
tru_rag = TruCustomApp(rag,
    app_id = 'NEO_4J',
    #feedbacks = [f_groundedness, f_answer_relevance, f_context_relevance]
    )

In [None]:
# Calls made inside this block are captured; `recording` is read back in a later cell.
with tru_rag as recording:
    rag.query("I’m looking to dispute a will, which laws are applied?")

In [None]:
from ipywidgets import Button, HBox, VBox
from IPython.display import display

# Two buttons for collecting a manual rating of the recorded answer.
thumbs_up_button = Button(description='👍')
thumbs_down_button = Button(description='👎')

# Stays None until one of the buttons is clicked; read by the feedback cell below.
human_feedback = None

def on_thumbs_up_button_clicked(b):
    """Click handler: store a positive rating (1) in the notebook-level variable."""
    global human_feedback
    human_feedback = 1

def on_thumbs_down_button_clicked(b):
    """Click handler: store a negative rating (0) in the notebook-level variable."""
    global human_feedback
    human_feedback = 0

thumbs_up_button.on_click(on_thumbs_up_button_clicked)
thumbs_down_button.on_click(on_thumbs_down_button_clicked)

# Last expression of the cell: renders both buttons side by side.
HBox([thumbs_up_button, thumbs_down_button])

In [None]:
# Attach the manual rating chosen with the buttons above to the recorded run.
record = recording.get()
print(human_feedback)
if human_feedback is None:
    # No button was clicked yet: do not store an empty rating.
    print("No human feedback selected - click 👍 or 👎 above, then re-run this cell.")
else:
    # NOTE(review): the name is misspelled ("Feedack") but is kept verbatim because
    # it is presumably the label existing feedback rows were stored under - confirm
    # before renaming.
    tru.add_feedback(
        name="Human Feedack",
        record_id=record.record_id,
        app_id=tru_rag.app_id,
        result=human_feedback
    )

In [None]:
# Fetch stored records and feedback, then show the five most recent rows.
# NOTE(review): app_ids=[] presumably means "all apps" - confirm against the TruLens API.
records, feedback = tru.get_records_and_feedback(app_ids=[])
records.tail(5)

In [None]:

# Start the TruLens dashboard.
# NOTE(review): address '0.0.0.0' listens on every network interface - confirm that
# exposing the dashboard beyond localhost is intended.
tru.run_dashboard(address='0.0.0.0', force=True)

### Loop through all laws, associate all the chunks, create a parent Act node, and attach the child chunks to their corresponding parent Act node

In [None]:
def act_info_list_fn(actId):
    """Fetch the ``ActId`` of one chunk that belongs to the given Act.

    Matches ``Chunk`` nodes whose ``ActId`` equals ``actId`` and keeps at most one.

    Returns:
        The rows returned by ``kg.query``; each row holds the projected map
        under the key ``ActInfo``. Empty when no chunk matches.
    """
    one_chunk_per_act = """
      MATCH (anyChunk:Chunk) 
      WHERE anyChunk.ActId = $ActId
      WITH anyChunk LIMIT 1
      RETURN anyChunk { .ActId } as ActInfo
    """
    return kg.query(one_chunk_per_act, params={'ActId': actId})

### Connect chunks to their parent Act with a PART_OF relationship

In [None]:
def create_parent_act_node(act_info):
    """MERGE the ``Act`` node identified by ``act_info['ActId']``.

    Args:
        act_info: Map with an ``ActId`` entry, passed to Cypher as ``$formInfoParam``.
    """
    merge_act = """
        MERGE (f:Act {ActId: $formInfoParam.ActId })
          ON CREATE 
            SET f.ActId = $formInfoParam.ActId
            """
    kg.query(merge_act, params={'formInfoParam': act_info})

### Add a NEXT relationship between subsequent chunks
- Use the `apoc.nodes.link` procedure from Neo4j to link an ordered list of `Chunk` nodes with a `NEXT` relationship
- Do this per Act: all chunks that share an `ActId` are linked in `chunkSeqId` order

In [None]:
def create_chunk_relationship(act_info):
    """Link the chunks of one Act into a ``NEXT`` chain ordered by ``chunkSeqId``.

    Only chunks without a ``RegId`` are linked. Regulation chunks carry the
    ``ActId`` of their Act as well, so without this filter a re-run on a graph
    that already holds regulations would weave them into the Act's chain.

    Args:
        act_info: Map with an ``ActId`` entry identifying the Act.
    """
    cypher = """
  MATCH (from_same_section:Chunk)
  WHERE from_same_section.ActId = $ActParam
    AND from_same_section.RegId IS NULL
  WITH from_same_section
    ORDER BY from_same_section.chunkSeqId ASC
  WITH collect(from_same_section) as section_chunk_list
    CALL apoc.nodes.link(
        section_chunk_list,
        "NEXT",
        {avoidDuplicates: true}
    )
  RETURN size(section_chunk_list)
"""
    kg.query(cypher, params={'ActParam': act_info['ActId']})

In [None]:
def connect_chunk_to_parent():
    """MERGE a ``PART_OF`` relationship from every chunk to the ``Act`` with the same ``ActId``.

    The match is graph-wide: it is not limited to a single Act.
    """
    link_chunks_to_acts = """
      MATCH (c:Chunk), (f:Act)
        WHERE c.ActId = f.ActId
      MERGE (c)-[newRelationship:PART_OF]->(f)
      RETURN count(newRelationship)
    """
    kg.query(link_chunks_to_acts)

In [None]:
# For every Act that has chunks in the graph: create its parent node and
# chain its chunks with NEXT relationships.
for Acts in Acts_documents:
    soup = BeautifulSoup(Acts.get_text(), 'html.parser')
    title  = get_title(soup)
    act_info_lists = act_info_list_fn(title)
    for act_info_list in act_info_lists:
        act_info = act_info_list['ActInfo']
        create_parent_act_node(act_info)
        create_chunk_relationship(act_info)

# connect_chunk_to_parent() matches all chunks against all Act nodes, so one call
# after the loop produces the same PART_OF relationships as calling it per Act.
connect_chunk_to_parent()

In [None]:
# List the indexes defined in the database; the result is displayed as the cell output.
kg.query("SHOW INDEXES")

## Doing the same steps as above for Regulations

In [None]:
# Cypher template for regulation chunks: same MERGE on chunkId as for Act chunks,
# but a newly created node additionally stores the RegId of its regulation.
# Expects a `chunkParam` map with chunkId, chunkSeqId, text, ActId and RegId.
merge_regchunk_node_query = """
MERGE(mergedChunk:Chunk {chunkId: $chunkParam.chunkId})
    ON CREATE SET 
        mergedChunk.chunkSeqId = $chunkParam.chunkSeqId, 
        mergedChunk.text = $chunkParam.text,
        mergedChunk.ActId = $chunkParam.ActId,
        mergedChunk.RegId = $chunkParam.RegId
RETURN mergedChunk
"""

In [None]:
def split_regdata_from_file(file, soup):
    """Split the text of one regulation into token-bounded chunks with graph metadata.

    The text is first split with the notebook-level ``text_splitter`` and each
    piece is split again with a ``SentenceTransformersTokenTextSplitter``
    (256 tokens per chunk, overlap 20). The first ``<h2>`` of ``soup`` is taken
    as the Act name and the second as the regulation name.

    Args:
        file: Plain text of the regulation (despite the name, not a path).
        soup: Parsed document the two headings are read from.

    Returns:
        A list of dicts with keys ``text``, ``chunkSeqId`` (0-based position),
        ``chunkId`` (``"<act>-<reg>-chunkNNNN"``), ``ActId`` and ``RegId``.

    Raises:
        IndexError: If ``soup`` has fewer than two ``<h2>`` elements.
    """
    coarse_chunks = text_splitter.split_text(file)
    token_splitter = SentenceTransformersTokenTextSplitter(chunk_overlap=20, tokens_per_chunk=256)
    token_split_texts = [
        piece
        for coarse_chunk in coarse_chunks
        for piece in token_splitter.split_text(coarse_chunk)
    ]

    headings = soup.find_all("h2")
    act_name = headings[0].get_text().strip()
    reg_name = headings[1].get_text().strip()
    print(f"\nAct:{act_name} Reg:{reg_name} Total chunks:{len(token_split_texts)}")

    # One record per chunk; every chunk is kept.
    return [
        {
            'text': piece,
            'chunkSeqId': seq_id,
            'chunkId': f'{act_name}-{reg_name}-chunk{seq_id:04d}',
            'ActId': f'{act_name}',
            'RegId':f'{reg_name}',
        }
        for seq_id, piece in enumerate(token_split_texts)
    ]

In [None]:
def check_Act_exists(act_id):
    """Return the ``Act`` nodes whose ``ActId`` contains ``act_id`` as a substring.

    The match uses Cypher ``CONTAINS``, so it is a substring test, not an
    equality test. An empty result means no such Act node exists.
    """
    acts_containing = """
    MATCH (n:Act) WHERE
    n.ActId CONTAINS $ActId
    RETURN n
    """
    matching_acts = kg.query(acts_containing, params={'ActId': act_id})
    return matching_acts

In [None]:
# Quick check: how many Act nodes have "Transport" somewhere in their ActId?
act_exists = check_Act_exists("Transport")
print(len(act_exists))

In [None]:
total_regs = []

# Character-level splitter; split_regdata_from_file() reads it as a notebook global.
# Its configuration does not depend on the document, so it is built once.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size = 1000,
    chunk_overlap  = 200,
    length_function = len,
    separators=["\n\n", "\n", ". ", " ", ""],
)

# Schema objects are created once up front; both statements are idempotent
# (IF NOT EXISTS), so re-running the cell is safe.
kg.query("""
CREATE CONSTRAINT unique_chunk IF NOT EXISTS
    FOR (c:Chunk) REQUIRE c.chunkId IS UNIQUE
""")
kg.query("""
CREATE VECTOR INDEX `Acts_chunks` IF NOT EXISTS
  FOR (c:Chunk) ON (c.textEmbedding)
  OPTIONS { indexConfig: {
    `vector.dimensions`: 384,
    `vector.similarity_function`: 'cosine'
  }}
""")

# Finds the regulation chunk only while it still lacks an embedding.
pending_reg_embedding_query = """
MATCH (chunk:Chunk) WHERE
    chunk.textEmbedding IS NULL
    AND chunk.chunkId = $chunkId
    AND chunk.chunkSeqId = $chunkSeqId
    AND chunk.ActId = $ActId
    AND chunk.RegId = $RegId
RETURN chunk.chunkId AS chunkId
"""

# Stores the embedding on the chunk, again only if none is present yet.
set_reg_embedding_query = """
MATCH (chunk:Chunk) WHERE
    chunk.textEmbedding IS NULL
    AND chunk.chunkSeqId = $chunkSeqId
    AND chunk.chunkId = $chunkId
    AND chunk.ActId = $ActId
    AND chunk.RegId = $RegId
CALL db.create.setNodeVectorProperty(chunk, "textEmbedding", $vector)
"""

for Regulation in Regulations_documents:
    soup = BeautifulSoup(Regulation.get_text(), 'html.parser')

    # Regulations are only ingested when their Act is already in the graph.
    # NOTE(review): check_Act_exists() is a substring match (CONTAINS), so an Act
    # whose title merely contains this one also counts - confirm this is intended.
    act = get_title(soup)
    check_act = check_Act_exists(act)
    if not len(check_act):
        print(f"\n{act} does not exist. Skipping ...")
        continue
    print(f"\n{act} exists")

    # split_regdata_from_file() reads the regulation name from the second <h2>;
    # skip pages that lack it instead of aborting the whole run with IndexError.
    if len(soup.find_all("h2")) < 2:
        print(f"\n{act}: no regulation heading found. Skipping ...")
        continue

    item1_text = soup.get_text()
    first_file_chunks = split_regdata_from_file(item1_text, soup)

    # Create (or keep) one :Chunk node per chunk. An empty chunk list is simply a no-op.
    for chunk in first_file_chunks:
        kg.query(merge_regchunk_node_query, params={'chunkParam': chunk})

    # Embed only the chunks that do not have a vector yet: the write below is a
    # no-op for already-embedded nodes, so computing their embedding is wasted work.
    for chunk in first_file_chunks:
        chunk_key = {
            "chunkSeqId": chunk['chunkSeqId'],
            "chunkId": chunk['chunkId'],
            "ActId": chunk['ActId'],
            "RegId": chunk['RegId'],
        }
        if not kg.query(pending_reg_embedding_query, params=chunk_key):
            continue
        query_result = embeddings.embed_query(chunk['text'])
        kg.query(set_reg_embedding_query, params={**chunk_key, "vector": query_result})

    


In [None]:
def create_parent_reg_node(reg_info):
    """MERGE the ``Reg`` node identified by ``reg_info['RegId']``.

    A newly created node also stores the ``ActId`` found in ``reg_info``.

    Args:
        reg_info: Map with ``RegId`` and ``ActId`` entries, passed to Cypher
            as ``$formInfoParam``.
    """
    merge_reg = """
        MERGE (f:Reg {RegId: $formInfoParam.RegId })
          ON CREATE SET 
          f.RegId = $formInfoParam.RegId,
          f.ActId = $formInfoParam.ActId
            """
    kg.query(merge_reg, params={'formInfoParam': reg_info})

In [None]:
def reg_info_list_fn(regId):
    """Fetch ``RegId`` and ``ActId`` of one chunk that belongs to the given regulation.

    Matches ``Chunk`` nodes whose ``RegId`` equals ``regId`` and keeps at most one.

    Returns:
        The rows returned by ``kg.query``; each row holds the projected map
        under the key ``RegInfo``. Empty when no chunk matches.
    """
    one_chunk_per_reg = """
      MATCH (anyChunk:Chunk) 
      WHERE anyChunk.RegId = $RegId
      WITH anyChunk LIMIT 1
      RETURN anyChunk { .RegId, .ActId } as RegInfo
    """
    return kg.query(one_chunk_per_reg, params={'RegId': regId})

In [None]:
def create_regchunk_relationship(reg_info):
    """Link the chunks of one regulation into a ``NEXT`` chain ordered by ``chunkSeqId``.

    Args:
        reg_info: Map with a ``RegId`` entry identifying the regulation.
    """
    link_reg_chunks = """
  MATCH (from_same_section:Chunk)
  WHERE from_same_section.RegId = $RegParam
  WITH from_same_section
    ORDER BY from_same_section.chunkSeqId ASC
  WITH collect(from_same_section) as section_chunk_list
    CALL apoc.nodes.link(
        section_chunk_list, 
        "NEXT", 
        {avoidDuplicates: true}
    )  // NEW!!!
  RETURN size(section_chunk_list)
"""
    reg_id = reg_info['RegId']
    kg.query(link_reg_chunks, params={'RegParam': reg_id})

In [None]:
def connect_regchunk_to_parent():
    """MERGE a ``PART_OF`` relationship from every chunk to the ``Reg`` with the same ``RegId``.

    The match is graph-wide: it is not limited to a single regulation.
    """
    link_chunks_to_regs = """
      MATCH (c:Chunk), (f:Reg)
        WHERE c.RegId = f.RegId
      MERGE (c)-[newRelationship:PART_OF]->(f)
      RETURN count(newRelationship)
    """
    kg.query(link_chunks_to_regs)

In [None]:
# For every regulation that has chunks in the graph: create its parent node
# and chain its chunks with NEXT relationships.
for Reg in Regulations_documents:
    soup = BeautifulSoup(Reg.get_text(), 'html.parser')
    title = soup.find_all("h2")
    if len(title) < 2:
        # The regulation name is the second <h2>; without it there is nothing to look up.
        print("No regulation heading found. Skipping ...")
        continue
    reg_name = title[1].get_text().strip()
    reg_info_lists = reg_info_list_fn(reg_name)
    print(reg_info_lists)
    for reg_info_list in reg_info_lists:
        reg_info = reg_info_list['RegInfo']
        create_parent_reg_node(reg_info)
        create_regchunk_relationship(reg_info)

# connect_regchunk_to_parent() matches all chunks against all Reg nodes, so one call
# after the loop produces the same PART_OF relationships as calling it per regulation.
connect_regchunk_to_parent()

# Create the law node and connect the Regulations and Act

In [None]:
def create_parent_law_node(law_info):
    """MERGE the ``Law`` node whose ``LawId`` is ``law_info['ActId']``.

    Args:
        law_info: Map with an ``ActId`` entry, passed to Cypher as ``$formInfoParam``.
    """
    merge_law = """
        MERGE (f:Law {LawId: $formInfoParam.ActId })
          ON CREATE SET 
          f.LawId = $formInfoParam.ActId
            """
    kg.query(merge_law, params={'formInfoParam': law_info})

In [None]:
def law_info_list_fn(LawId):
    """Fetch the ``ActId`` of the ``Act`` node that matches ``LawId``.

    Matches ``Act`` nodes whose ``ActId`` equals ``LawId`` and keeps at most one.

    Returns:
        The rows returned by ``kg.query``; each row holds the projected map
        under the key ``LawInfo``. Empty when no Act matches.
    """
    one_act_per_law = """
      MATCH (anyAct:Act) 
      WHERE anyAct.ActId = $LawId
      WITH anyAct LIMIT 1
      RETURN anyAct { .ActId } as LawInfo
    """
    return kg.query(one_act_per_law, params={'LawId': LawId})

In [None]:
def connect_act_parentlaw():
    """MERGE an ``ACT`` relationship from every ``Act`` node to the ``Law`` whose ``LawId`` equals its ``ActId``."""
    link_acts_to_laws = """
      MATCH (c:Act), (f:Law)
        WHERE c.ActId = f.LawId
      MERGE (c)-[newRelationship:ACT]->(f)
      RETURN count(newRelationship)
    """
    kg.query(link_acts_to_laws)

In [None]:
def connect_reg_parentlaw():
    """MERGE a ``REGULATIONS`` relationship from every ``Reg`` node to the ``Law`` whose ``LawId`` equals its ``ActId``."""
    link_regs_to_laws = """
      MATCH (c:Reg), (f:Law)
        WHERE c.ActId = f.LawId
      MERGE (c)-[newRelationship:REGULATIONS]->(f)
      RETURN count(newRelationship)
    """
    kg.query(link_regs_to_laws)

In [None]:
# Create one Law node per Act that exists in the graph (keyed on the Act title).
for index, Acts in enumerate(Acts_documents):
    soup = BeautifulSoup(Acts.get_text(), 'html.parser')
    title  = get_title(soup)
    law_info_lists = law_info_list_fn(title)
    for law_info_list in law_info_lists:
        law_info = law_info_list['LawInfo']        
        create_parent_law_node(law_info)
# Both connect functions match graph-wide, so they run once, after all Law nodes exist.
connect_act_parentlaw()
connect_reg_parentlaw()

# THE END