# Leiden Algorithm

## First need to project a graph using gds library

MATCH (source:UpdatedChunk)-[r:RELATED_TERMS]->(target:UpdatedChunk)
RETURN gds.graph.project(
    'glossaryGraph',
    source,
    target,
    {
    sourceNodeProperties: source { .chunkSeqId },
    targetNodeProperties: target { .chunkSeqId },
    relationshipProperties: r { .id }
  },
    { undirectedRelationshipTypes: ['*'] }
)


This will make the relationship undirected as well

## Run leiden


In [175]:
from langchain_community.graphs import Neo4jGraph
from langchain_community.embeddings import HuggingFaceEmbeddings
import boto3
import json

In [176]:
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [177]:
# NEO4J_URI = 'bolt://citz-imb-ai-neo4j-svc:7687'
NEO4J_URI = 'bolt://' + 'localhost'+':7687'
NEO4J_USERNAME = 'neo4j'
NEO4J_PASSWORD = 'neo4j'
NEO4J_DATABASE = 'neo4j' #os.getenv('NEO4J_DB')
print(NEO4J_URI)
print(NEO4J_DATABASE)

bolt://localhost:7687
neo4j


In [None]:
kg = Neo4jGraph(
    url=NEO4J_URI, username=NEO4J_USERNAME, password=NEO4J_PASSWORD, database=NEO4J_DATABASE
)

## First need to project a graph using gds library

In [None]:
project_graph = """
MATCH (source:UpdatedChunk)-[r:RELATED_TERMS]->(target:UpdatedChunk)
RETURN gds.graph.project(
    'glossaryGraph',
    source,
    target,
    {
    sourceNodeProperties: source { .chunkSeqId },
    targetNodeProperties: target { .chunkSeqId },
    relationshipProperties: r { .id }
  },
    { undirectedRelationshipTypes: ['*'] }
)
"""
kg.query(project_graph)

In [None]:
run_lieden = """
CALL gds.leiden.write('glossaryGraph', { writeProperty: 'communityId'})
YIELD communityCount, nodePropertiesWritten
"""
kg.query(run_lieden)

### Add new property for number of references for all the nodes in gloassry

In [None]:
MATCH (member:UpdatedChunk {type: "glossary"})
WITH member
MATCH (member)-[:RELATED_TERMS]->(related:UpdatedChunk {type: "glossary"})
SET member.referenced_to = related.glossaryTerm

WITH member
MATCH (related:UpdatedChunk {type: "glossary"})<-[:RELATED_TERMS]-(member) 
SET related.referenced_by = member.glossaryTerm

In [180]:
add_ref_query = """
        MATCH (n:UpdatedChunk {type: "glossary"})
        WITH n
        MATCH (m)-[:RELATED_TERMS]->(n) 
        WITH n, COUNT(m) AS num_incoming
        SET n.numNodesPointingTo = num_incoming
        RETURN n
        """
kg.query(add_ref_query)

[{'n': {'numNodesPointingTo': 4,
   'referenced_to': 'Third Reading',
   'textEmbedding': [-0.015165435150265694,
    -0.00013344270701054484,
    -0.024857692420482635,
    0.009755806066095829,
    -0.016037750989198685,
    0.07617887854576111,
    -0.0349394753575325,
    0.004181193187832832,
    -0.004921596962958574,
    0.02552284114062786,
    0.0032329075038433075,
    -0.0075487191788852215,
    -0.026460273191332817,
    -0.03298557922244072,
    0.07342258095741272,
    -0.0438891239464283,
    -0.006022701971232891,
    -0.029799269512295723,
    -0.013157534413039684,
    0.12993758916854858,
    0.09344042092561722,
    0.12252805382013321,
    -0.07319258898496628,
    0.08922839909791946,
    -0.04407505318522453,
    -0.013263904489576817,
    -0.020910317078232765,
    -0.06699471920728683,
    -0.03804730623960495,
    -0.015420947223901749,
    0.058692816644907,
    0.04350658133625984,
    0.06702747195959091,
    0.016738809645175934,
    0.015889309346675873,


### create a new community graph


In [182]:
create_new_community_graph = """
    MATCH (n:UpdatedChunk {type: "glossary"})
    WHERE n.communityId IS NOT NULL
    WITH n.communityId AS communityId, COLLECT(n) AS members
    MERGE (c:UpdatedChunk {communityId: communityId, type: 'community'})
    SET c.summary = [node IN members | 
        'Glossary Term: ' + node.glossaryTerm + 
        ' | Text: ' + node.text + 
        ' | Number of references: ' + toString(coalesce(node.numNodesPointingTo, 0))
    ]
    WITH c, members
    UNWIND members AS member
    MERGE (member)-[:BELONGS_TO]->(c)
    """

kg.query(create_new_community_graph)

[]

In [183]:
AWS_ACCESS_KEY_ID = "AKIA6ODU75WCYANWIYUN"
AWS_SECRET_ACCESS_KEY = "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"

session = boto3.Session(
    aws_access_key_id=AWS_ACCESS_KEY_ID,
    aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
)
bedrock_runtime = session.client("bedrock-runtime", region_name="us-east-1")


def get_mixtral_kwargs(prompt):
    kwargs = {
        "modelId": "mistral.mixtral-8x7b-instruct-v0:1",
        "contentType": "application/json",
        "accept": "*/*",
        "body": json.dumps(
            {
                "prompt": prompt,
                "max_tokens": 1024,
                "temperature": 0.5,
                "top_p": 0.9,
                "top_k": 50,
            }
        ),
    }
    return kwargs


def get_lama3_kwargs(prompt):
    kwargs = {
        "modelId": "meta.llama3-8b-instruct-v1:0",
        "contentType": "application/json",
        "accept": "application/json",
        "body": json.dumps(
            {
                "prompt": prompt,
                "temperature": 0.5,
                "top_p": 0.9,
                "max_gen_len": 1024,
            }
        ),
    }
    return kwargs


def get_response(prompt):
    kwargs = get_mixtral_kwargs(prompt)
    response = bedrock_runtime.invoke_model(**kwargs)
    response_body = json.loads(response.get("body").read())
    return response_body["outputs"][0]["text"]


In [184]:
get_response("summerize the following: Glossary Term: Coming into force | Text: coming into force : the time when an act or regulation takes effect and becomes the law. acts come into force on the date of royal assent, unless the act specifies otherwise in a commencement section. different provisions of the act may come into force on different dates. the commencement section details how the act or different provisions of the act are to come into force : either on a specified date, under specified circumstances, or by regulation. regulations usually come into force on the date of deposit, unless there is another date specified in the regulation. | Number of references: 0,Glossary Term: Commencement Section | Text: commencement section : sometimes an act has a commencement section ( usually the last section in the act ) which details how and when the act ( or different provisions of the act ) comes into force. if there is no commencement section, the act comes into force on the date of royal assent. | Number of references: 1,Glossary Term: Royal Assent | Text: royal assent : after a bill has passed third reading in the legislative assembly, the lieutenant governor, on behalf of the queen, assents to the bill by signing it. the bill is thereby enacted and becomes an act, i. e. a law. unless an act contains a stated effective date, or a provision requiring that it be brought into force by regulation, it comes into force on the day royal assent is given. | Number of references: 2")

'\n\nComing into force refers to the time when an act or regulation takes effect and becomes the law. Acts come into force on the date of royal assent, unless the act specifies otherwise in a commencement section. Regulations usually come into force on the date of deposit, unless there is another date specified in the regulation. The commencement section in an act details how the act or different provisions of the act are to come into force, either on a specified date, under specified circumstances, or by regulation. Royal assent is the process by which the lieutenant governor, on behalf of the queen, signs a bill after it has passed third reading in the legislative assembly, thereby enacting it and making it a law. Unless an act contains a stated effective date or a provision requiring that it be brought into force by regulation, it comes into force on the day royal assent is given.\n\nReference(s):\nnone'

In [214]:
# Function to call the LLM for summarization
def get_summary_from_llm(text):
    COMMUNITY_REPORT_SUMMARIZATION_PROMPT = f"""

    # Goal: Write a comprehensive very detailed summary of a community. this should be detailed enough to understand the complete picture not more than 256 token
    
    # Structure of community text: each community text contains multiple nodes, each node has a gloassy term, text [text explaining the glossary term], Number of references [tells you how many nodes are referencing this glossary term], referenced by & referenced to [gives you the information about the relation ship between nodes]
    
    # Your task: Extract the whole idea from the nodes and summerize.
    
    # output structure: 
    Title: Give a title to the community text perticular to the glossary term and their relationship
    Summary: Summary of the whole community text based on the community text and relation ship between them. 
    Number of Nodes: list the number of nodes in the community
    [ list with decending order of number of references example below]
    Abc has x references
    bcb has v refernces
    
    # Community Text:
    {text}
    
    Based on above provided instruction give me the output as instructed nothing else, keep it under 256 tokens 
    """

    response = get_response(COMMUNITY_REPORT_SUMMARIZATION_PROMPT)
    return response


In [213]:
# Function to call the LLM for summarization
def get_summary_from_llm_2(text):
    COMMUNITY_REPORT_SUMMARIZATION_PROMPT_2 = f"""
    
    # Your task: Summary of the whole text and also taking about the prominent glossary [which has most number of references among the all the summeries]
    
    # TEXT to summerize:
    {text}
    
    Based on above provided instruction give me the output as instructed nothing else, KEEP your response UNDER 256 TOKEN
    """

    response = get_response(COMMUNITY_REPORT_SUMMARIZATION_PROMPT_2)
    return response


In [202]:
# Function to process each community and its members
def process_communities():
        communities = kg.query("MATCH (n:UpdatedChunk {type: 'community'}) RETURN n")


        for community in communities:
            community_node = community["n"]
            community_id = community_node["communityId"]

            # Get members of the community
            members = kg.query("""
                MATCH (member)-[:BELONGS_TO]->(n:UpdatedChunk {type: 'community', communityId: $community_id})
                RETURN member
            """, params = {'community_id':community_id})

        

            # Gather information to summarize
            member_texts = []
            for member in members:
                member_data = member["member"]            
                text_info = f"Glossary Term: {member_data['glossaryTerm']}, Text: {member_data['text']}, Number of References: {member_data.get('numNodesPointingTo', 0)}, Referenced By: {member_data.get('referenced_by', "null")}, Referenced to: {member_data.get('referenced_to', "null")}"
                member_texts.append(text_info)

            # Create a single text block to summarize
            combined_text = "\n".join(member_texts)

            # Get summary from LLM
            summary = get_summary_from_llm(combined_text)
            print(summary)
            if summary:
                # Get embeddings for the summary
                vector = embeddings.embed_query(summary)
                if vector is not None:
                    # Update community node with the embedding
                    kg.query("""
                        MATCH (n:UpdatedChunk {type: 'community', communityId: $community_id})
                        SET n.text = $summary
                        WITH n, $vector AS vector
                        CALL db.create.setNodeVectorProperty(n, "textEmbedding", vector)
                    """, params={'community_id': community_id, 'vector': vector, 'summary': summary})

In [215]:
process_communities()


    Title: Understanding the Relationship between Acts and Regulations
    Summary: An act, also known as a statute, is a law that has passed third reading in the legislative assembly and received royal assent. Acts provide the policy framework, while regulations under the act provide the details to give effect to the policy. Acts can be amended by amending bills or amending regulations. When a bill becomes an act, it is assigned a chapter number and cited by its chapter number and year of passage. Not all acts have regulations.
    Number of Nodes: 7
    Act has 4 references
    Regulation has 2 references
    Amending Bill has 0 references
    Amending Regulation has 0 references
    Chapter Numbers has 0 references
    Statute has 0 references

    Title: Parliamentary Procedure
    Summary: Prorogation is the end of a parliamentary session, it abolishes all pending business and halts all committee work. It can lead to adjournment which temporarily ends a sitting or a session. Diss

// Create the head node
MERGE (head:HeadNode {name: 'glossary communities'})

// Link each community node to the head node
MATCH (c:Community)
MERGE (head)-[:HAS_COMMUNITY]->(c)

In [216]:
def process_head():
    head = kg.query("MATCH (head:UpdatedChunk { type: 'head'}) RETURN head")
    
    for h in head:
            print(h)
            h_node = h["head"]
            h_name = h_node["name"]

            # Get members of the community
            members = kg.query("""
                MATCH (h:UpdatedChunk {name: $h_name})-[:HAS_COMMUNITY]->(member)
                RETURN member
            """, params = {'h_name':h_name})


            # Gather information to summarize
            member_texts = []
            for member in members:
                member_data = member["member"] 
                if member_data:
                    member_texts.append(member_data['summary'])

            # Create a single text block to summarize
            combined_text = "\n".join(member_texts)


            # Get summary from LLM
            summary = get_summary_from_llm_2(combined_text)
            print(summary)
            if summary:
                # Get embeddings for the summary
                vector = embeddings.embed_query(summary)
                if vector is not None:
                    # Update community node with the embedding
                    kg.query("""
                        MATCH (h:UpdatedChunk { type: 'head'})
                        SET h.text = $summary
                        WITH h, $vector AS vector
                        CALL db.create.setNodeVectorProperty(h, "textEmbedding", vector)
                    """, params={'vector': vector, 'summary': summary})

In [217]:
process_head()

{'head': {'summary': '\n    Comprehensive Summary:\n    The legislative process involves various stages, including the introduction of a bill, amendments, and passing. The process begins with the first reading bill, followed by the second reading, committee, report, and third reading. After passing through these stages, the bill receives royal assent to become law. The official versions of statutes and regulations in British Columbia are published by the King\'s Printer in various formats, including annual bound statutes, BC Gazette Part II, consolidated statutes and regulations (looseleaf), and individual consolidated statutes and regulations. Acts and regulations come into force on the date of royal assent or by regulation, as specified in the commencement section. The most recent revision of public statutes was published in 1996 as the Revised Statutes of British Columbia (R.S.B.C.). The prominent glossary term with the most number of references among the summaries is "regulation," 