In [1]:
import json
import os

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from pydantic import BaseModel,Field
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_ollama import ChatOllama
from lightrag import LightRAG, QueryParam
# from lightrag.llm import ollama_model_complete
from dotenv import load_dotenv

load_dotenv()

ImportError: cannot import name 'EmbeddingFunc' from 'lightrag.utils' (C:\Users\Kristila\Desktop\Asus\Desktop\Final Year\Research\CodeSpace\Research\venv\Lib\site-packages\lightrag\utils\__init__.py)

In [2]:
# OpenAI api define
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

model = ChatOpenAI(model="gpt-4o",base_url=os.getenv("base_url"))

# Groq api define
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
llm = ChatGroq(model="llama-3.3-70b-versatile")

# Ollama api define
ollama_llm = ChatOllama(model="llama3.1:latest")


In [3]:
# Define the directory containing the text file
file_path = os.path.join("./books", "romeo_and_juliet_copy.txt")

In [4]:
print(file_path)

./books\romeo_and_juliet_copy.txt


In [5]:
# Read the text content from the file
loader = TextLoader(file_path,encoding='UTF-8')
documents = loader.load()

In [6]:
rec_char_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000, chunk_overlap=100)
rec_char_docs = rec_char_splitter.split_documents(documents)

In [7]:
rec_char_docs = rec_char_docs[:10]

In [8]:
for index, char in enumerate(rec_char_docs):
    if index == 0:  # Check if it's the first item
        print(char)  # Print the first item
        break

page_content='﻿The Project Gutenberg eBook of Romeo and Juliet
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gutenberg License included with this ebook or online
at www.gutenberg.org. If you are not located in the United States,
you will have to check the laws of the country where you are located
before using this eBook.

Title: Romeo and Juliet

Author: William Shakespeare

Release date: November 1, 1998 [eBook #1513]
                Most recently updated: June 27, 2023

Language: English

Credits: the PG Shakespeare Team, a team of about twenty Project Gutenberg volunteers


*** START OF THE PROJECT GUTENBERG EBOOK ROMEO AND JULIET ***




THE TRAGEDY OF ROMEO AND JULIET

by William Shakespeare




Contents

THE PROLOGUE.' metadata={'source': './books\\romeo_and_juliet_copy.txt'}


In [9]:
# Pydantic class definition
# class for named_entity definition
class EntityExtraction(BaseModel):
    entities:str=Field(description="all the entities in a provided content")
 
# class for named_entity relationship structure     
class NER_structure(BaseModel):
    Subject:str=Field(description="The entity being described.")
    Predicate:str=Field(description="The property or relationship associated with the subject.") 
    Object:str=Field(description=" The value or entity to which the predicate applies.")
    
# class for get named_entity relationships     
class EntityAndRelationship_Extraction(BaseModel):
    relationship:list[NER_structure] = Field(description ="these are the relationship between extracted named entities")

In [10]:
# extract entities
entity_structured_output_model = llm.with_structured_output(schema=EntityExtraction)

In [11]:
# extract named_entities along with the relationships
relationship_structured_output_model = llm.with_structured_output(schema=EntityAndRelationship_Extraction)

In [12]:
entities_output = []
relationship_output = []

In [13]:
for chunk in rec_char_docs:
    entities_output.append(entity_structured_output_model.invoke(
        [
            {"role":"system",
             "content":"""
             Named entities are special names of people, places, or things that stand out in a sentence. You need to extract named entities from the given content. Get idea from below provided examples.
              
              Example 1: 
              'In Silicon Valley, tech giants like Apple and Google create new technologies. Apple Park, their headquarters in Cupertino, is super cool.'
              
              Named entities: Silicon Valley (a place), Apple (a company), Google (a company), Apple Park (a place), Cupertino (a place)
                
              Example 2: 
              'In New York City, the Statue of Liberty is a symbol of freedom. It was a gift from France and sits on Liberty Island, near the Hudson River'
              
              Named entities: New York City (a place), Statue of Liberty (a famous statue), France (a country), Liberty Island (a place), Hudson River (a river)
              
              Example 3:
              'In Paris, the Eiffel Tower remains an iconic symbol of French culture and engineering brilliance. Lots of people visit the Louvre Museum to see amazing art like the Mona Lisa'
              
              Named entities: Paris (a place), Eiffel Tower (a famous building), Louvre Museum (a place), Mona Lisa (a famous painting)
   
                """},   
            {"role":"user",
             "content":chunk.page_content}
        ]))
    

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Re

In [14]:
entities_output

[EntityExtraction(entities='United States, Project Gutenberg, Project Gutenberg License, www.gutenberg.org, Romeo and Juliet, William Shakespeare, English, PG Shakespeare Team, Project Gutenberg'),
 EntityExtraction(entities='Romeo and Juliet, William Shakespeare, Capulet, Friar Lawrence, Juliet, Mantua'),
 EntityExtraction(entities='ESCALUS, MERCUTIO, PARIS, MONTAGUE, LADY MONTAGUE, ROMEO, BENVOLIO, ABRAM, BALTHASAR, CAPULET, LADY CAPULET, JULIET, TYBALT, CAPULET’S COUSIN, NURSE, PETER, SAMPSON, GREGORY, FRIAR LAWRENCE, FRIAR JOHN, Apothecary, CHORUS, Verona, Montagues, Capulets, Franciscan'),
 EntityExtraction(entities='Verona, Mantua, Chorus, Sampson, Gregory'),
 EntityExtraction(entities='Sampson, Gregory, Montague'),
 EntityExtraction(entities='Gregory, Sampson, Montagues, Abram, Balthasar'),
 EntityExtraction(entities='Gregory, Sampson, Abram, Benvolio, Tybalt'),
 EntityExtraction(entities='Tybalt, Benvolio, Citizens, Capulet, Lady Capulet, Montague, Lady Montague, Prince Escalus

In [15]:
for i,chunk in enumerate(rec_char_docs):
    relationship_output.append(relationship_structured_output_model.invoke(
        [{"role":"system",
          "content":"""Extract the relationship between identified named entities from the given content
              Example 1: 
              'In Silicon Valley, tech giants like Apple and Google create new technologies. Apple Park, their headquarters in Cupertino, is super cool'
              
              Relationship:
              Apple Park is the headquarters of Apple, located in Cupertino, within Silicon Valley.
              
                               
              Example 2: 
              'In New York City, the Statue of Liberty is a symbol of freedom. It was a gift from France and sits on Liberty Island, near the Hudson River'
              
              Relationships: 
              Statue of Liberty is located in New York City and situated on Liberty Island.
              Statue of Liberty was a gift from France to the United States.
              Liberty Island, where the Statue of Liberty stands, overlooks the Hudson River. 
              
              
              Example 3:
              'In Paris, the Eiffel Tower remains an iconic symbol of French culture and engineering brilliance. Lots of people visit the Louvre Museum to see amazing art like the Mona Lisa'
              
              Relationships: 
              Eiffel Tower is an iconic landmark located in Paris, symbolizing French culture and engineering brilliance. 
              Louvre Museum is situated in Paris and houses the famous Mona Lisa, attracting art enthusiasts.
          """},
         
         {"role":"user",
          "content": chunk.page_content + entities_output[i].entities}]))
    

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 429 Too Many Requests"
INFO:groq._base_client:Retrying request to /openai/v1/chat/completions in 2.000000 seconds
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST 

In [16]:
relationship_output

[EntityAndRelationship_Extraction(relationship=[NER_structure(Subject='Romeo and Juliet', Predicate='written by', Object='William Shakespeare'), NER_structure(Subject='Project Gutenberg eBook', Predicate='includes', Object='Romeo and Juliet'), NER_structure(Subject='Project Gutenberg', Predicate='located in', Object='United States'), NER_structure(Subject='Project Gutenberg License', Predicate='applies to', Object='Romeo and Juliet'), NER_structure(Subject='PG Shakespeare Team', Predicate='credited for', Object='Romeo and Juliet')]),
 EntityAndRelationship_Extraction(relationship=[NER_structure(Subject='Romeo and Juliet', Predicate='written by', Object='William Shakespeare'), NER_structure(Subject='Romeo', Predicate='loves', Object='Juliet'), NER_structure(Subject='Juliet', Predicate='lives in', Object='Capulet’s House'), NER_structure(Subject='Friar Lawrence', Predicate='lives in', Object='Friar Lawrence’s Cell'), NER_structure(Subject='Romeo', Predicate='visits', Object='Capulet’s Ga

In [17]:
formated_entities=[]

for entity in entities_output:
    for _ in entity.entities.split(","):    
        ent = {"entity_name":_, 
               "entity_type":"",
               "description":"",
               "source_id":""
               }
        formated_entities.append(ent)

In [18]:
formated_entities

[{'entity_name': 'United States',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' Project Gutenberg',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' Project Gutenberg License',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' www.gutenberg.org',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' Romeo and Juliet',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' William Shakespeare',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' English',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' PG Shakespeare Team',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': ' Project Gutenberg',
  'entity_type': '',
  'description': '',
  'source_id': ''},
 {'entity_name': 'Romeo and Juliet',
  'entity_type': '',
  'description': '',
  'source_id': '

In [19]:
formated_relationship=[]

for relationship in relationship_output:
    for _ in relationship.relationship:
        relation = {"src_id":_.Subject,
                    "tgt_id":_.Object,
                    "description":_.Predicate,
                    "keywords":"",
                    "weight":1.0,
                    "source_id":""
                    }
        formated_relationship.append(relation)

In [20]:
formated_relationship

[{'src_id': 'Romeo and Juliet',
  'tgt_id': 'William Shakespeare',
  'description': 'written by',
  'keywords': '',
  'weight': 1.0,
  'source_id': ''},
 {'src_id': 'Project Gutenberg eBook',
  'tgt_id': 'Romeo and Juliet',
  'description': 'includes',
  'keywords': '',
  'weight': 1.0,
  'source_id': ''},
 {'src_id': 'Project Gutenberg',
  'tgt_id': 'United States',
  'description': 'located in',
  'keywords': '',
  'weight': 1.0,
  'source_id': ''},
 {'src_id': 'Project Gutenberg License',
  'tgt_id': 'Romeo and Juliet',
  'description': 'applies to',
  'keywords': '',
  'weight': 1.0,
  'source_id': ''},
 {'src_id': 'PG Shakespeare Team',
  'tgt_id': 'Romeo and Juliet',
  'description': 'credited for',
  'keywords': '',
  'weight': 1.0,
  'source_id': ''},
 {'src_id': 'Romeo and Juliet',
  'tgt_id': 'William Shakespeare',
  'description': 'written by',
  'keywords': '',
  'weight': 1.0,
  'source_id': ''},
 {'src_id': 'Romeo',
  'tgt_id': 'Juliet',
  'description': 'loves',
  'keywo

In [21]:
formated_chunks = []
for chunk in rec_char_docs:
    _ch = {
        "content":chunk.page_content,
        "source_id":""
    }
    formated_chunks.append(_ch)

In [22]:
formated_chunks

[{'content': '\ufeffThe Project Gutenberg eBook of Romeo and Juliet\n    \nThis ebook is for the use of anyone anywhere in the United States and\nmost other parts of the world at no cost and with almost no restrictions\nwhatsoever. You may copy it, give it away or re-use it under the terms\nof the Project Gutenberg License included with this ebook or online\nat www.gutenberg.org. If you are not located in the United States,\nyou will have to check the laws of the country where you are located\nbefore using this eBook.\n\nTitle: Romeo and Juliet\n\nAuthor: William Shakespeare\n\nRelease date: November 1, 1998 [eBook #1513]\n                Most recently updated: June 27, 2023\n\nLanguage: English\n\nCredits: the PG Shakespeare Team, a team of about twenty Project Gutenberg volunteers\n\n\n*** START OF THE PROJECT GUTENBERG EBOOK ROMEO AND JULIET ***\n\n\n\n\nTHE TRAGEDY OF ROMEO AND JULIET\n\nby William Shakespeare\n\n\n\n\nContents\n\nTHE PROLOGUE.',
  'source_id': ''},
 {'content': 'T

In [23]:
custom_kg = {"entities":formated_entities,
             "relationships":formated_relationship,
             "chunks":formated_chunks}
custom_kg

{'entities': [{'entity_name': 'United States',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' Project Gutenberg',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' Project Gutenberg License',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' www.gutenberg.org',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' Romeo and Juliet',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' William Shakespeare',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' English',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' PG Shakespeare Team',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': ' Project Gutenberg',
   'entity_type': '',
   'description': '',
   'source_id': ''},
  {'entity_name': 'Romeo and Juliet',
   'entit

In [24]:
from lightrag.llm import ollama_embedding
from lightrag.utils import EmbeddingFunc 
from inserter import insert_custom_kg_safely,initialize_lightrag

In [25]:
# Initialize RAG
rag = initialize_lightrag()

INFO:lightrag:Logger initialized for working directory: ./test
INFO:lightrag:Load KV llm_response_cache with 1 data
INFO:lightrag:Load KV full_docs with 0 data
INFO:lightrag:Load KV text_chunks with 10 data
INFO:lightrag:Loaded graph from ./test\graph_chunk_entity_relation.graphml with 113 nodes, 79 edges
INFO:nano-vectordb:Load (52, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './test\\vdb_entities.json'} 52 data
INFO:nano-vectordb:Load (87, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './test\\vdb_relationships.json'} 87 data
INFO:nano-vectordb:Load (10, 768) data
INFO:nano-vectordb:Init {'embedding_dim': 768, 'metric': 'cosine', 'storage_file': './test\\vdb_chunks.json'} 10 data


In [26]:
insert_custom_kg_safely(rag, custom_kg)

INFO:lightrag:Inserting 10 vectors to chunks
Generating embeddings:   0%|          | 0/1 [00:00<?, ?batch/s]INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embeddings "HTTP/1.1 200 OK"
Generating embeddings: 100%|████

In [27]:
print(rag.query("who is romeo?", param=QueryParam(mode="hybrid")))

Romeo is a central character in *"The Tragedy of Romeo and Juliet"*, a play written by William Shakespeare. He is a young man from the Montague family, one of the two noble households in Verona involved in a longstanding feud with the Capulets. As the son of Lord and Lady Montague, Romeo plays a significant role in the story's plot and themes, embodying the intense passion and impulsive nature of young love.

### Key Relationships and Role
Romeo is introduced as a romantic idealist, deeply infatuated with the idea of love. Initially, he pines for Rosaline, a woman who does not return his affections. However, his affections transfer swiftly and intensely when he meets Juliet, the daughter of Lord and Lady Capulet, during a chance encounter at a Capulet feast. This meeting sparks a whirlwind romance, and Romeo quickly becomes Juliet's lover and husband in secret.

Several crucial relationships shape Romeo's character throughout the play:
- **Juliet**: Juliet is the love of Romeo's life. 

In [28]:
# Visualize the knowledge graph
import networkx as nx
from pyvis.network import Network

In [30]:
# Load the GraphML file
G = nx.read_graphml('./test/graph_chunk_entity_relation.graphml')

# Create a Pyvis network
net = Network(notebook=True,bgcolor="#B2BEB5",font_color="black")

# Convert NetworkX graph to Pyvis network
net.from_nx(G)

# Save and display the network
net.show('knowledge_graph.html')

knowledge_graph.html
