In [1]:
from langchain_huggingface import HuggingFacePipeline
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.output_parsers import PydanticOutputParser, RegexParser
from pydantic import BaseModel
# from langchain_community.document_loaders import PyPDFLoader
from typing import List, Dict, Any
import json
import torch
import re

from knowledgegraph import KnowledgeGraph

In [2]:
with open("hf.key") as f:
    hf_token = f.read()

In [3]:
BYTES_IN_GB = 1000_000_000

def print_mem(msg = ""):
    (free, total) = torch.cuda.mem_get_info()
    used = total - free
    
    perc_usaged = round(used / total * 100.0, 1)
    used_gb = round(used / BYTES_IN_GB, 1)
    total_gb = round(total / BYTES_IN_GB, 1)
    print(f'CUDA mem usage: {used_gb}/{total_gb}GB ({perc_usaged}%)')

print_mem()

CUDA mem usage: 0.8/12.5GB (6.4%)


In [4]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)

# MODEL_NAME = "Trelis/Mistral-7B-Instruct-v0.1-Summarize-16k"
MODEL_NAME= "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"

quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    quantization_config=quantization_config,
    token=hf_token
)
model.config.use_cache = False

tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    token=hf_token
)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    pad_token_id=tokenizer.eos_token_id,
    do_sample=True,
    temperature=0.1,
    repetition_penalty=1.2
)

llm = HuggingFacePipeline(pipeline=pipe)

print_mem()

`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


CUDA mem usage: 8.9/12.5GB (71.1%)


In [5]:
class Relationship(BaseModel):
    source: str
    target: str
    relationship: str
    
class QueryResponse(BaseModel):
    relationships: List[Relationship]

parser = PydanticOutputParser(pydantic_object=QueryResponse)

print(parser.get_format_instructions())

The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Relationship": {"properties": {"source": {"title": "Source", "type": "string"}, "target": {"title": "Target", "type": "string"}, "relationship": {"title": "Relationship", "type": "string"}}, "required": ["source", "target", "relationship"], "title": "Relationship", "type": "object"}}, "properties": {"relationships": {"items": {"$ref": "#/$defs/Relationship"}, "title": "Relationships", "type": "array"}}, "required": ["relationships"]}
```


In [6]:
class KnowledgeGraphLLM:
    def __init__(self):
        self.kg = KnowledgeGraph()
        self.kg.clear()
        self.llm = llm
        
        # Define prompts for different operations
        self.relation_prompt = PromptTemplate(
            input_variables=["subject", "context"],
            partial_variables={
                "format_instructions": parser.get_format_instructions()
            },
            template="""[INST] Given the following information about {subject}, identify potential relationships for each person.
                If you don't know the answer, return an empty JSON object instead.
            
                [CONTEXT]
                {context}
                [/CONTEXT]

                [FORMAT]                
                {format_instructions}
                [/FORMAT]
            
                [/INST]""")
        
        self.query_prompt = PromptTemplate(
            input_variables=["question", "graph_data"],
            template="""[INST] Answer the following question using only the provided knowledge graph data.
            If you cannot answer with certainty, say "I cannot determine this from the available data."
            
            [GRAPH DATA]
            {graph_data}
            [/GRAPH_DATA]
            
            [QUESTION]
            {question}
            [/QUESTION]
            
            [/INST]"""
        )
        
        # Create LangChain chains
        self.relation_chain = self.relation_prompt | self.llm # | parser
        self.query_chain = self.query_prompt | self.llm
    
    def extract_relationships(self, subject: str, context: str) -> List[Dict[str, str]]:
        """Extract relationships from unstructured text using LLM."""
        try:
            # Get LLM's analysis
            output = self.relation_chain.invoke(input={
                "subject": subject, "context": context
            })

            print(output)
            result = re.findall(r'```json(.*?)```', output, re.DOTALL)[-1]
            
            relationships = json.loads(result.strip())["relationships"]
            
            # Add all extracted relationships to the knowledge graph
            for rel in relationships:
                self.kg.add_node(rel["source"])
                self.kg.add_node(rel["target"])
                self.kg.add_edge(rel["source"], rel["target"], rel["relationship"])
            
            return relationships
        
        except json.JSONDecodeError:
            print("Error: Could not parse LLM response as JSON")
            return []

    def graph_data(self):
        """Dump graph data to JSON"""
        return json.dumps(self.kg.dump(), indent=2)
        
    def smart_query(self, question: str) -> str:
        """Query the knowledge graph using LLM-powered reasoning."""
        # Get LLM's analysis
        return self.query_chain.invoke(input={
            "question": question,
            "graph_data": self.graph_data()
        })
    
    def get_graph_summary(self) -> Dict[str, Any]:
        """Return a summary of the knowledge graph."""
        return {
            "node_count": len(self.kg.nodes),
            "edge_count": sum(len(edges) for edges in self.kg.edges.values()),
            "data": self.graph_data()
        }

# Example usage
def query_llm(kg_llm, contexts, subject, questions):
    # Add information through unstructured text
    # Extract and add relationships

    for ctx in contexts:
        relationships = kg_llm.extract_relationships(subject, ctx)
        print("Extracted relationships:", json.dumps(relationships, indent=2))
    
    # Query the enhanced knowledge graph
    for question in questions:
        answer = kg_llm.smart_query(question).partition("[/INST]")[-1]
        print(f"###\n\nQ: {question}\nA: {answer}\n\n###")
    
    # Get graph summary
    print("\nKnowledge Graph Summary:")
    print(json.dumps(kg_llm.get_graph_summary(), indent=2))

In [8]:
context = """
John is a senior software engineer at TechCorp. He graduated from MIT
in 2015 with a degree in Computer Science. He currently leads the 
machine learning team and mentors junior engineers like Sarah and Mike.
"""
    
questions = [
    "What is John's role at TechCorp?",
    "Who does John mentor?",
    "Where did John study?",
    "What was John's graduation year?"
]
    
kg_llm = KnowledgeGraphLLM()

query_llm(kg_llm, [context], "John", questions)

[INST] Given the following information about John, identify potential relationships for each person.
                If you don't know the answer, return an empty JSON object instead.
            
                [CONTEXT]
                
John is a senior software engineer at TechCorp. He graduated from MIT
in 2015 with a degree in Computer Science. He currently leads the 
machine learning team and mentors junior engineers like Sarah and Mike.

                [/CONTEXT]

                [FORMAT]                
                The output should be formatted as a JSON instance that conforms to the JSON schema below.

As an example, for the schema {"properties": {"foo": {"title": "Foo", "description": "a list of strings", "type": "array", "items": {"type": "string"}}}, "required": ["foo"]}
the object {"foo": ["bar", "baz"]} is a well-formatted instance of the schema. The object {"properties": {"foo": ["bar", "baz"]}} is not well-formatted.

Here is the output schema:
```
{"$defs": {"Re

# Conclusion

The LLM chain correctly answers the questions regarding the relationships.  
In some test runs the LLM answers the first question, about John's role, as that of a mentor.

It cannot answer the other questions, because we (explicitly) did not feed those back into the LLM.  
This to showcase how it was really fetching the data from the graph that was built and not from the context.

In [7]:
def parse_book():
    # Initialize the system
    kg_llm = KnowledgeGraphLLM()
    
    with open("data/dracula.txt") as f:
        context = f.read()

    chapters = re.split(r"\n\nCHAPTER\ .*\n", context)
    (print(len(c)) for c in chapters)

    questions = [
        "Who are the main characters in this story?",
        "What is their relation to each other?",
        "Who is the antagonist of the story?",
        "How does the antagonist end up?"
    ]
    
    query_llm(kg_llm, chapters, "Dracula", questions)

In [8]:
parse_book()

[INST] Given the following information about Dracula, identify potential relationships for each person.
                If you don't know the answer, return an empty JSON object instead.
            
                [CONTEXT]
                The Project Gutenberg eBook of Dracula
    
This ebook is for the use of anyone anywhere in the United States and
most other parts of the world at no cost and with almost no restrictions
whatsoever. You may copy it, give it away or re-use it under the terms
of the Project Gutenberg License included with this ebook or online
at www.gutenberg.org. If you are not located in the United States,
you will have to check the laws of the country where you are located
before using this eBook.

Title: Dracula

Author: Bram Stoker

Release date: October 1, 1995 [eBook #345]
                Most recently updated: November 12, 2023

Language: English

Credits: Chuck Greif and the Online Distributed Proofreading Team


*** START OF THE PROJECT GUTENBERG EBOOK DRAC

OutOfMemoryError: CUDA out of memory. Tried to allocate 4.45 GiB. GPU 0 has a total capacity of 11.60 GiB of which 351.50 MiB is free. Including non-PyTorch memory, this process has 10.64 GiB memory in use. Of the allocated memory 9.90 GiB is allocated by PyTorch, and 562.06 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)