# LLM Graph Transformer
The LLMGraphTransformer converts text documents into structured graph documents by leveraging an LLM to parse and categorize entities and their relationships.

In [1]:
import os
import openai

from openai import AzureOpenAI
from langchain_openai import AzureChatOpenAI

from langchain.embeddings.openai import OpenAIEmbeddings
from langchain_openai import AzureOpenAIEmbeddings

from langchain_experimental.graph_transformers import LLMGraphTransformer

In [2]:
# Load variables from a local .env file into the process environment so the
# os.getenv(...) lookups in the following cell can read them.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
def _require_env(name):
    """Return the value of environment variable ``name``.

    Raises:
        RuntimeError: if the variable is unset or empty. Without this check a
            missing value only surfaces later as an opaque ``TypeError`` when
            ``None`` is assigned into ``os.environ``.
    """
    value = os.getenv(name)
    if not value:
        raise RuntimeError(
            f"Environment variable {name!r} is not set; "
            "define it in the environment or in the .env file."
        )
    return value


# Azure OpenAI connection settings read from the environment.
api_key = os.getenv("openai_api_key")
api_base = _require_env("azure_endpoint")
api_type = os.getenv("openai_api_type")
api_version = _require_env("openai_api_version")

# Model / deployment names. `deployment_id` and `model_engine` are aliases of
# `gpt_model`: all three come from the same `openai_model_gpt_name` variable.
gpt_model = os.getenv("openai_model_gpt_name")
deployment_id = gpt_model
model_engine = gpt_model
embd_model = os.getenv("openai_model_embd_name")

# Export the endpoint and API version under the variable names set by the
# original cell, so later cells that rely on them keep working.
os.environ["AZURE_OPENAI_ENDPOINT"] = api_base
os.environ["OPENAI_API_VERSION"] = api_version

In [4]:
# Chat model used for graph extraction. The endpoint and API version come from
# the configuration cell instead of a hard-coded literal, so the client always
# agrees with the OPENAI_API_VERSION / AZURE_OPENAI_ENDPOINT values set there.
llm = AzureChatOpenAI(
    api_key=api_key,
    azure_endpoint=api_base,
    openai_api_version=api_version,
    azure_deployment=gpt_model,
    # Lowest sampling temperature to reduce run-to-run variation in the
    # extracted graph.
    temperature=0,
)

#### Next, we need to define the Neo4j credentials and connection. Follow the [Neo4j installation steps](https://neo4j.com/docs/operations-manual/current/installation/) to set up a Neo4j database.



In [5]:
from langchain_community.graphs import Neo4jGraph

# Local-development defaults for the Neo4j connection. `setdefault` only fills
# in a value when the variable is not already defined, so real credentials
# supplied through the environment or the .env file are not overwritten by
# these placeholders. (`os` is already imported in the first cell.)
os.environ.setdefault("NEO4J_URI", "bolt://localhost:7687")
os.environ.setdefault("NEO4J_USERNAME", "neo4j")
os.environ.setdefault("NEO4J_PASSWORD", "password")

# Constructed without arguments: the connection settings are expected to be
# picked up from the NEO4J_* environment variables above.
graph = Neo4jGraph()

#### The LLMGraphTransformer converts text documents into structured graph documents by leveraging an LLM to parse and categorize entities and their relationships.

In [6]:
# Unconstrained transformer: no allowed node or relationship types are given,
# only the chat model to use.
llm_transformer = LLMGraphTransformer(
    llm=llm,
)

In [7]:
from langchain_core.documents import Document

# Sample passage that the transformers below turn into graph documents.
text = """
Marie Curie, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
She was, in 1906, the first woman to become a professor at the University of Paris.
"""

# Wrap the passage in a single Document and keep it in a one-element list.
curie_document = Document(page_content=text)
documents = [curie_document]

In [8]:
# Run the extraction over the sample documents.
graph_documents = llm_transformer.convert_to_graph_documents(documents)

# Show the nodes and relationships of the first resulting graph document.
first_graph_document = graph_documents[0]
print(f"Nodes:{first_graph_document.nodes}")
print(f"Relationships:{first_graph_document.relationships}")

Nodes:[Node(id='Marie Curie', type='Person'), Node(id='Pierre Curie', type='Person'), Node(id='Nobel Prize', type='Award'), Node(id='University Of Paris', type='Organization'), Node(id='Curie Family', type='Family'), Node(id='Radioactivity', type='Scientific field')]
Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Radioactivity', type='Scientific field'), type='CONDUCTED_RESEARCH_ON'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Nobel Prize', type='Award'), type='WON'), Relationship(source=Node(id='Pierre Curie', type='Person'), target=Node(id='Nobel Prize', type='Award'), type='WON'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='BECAME_PROFESSOR_AT'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Curie Family', type='Family'), type='PART_OF'), Relationship(source=Node(id='Pierre Curie', type='Person'), tar

## Storing to graph database

In [20]:
# Store the extracted graph documents through the Neo4j connection.
graph.add_graph_documents(graph_documents=graph_documents)

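### Note that the graph construction process is non-deterministic since we are using an LLM. Therefore, you might get slightly different results on each execution.

You can also restrict extraction to specific node and relationship types, as the next cell does with `allowed_nodes` and `allowed_relationships`.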

In [21]:
# Node and relationship types the filtered transformer is configured with.
allowed_node_types = ["Person", "Country", "Organization"]
allowed_relationship_types = ["NATIONALITY", "LOCATED_IN", "WORKED_AT", "SPOUSE"]

llm_transformer_filtered = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=allowed_node_types,
    allowed_relationships=allowed_relationship_types,
)

# Re-run the extraction on the same documents with the filtered transformer.
graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(documents)

# Show the nodes and relationships of the first filtered graph document.
first_filtered_document = graph_documents_filtered[0]
print(f"Nodes:{first_filtered_document.nodes}")
print(f"Relationships:{first_filtered_document.relationships}")

Nodes:[Node(id='Marie Curie', type='Person'), Node(id='Pierre Curie', type='Person'), Node(id='Poland', type='Country'), Node(id='France', type='Country'), Node(id='University Of Paris', type='Organization')]
Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Poland', type='Country'), type='NATIONALITY'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='France', type='Country'), type='NATIONALITY'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Pierre Curie', type='Person'), type='SPOUSE'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='University Of Paris', type='Organization'), type='WORKED_AT')]


In [23]:
# Store the filtered graph documents through the same Neo4j connection.
graph.add_graph_documents(graph_documents=graph_documents_filtered)