<!-- @format -->

# Constructing knowledge graphs


In [20]:
import os

from dotenv import load_dotenv
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import AzureChatOpenAI
from langchain_community.graphs import Neo4jGraph
from phoenix.trace.langchain import LangChainInstrumentor
from langchain_community.document_loaders import WikipediaLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [21]:
load_dotenv("../.env")

True

In [22]:
os.environ["PHOENIX_PROJECT_NAME"] = "3 constructing knowledge graphs"
LangChainInstrumentor().instrument()

Attempting to instrument while already instrumented


In [23]:
graph = Neo4jGraph()

In [24]:
# jsonレスポンスで変換
model = AzureChatOpenAI(
    azure_deployment="gpt-4o",
    azure_endpoint=os.environ["AZURE_OPENAI_ENDPOINT"],
    api_version=os.environ["OPENAI_API_VERSION"],
    temperature=0.0,
)

llm_transformer = LLMGraphTransformer(
    llm=model,
    # allowed_nodes=["Person", "Country", "Organization"],
    # allowed_relationships=["NATIONALITY", "LOCATED_IN", "WORKED_AT", "SPOUSE"],
)

In [32]:
# load documents
raw_documents = WikipediaLoader(query="Shohei Ohtani", load_max_docs=5).load()
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1024,
    chunk_overlap=256,
)
documents = text_splitter.split_documents(raw_documents[:3])
print(documents)
print(len(documents))

[Document(page_content='Shohei Ohtani (大谷 翔平 (おおたに しょうへい), Ōtani Shōhei, [oːtaɲi ɕoːheː]; born July 5, 1994) is a Japanese professional baseball pitcher and designated hitter for the Los Angeles Dodgers of Major League Baseball (MLB). Nicknamed "Shotime", he has previously played in MLB for the Los Angeles Angels and the Hokkaido Nippon-Ham Fighters of Nippon Professional Baseball (NPB). Because of his elite contributions as a hitter and as a pitcher, a rarity for two-way players, Ohtani\'s 2022 and 2023 seasons are considered among the greatest in baseball history, with some comparing them favorably to the early career of Babe Ruth.\nConsidered early on as an elite two-way player, Ohtani was the first pick of the Fighters in the 2012 draft. He played in NPB for the Fighters from 2013 through 2017 as a pitcher and an outfielder, and won the 2016 Japan Series with them. The Fighters posted Ohtani to MLB after the 2017 season, and he signed with the Angels, soon winning the 2018 American

In [33]:
# build graph by llm
graph_documents = llm_transformer.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

Nodes:[Node(id='Shohei Ohtani', type='Person'), Node(id='Los Angeles Dodgers', type='Organization'), Node(id='Major League Baseball', type='Organization'), Node(id='Los Angeles Angels', type='Organization'), Node(id='Hokkaido Nippon-Ham Fighters', type='Organization'), Node(id='Nippon Professional Baseball', type='Organization'), Node(id='Babe Ruth', type='Person'), Node(id='2012 Draft', type='Event'), Node(id='2013', type='Date'), Node(id='2017', type='Date'), Node(id='2016 Japan Series', type='Event'), Node(id='2018', type='Date'), Node(id='American League Rookie Of The Year Award', type='Award')]
Relationships:[Relationship(source=Node(id='Shohei Ohtani', type='Person'), target=Node(id='Los Angeles Dodgers', type='Organization'), type='PLAYS_FOR'), Relationship(source=Node(id='Shohei Ohtani', type='Person'), target=Node(id='Major League Baseball', type='Organization'), type='PLAYS_IN'), Relationship(source=Node(id='Shohei Ohtani', type='Person'), target=Node(id='Los Angeles Angels',

In [34]:
graph.add_graph_documents(graph_documents)

In [36]:
graph.refresh_schema()
print(graph.schema)

Node properties:
Person {id: STRING}
Organization {id: STRING}
Year {id: STRING}
Place {id: STRING}
Event {id: STRING}
Role {id: STRING}
Group {id: STRING}
Concept {id: STRING}
Date {id: STRING}
Award {id: STRING}
Nationality {id: STRING}
Contract {id: STRING}
Field {id: STRING}
Country {id: STRING}
Achievement {id: STRING}
Activity {id: STRING}
Location {id: STRING}
Language {id: STRING}
Profession {id: STRING}
Ethnicity {id: STRING}
Agreement {id: STRING}
Record {id: STRING}
Team {id: STRING}
Relationship properties:

The relationships:
(:Person)-[:PLAYER]->(:Organization)
(:Person)-[:PLAYED_FOR]->(:Organization)
(:Person)-[:COMPARED_TO]->(:Person)
(:Person)-[:WON]->(:Award)
(:Person)-[:WON]->(:Event)
(:Person)-[:STARTED_CAREER]->(:Date)
(:Person)-[:ENDED_CAREER]->(:Date)
(:Person)-[:PLAYS_FOR]->(:Organization)
(:Person)-[:PLAYS_IN]->(:Organization)
(:Person)-[:ACHIEVED]->(:Achievement)
(:Person)-[:WINNER]->(:Award)
(:Person)-[:WON_IN]->(:Year)
(:Person)-[:NAMED_TO]->(:Team)
(:Person