In [1]:
import dotenv
dotenv.load_dotenv()

True

In [2]:
import os
[k for k in os.environ.keys() if k.startswith("ASTRA")]

['ASTRA_DB_DATABASE_ID', 'ASTRA_DB_APPLICATION_TOKEN']

In [2]:
from knowledge_graph.cassandra_graph_store import CassandraGraphStore
graph_store = CassandraGraphStore()

# Extracting Knowledge Graph

## Using LLMGraphTansformer

In [4]:
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_openai import ChatOpenAI

# Prompt used by LLMGraphTransformer is tuned for Gpt4.
llm = ChatOpenAI(temperature=0, model_name="gpt-4")

llm_transformer = LLMGraphTransformer(llm=llm)

In [5]:
from langchain_core.documents import Document

text = """
Marie Curie, was a Polish and naturalised-French physicist and chemist who conducted pioneering research on radioactivity.
She was the first woman to win a Nobel Prize, the first person to win a Nobel Prize twice, and the only person to win a Nobel Prize in two scientific fields.
Her husband, Pierre Curie, was a co-winner of her first Nobel Prize, making them the first-ever married couple to win the Nobel Prize and launching the Curie family legacy of five Nobel Prizes.
She was, in 1906, the first woman to become a professor at the University of Paris.
"""
documents = [Document(page_content=text)]
graph_documents = llm_transformer.convert_to_graph_documents(documents)
print(f"Nodes:{graph_documents[0].nodes}")
print(f"Relationships:{graph_documents[0].relationships}")

Nodes:[Node(id='Marie Curie', type='Person'), Node(id='Pierre Curie', type='Person'), Node(id='Nobel Prize', type='Award'), Node(id='University Of Paris', type='Organization'), Node(id='Polish', type='Nationality'), Node(id='French', type='Nationality'), Node(id='Physicist', type='Profession'), Node(id='Chemist', type='Profession'), Node(id='Radioactivity', type='Scientific field'), Node(id='Curie Family', type='Family'), Node(id='Professor', type='Profession')]
Relationships:[Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Polish', type='Nationality'), type='HAS_NATIONALITY'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='French', type='Nationality'), type='HAS_NATIONALITY'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Physicist', type='Profession'), type='IS_A'), Relationship(source=Node(id='Marie Curie', type='Person'), target=Node(id='Chemist', type='Profession'), type='IS_A'), Relationship(sourc

In [6]:

graph_store.add_graph_documents(graph_documents)

### Optional: Predefine entities / relationships
```python
llm_transformer_filtered = LLMGraphTransformer(
    llm=llm,
    allowed_nodes=["Person", "Country", "Organization"],
    allowed_relationships=["NATIONALITY", "LOCATED_IN", "WORKED_AT", "SPOUSE"],
)
graph_documents_filtered = llm_transformer_filtered.convert_to_graph_documents(
    documents
)
print(f"Nodes:{graph_documents_filtered[0].nodes}")
print(f"Relationships:{graph_documents_filtered[0].relationships}")
```

# Querying

In [4]:
graph_store.query("Marie Curie", {})

AttributeError: 'CassandraGraphStore' object has no attribute 'invoke'

In [5]:
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model_name = "gpt-4")

In [6]:
QUERY_KEYWORD_EXTRACT_PROMPT = (
    "A question is provided below. Given the question, extract up to 5 "
    "keywords from the text. Focus on extracting the keywords that we can use "
    "to best lookup answers to the question. Avoid stopwords.\n"
    "---------------------\n"
    "{question}\n"
    "---------------------\n"
    "Provide keywords as a JSON list.\n"
)

from langchain_core.runnables import RunnablePassthrough
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import JsonOutputParser

keywords_chain = (
    ChatPromptTemplate.from_messages([QUERY_KEYWORD_EXTRACT_PROMPT])
    | llm
    | JsonOutputParser()
)

query_keyword_extraction = (
    { "question": RunnablePassthrough() }
    | keywords_chain
)

In [7]:
query_keyword_extraction.invoke("Who is Marie Curie?")

['Marie Curie']

In [13]:
from operator import itemgetter
from langchain_core.runnables import RunnableLambda

def _combine_triples(triple_strs):
    return "\n".join(triple_strs)

ANSWER_PROMPT = (
    "The original question is given below."
    "This question has been used to retrieve information from a knowledge graph."
    "The matching triples are shown below."
    "Use the information in the triples to answer the original question.\n\n"
    "Original Question: {question}\n\n"
    "Knowledge Graph Triples:\n{context}\n\n"
    "Response:"
)

chain = (
    { "question": RunnablePassthrough() }
    | RunnablePassthrough.assign(keywords = keywords_chain)
    | RunnablePassthrough.assign(triples = itemgetter("keywords") | graph_store.as_runnable())
    | RunnablePassthrough.assign(context = itemgetter("triples") | RunnableLambda(_combine_triples))
    | ChatPromptTemplate.from_messages([ANSWER_PROMPT])
    | llm
)

In [15]:
chain.invoke("How is Marie Curie related to Pierre Curie?")

AIMessage(content='Marie Curie is related to Pierre Curie as his wife.', response_metadata={'token_usage': {'completion_tokens': 14, 'prompt_tokens': 207, 'total_tokens': 221}, 'model_name': 'gpt-4', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None})