In [9]:
# Run this cell and restart the kernel if the libraries are not present in the environment
! pip install --upgrade --quiet  langchain==0.1.12 langchain-community==0.0.29 langchain-openai==0.1.1 langchain-experimental==0.0.54 neo4j==5.18.0 pypdf==4.1.0 wikipedia==1.4.0

In [10]:
# Execute the helper notebook. The names entity_chain, structured_retriever and chain used in
# later cells are not defined in this notebook, so they are presumably provided by it — TODO confirm.
%run /mnt/code/utils/graph_RAG_chain.ipynb

In [3]:
import os

# Common data processing
import json
import textwrap

# Langchain
from langchain_community.graphs import Neo4jGraph
from langchain_community.vectorstores import Neo4jVector
from langchain_openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQAWithSourcesChain
from langchain_openai import ChatOpenAI
from langchain_experimental.graph_transformers import LLMGraphTransformer
from langchain_core.pydantic_v1 import BaseModel, Field
from typing import Tuple, List, Optional
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores.neo4j_vector import remove_lucene_chars
from langchain_core.prompts.prompt import PromptTemplate
from langchain_core.runnables import (
    RunnableBranch,
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)
from langchain_core.messages import AIMessage, HumanMessage
from langchain_community.chat_models import ChatMlflow
from langchain_community.document_loaders import PyPDFLoader

from langchain.document_loaders import WikipediaLoader
from langchain.text_splitter import TokenTextSplitter
from langchain_core.output_parsers import StrOutputParser


from neo4j import GraphDatabase

import csv
import pandas as pd
import time


# Warning control
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Load connection settings from the environment so no credentials are stored in the notebook
NEO4J_URI = os.getenv('NEO4J_URI')
NEO4J_USERNAME = os.getenv('NEO4J_USERNAME')
NEO4J_PASSWORD = os.getenv('NEO4J_PASSWORD')
OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# Fall back to the 'neo4j' database name when the variable is unset or empty
NEO4J_DATABASE = os.getenv('NEO4J_DATABASE') or 'neo4j'

# Pass the settings explicitly so the values read above are actually used and the
# connection target is visible in this cell instead of being resolved implicitly.
graph = Neo4jGraph(
    url=NEO4J_URI,
    username=NEO4J_USERNAME,
    password=NEO4J_PASSWORD,
    database=NEO4J_DATABASE,
)

In [18]:
# Load the Activision PDF and split it into documents.
# PyPDFLoader comes from the imports cell at the top of the notebook, so it is not re-imported here.
loader = PyPDFLoader("/mnt/code/data/activision.pdf")
pages = loader.load_and_split()

In [None]:
# Display the text content of the first document produced from the PDF
pages[0].page_content

In [97]:
# Fetch the Wikipedia documents returned for the "Elon Musk" query
raw_documents = WikipediaLoader(query="Elon Musk").load()

# Token-based chunking: 512-token chunks with a 24-token overlap between neighbours
text_splitter = TokenTextSplitter(
    chunk_overlap=24,
    chunk_size=512,
)

# Only the first three loaded documents are chunked
documents = text_splitter.split_documents(raw_documents[:3])

In [None]:
# Optional: use the AI gateway instead — uncomment and change the endpoint name as required
# llm = ChatMlflow(
#     target_uri=os.environ["DOMINO_MLFLOW_DEPLOYMENTS"],
#     endpoint="chat-gpt35turbo-sm",
# )

# OpenAI chat model at temperature 0 (gpt-4-0125-preview occasionally has issues)
llm = ChatOpenAI(
    model_name="gpt-3.5-turbo-0125",
    temperature=0,
)

# Graph transformer driven by the chat model configured above
llm_transformer = LLMGraphTransformer(llm=llm)

In [99]:
# Number of chunks that will be handed to the graph transformer
len(documents)

6

In [None]:
# Let the LLM transformer turn the Wikipedia chunks into graph documents
graph_documents = llm_transformer.convert_to_graph_documents(documents)

# Write them to Neo4j, keeping the source documents and the base entity label
graph.add_graph_documents(graph_documents, include_source=True, baseEntityLabel=True)

In [5]:
# Hybrid-search vector index built over the existing graph: Document nodes, using their
# "text" property as input and "embedding" as the property that holds the vector.
vector_index = Neo4jVector.from_existing_graph(
    OpenAIEmbeddings(),
    node_label="Document",
    text_node_properties=["text"],
    embedding_node_property="embedding",
    search_type="hybrid",
)

In [142]:
# Retriever: full-text index named "entity" on the id property of __Entity__ nodes.
# IF NOT EXISTS keeps the cell safe to re-run.
graph.query(
    "CREATE FULLTEXT INDEX entity IF NOT EXISTS FOR (e:__Entity__) ON EACH [e.id]"
)


[]

In [6]:
# Show the names the entity chain extracts from a sample question
entity_chain.invoke({"question": "Where was Elon Musk born?"}).names

['Elon Musk']

In [7]:
# Print what the structured retriever returns for a sample question.
# NOTE(review): the saved output lists Elon Musk relationships although the question is about
# Stanford — the output may be stale; re-run to confirm.
print(structured_retriever("Where is Stanford?"))

Elon Musk - FOUNDER -> Spacex
Elon Musk - FOUNDER -> Tesla Inc.
Elon Musk - FOUNDER -> Boring Company
Elon Musk - FOUNDER -> Xai
Elon Musk - FOUNDER -> X.Com
Elon Musk - OWNER -> Spacex
Elon Musk - OWNER -> X Corp.
Elon Musk - OWNER -> X Corp
Elon Musk - EXECUTIVE_CHAIRMAN -> X Corp.
Elon Musk - CTO -> X Corp.
Elon Musk - CO-FOUNDER -> Neuralink
Elon Musk - CO-FOUNDER -> Openai
Elon Musk - CO-FOUNDER -> Zip2
Elon Musk - CO-FOUNDER -> Paypal
Elon Musk - CO-FOUNDER -> Solarcity
Elon Musk - PRESIDENT -> Musk Foundation
Elon Musk - MEMBER -> South African Musk Family
Elon Musk - BIRTH_PLACE -> Pretoria
Elon Musk - EDUCATION -> University Of Pretoria
Elon Musk - EDUCATION -> Queen'S University At Kingston
Elon Musk - EDUCATION -> University Of Pennsylvania
Elon Musk - IMMIGRATION -> Canada
Elon Musk - RELOCATION -> California
Elon Musk - ACQUISITION -> Compaq
Elon Musk - ACQUISITION -> Ebay
Elon Musk - ACQUISITION -> Twitter
Elon Musk - MERGER -> Confinity
Elon Musk - PROPOSER -> Hyperloop


In [129]:
# Single-turn question through the full chain (no chat history)
chain.invoke({"question": "Where was Elon born?"})

Search query: Where was Elon born?


'Elon Musk was born in Pretoria, South Africa.'

In [8]:
# Follow-up question that relies on chat_history to resolve "he"; the saved output shows the
# search query being rewritten to "When was Elon born?".
chain.invoke(
    {
        "question": "When was he born?",
        "chat_history": [("Where was Elon ?", "Where was Elon born?")],
    }
)

Search query: When was Elon born?


'Elon Musk was born on June 28, 1971.'

In [132]:
# Now let's ingest the Activision Form 10-K.
# NOTE(review): this repeats the PDF-loading cell near the top of the notebook (same file, same
# call); the reload looks redundant on a top-to-bottom run — confirm before removing.

loader = PyPDFLoader("/mnt/code/data/activision.pdf")
pages = loader.load_and_split()

In [134]:
# Only the first ten PDF documents go through the LLM graph transformer
graph_documents = llm_transformer.convert_to_graph_documents(pages[:10])

# Write them to Neo4j, keeping the source documents and the base entity label
graph.add_graph_documents(graph_documents, include_source=True, baseEntityLabel=True)

In [150]:
# Question about the ingested PDF content: Activision Publishing's core business
chain.invoke({"question": "What is the core business of Activision Publishing, Inc?"})

Search query: What is the core business of Activision Publishing, Inc?


'The core business of Activision Publishing, Inc. is delivering content through both premium and free-to-play offerings, generating revenue from full-game and in-game sales, and licensing software to third-party companies. Their key product offerings include titles and content for the Call of Duty franchise.'

In [11]:
# Relationship-style question: which parties have agreements for Diablo Immortal
chain.invoke({"question": "Who has agreements for Diablo Immortal"})

Search query: Who has agreements for Diablo Immortal


'Activision Blizzard has agreements for Diablo Immortal with NetEase.'

In [151]:
# Open-ended question about the company's strategy and vision
chain.invoke({"question": "What is Activisions strategy and vision?"})

Search query: What is Activisions strategy and vision?


"Activision's strategy and vision are to connect and engage the world through epic entertainment by expanding audience reach, deepening consumer engagement, and increasing player investment. They focus on creating compelling content, offering experiences on multiple platforms, and driving engagement through social connectivity within their communities."

In [152]:
# Question about a single entity: what King Digital offers
chain.invoke({"question": "What does King digital offer?"})

Search query: What does King digital offer?


'King Digital offers content through free-to-play offerings and primarily generates revenue from in-game sales and in-game advertising on mobile platforms. Their key product offerings include titles and content for Candy Crush™, a "match three" franchise.'

In [153]:
# Question that needs a connection between two entities (King Digital and World of Warcraft)
chain.invoke({"question": "How are King digital and World of warcraft related?"})

Search query: How are King digital and World of warcraft related?


'King Digital Entertainment is a subsidiary of Activision Blizzard, the parent company of Blizzard Entertainment, which develops the Warcraft franchise, including World of Warcraft.'

In [155]:
# Yes/no question combining the same two entities
chain.invoke({"question": "Can I get World of warcraft subscription from King Digital?"})

Search query: Can I get World of warcraft subscription from King Digital?


'No, you cannot get a World of Warcraft subscription from King Digital.'