In [40]:
! pip install -U langchain-nomic langchain_community tiktoken chromadb langchainhub langchain langgraph tavily-python gpt4all firecrawl-py



In [41]:
import os
from dotenv import load_dotenv

load_dotenv()  # Take environment variables from .env.

# Both keys are read from the environment; define LANG_KEY and JINA_KEY in your .env file.
langchain_api_key = os.getenv('LANG_KEY')
jina_key = os.getenv('JINA_KEY')

# os.environ only accepts string values, so assigning a missing key (None) would
# raise TypeError. Configure LangSmith tracing only when the key is available.
if langchain_api_key:
    os.environ['LANGCHAIN_TRACING_V2'] = 'true'
    os.environ['LANGCHAIN_ENDPOINT'] = 'https://api.smith.langchain.com'
    os.environ['LANGCHAIN_API_KEY'] = langchain_api_key
else:
    print("LANG_KEY is not set; LangSmith tracing variables were not configured.")

In [42]:
# Name of the local model used by the chat models below.
# llama3 is used here, but any other model name can be substituted.
local_llm = "llama3"

In [43]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_community.vectorstores.utils import filter_complex_metadata
from langchain.docstore.document import Document
import requests

# Bunch of randomly chosen URLs (restricted to 3 to not destroy API token limits).
# Kept as a list rather than a set so pages are fetched in the same order on
# every run. Uncomment entries to index more pages.
urls = [
    'https://en.wikipedia.org/wiki/Knowledge_graph',
    'https://en.wikipedia.org/wiki/Semantic_technology',
    'https://en.wikipedia.org/wiki/Semantic_integration',
    # 'https://en.wikipedia.org/wiki/Logical_graph',
    # 'https://en.wikipedia.org/wiki/Knowledge_graph_embedding',
    # 'https://en.wikipedia.org/wiki/Graph_database',
    # 'https://en.wikipedia.org/wiki/Formal_semantics_(natural_language)',
    # 'https://en.wikipedia.org/wiki/Artificial_general_intelligence',
    # 'https://en.wikipedia.org/wiki/Recursive_self-improvement',
    # 'https://en.wikipedia.org/wiki/Automated_planning_and_scheduling',
    # 'https://en.wikipedia.org/wiki/Machine_learning',
    # 'https://en.wikipedia.org/wiki/Natural_language_processing',
]

headers = {
   'Accept': 'application/json',
   # NOTE(review): the key is sent verbatim; if the API expects a "Bearer <token>"
   # value, JINA_KEY must already contain that prefix — confirm.
   'Authorization': jina_key
}

base_url = 'https://r.jina.ai/'

# Upper bound (seconds) for each HTTP request so a stalled request cannot hang the cell.
REQUEST_TIMEOUT_SECONDS = 60


def _fetch_reader_json(url):
    """Fetch `url` through the reader endpoint at `base_url` and return the decoded JSON body.

    Raises:
        requests.HTTPError: if the server answers with a 4xx/5xx status, so the
            failure is reported here instead of as a KeyError further down.
        requests.Timeout: if no response arrives within REQUEST_TIMEOUT_SECONDS.
    """
    response = requests.get(base_url + url, headers=headers, timeout=REQUEST_TIMEOUT_SECONDS)
    response.raise_for_status()
    return response.json()


# One decoded JSON response per URL, in the order given above.
docs = [_fetch_reader_json(url) for url in urls]

# Reshape each API response into {"content": ..., "metadata": ...}:
# the text lives under data.content, and every other field of `data` is kept as metadata.
docs_list = [
    {
        "content": response['data']['content'],
        "metadata": {
            field: value
            for field, value in response['data'].items()
            if field != 'content'
        },
    }
    for response in docs
]


In [44]:
# Split documents into smaller chunks. Smaller chunk sizes are usually better (not too small),
# but results will vary depending on the prompt and your local data. Indexing takes longer
# with small chunks, but they can help alleviate lost-in-the-middle issues.

# from_tiktoken_encoder is a constructor-style classmethod, so call it on the class
# itself rather than on a throwaway instance.
text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=256, chunk_overlap=0
)

# One metadata dict per source text; each resulting chunk carries its source's metadata.
doc_splits = text_splitter.create_documents(
    texts=[doc['content'] for doc in docs_list],
    metadatas=[doc['metadata'] for doc in docs_list],
)


# Rebuild every chunk with metadata restricted to primitive values
# (exactly str, int, float or bool); list-valued and other fields are dropped.
# Items that are not Document instances are skipped.
filtered_docs = [
    Document(
        page_content=split.page_content,
        metadata=(
            {}
            if split.metadata is None
            else {
                key: value
                for key, value in split.metadata.items()
                if type(value) in [str, int, float, bool]
            }
        ),
    )
    for split in doc_splits
    if isinstance(split, Document) and hasattr(split, 'metadata')
]


In [48]:
# Add to vector DB
model_name = "all-MiniLM-L6-v2.gguf2.f16.gguf"
# Use a real boolean: the string 'True' only worked because any non-empty string
# is truthy (the string 'False' would have enabled downloads as well).
gpt4all_kwargs = {'allow_download': True}

# Embed the cleaned chunks and store them in the "rag-chroma" collection.
# NOTE(review): re-running this cell in the same kernel may add the same chunks to
# the collection again — confirm, and reset the collection first if so.
vectorstore = Chroma.from_documents(
    documents=filtered_docs,
    collection_name="rag-chroma",
    embedding=GPT4AllEmbeddings(
        model_name=model_name,
        gpt4all_kwargs=gpt4all_kwargs
    )
)

# Retriever interface over the vector store, used by the cells below.
retriever = vectorstore.as_retriever()

In [49]:
# Create a retrieval grader to determine if the document pulled is relevant to the user question

from langchain.prompts import PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import JsonOutputParser

# Chat model for grading: JSON output format, temperature 0
llm = ChatOllama(temperature=0, format='json', model=local_llm)

# Grading prompt: asks for a JSON object with a single 'score' key ('yes' / 'no')
prompt = PromptTemplate(
    input_variables=["question", "document"],
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are a grader assessing relevance
    of a retrieved document to a user question. If the document contains keywords related to the user question,
    grade it as relevant. It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
    Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question. \n
    Provide the binary score as a JSON with a single key 'score' and no premable or explanation.
    <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the retrieved document: \n\n {document} \n\n
    Here is the user question: {question} \n <|eot_id|><|start_header_id|>assistant<|end_header_id|>
    """,
)

# Pipeline: fill the prompt -> call the model -> parse the reply as JSON
retrieval_grader = prompt | llm | JsonOutputParser()

# Smoke test: retrieve for a sample question and grade the second hit (index 1)
question = "What is a knowledge graph?"
docs = retriever.invoke(question)
doc_text = docs[1].page_content
print(
    retrieval_grader.invoke(
        {"document": doc_text, "question": question}
    )
)

{'score': 'yes'}


In [52]:
#Assuming the above was relevant, let's now generate our response with the document that was retrieved

from langchain import hub
from langchain_core.output_parsers import StrOutputParser 

# Answer-generation prompt. The declared input variables must match the
# placeholders used in the template: {question} and {context}.
prompt = PromptTemplate(
    template="""<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an AI assistant tasked with generating a response to a user question. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use a maximum of 3 sentences and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|>
    Here is the user question: {question} 
    Here is the context: {context}
    Answer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>""",
    input_variables=["question", "context"],
)


# Plain-text chat model for generation (no JSON output format here), temperature 0
llm = ChatOllama(model=local_llm, temperature=0)


# Post-processing
def format_docs(docs):
    """Join the `page_content` of each document into one string.

    Documents are separated by a blank line; an empty iterable yields "".
    """
    separator = "\n\n"
    return separator.join(item.page_content for item in docs)

# Chain: fill the prompt -> call the model -> return the reply as a plain string
rag_chain = prompt | llm | StrOutputParser()

# Run
question = "What is a knowledge graph?"
docs = retriever.invoke(question)
# Pass the retrieved text, not the list of Document objects: otherwise the prompt
# receives the list's Python repr (metadata included) as its context.
generation = rag_chain.invoke({"question": question, "context": format_docs(docs)})
print(generation)


## In the event that the retrieved document is not relevant, let's opt for a web search via Tavily