In [6]:
# Installations
# Each %pip line installs into the running kernel's environment; -q keeps pip's output quiet.
# NOTE(review): no versions are pinned, so a fresh run may resolve different releases
# than the ones that produced the saved outputs.

%pip install --upgrade pip -q
# python-dotenv supplies dotenv_values, used later to read API keys from the .env file.
%pip install python-dotenv -q

# LangChain packages: prompts, runnables, the Qdrant vector store wrapper, and the OpenAI chat/embedding clients.
%pip install langchain langchain-core langchain-community langchain-openai -q

# qdrant-client backs the in-memory Qdrant vector store used for retrieval.
%pip install qdrant-client -q
# tiktoken counts tokens for chunk sizing; pymupdf is presumably the backend PyMuPDFLoader needs to parse the PDF.
%pip install tiktoken pymupdf -q

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [11]:
from dotenv import dotenv_values

# Parse the .env file one directory above the notebook's working directory into a
# mapping of secret names to values; later cells read the API keys from it by name.
environment_secrets = dotenv_values("../.env")

In [12]:

from langchain_openai import ChatOpenAI
from pydantic.v1 import SecretStr

# Use .get() so that a missing key reaches the explicit check below instead of
# surfacing as a bare KeyError from the subscript lookup.
openai_api_key = environment_secrets.get("OPENAI_API_KEY")

# Raise explicitly rather than assert: asserts are stripped under `python -O`,
# and an empty value is as unusable as a missing one.
if not openai_api_key:
    raise ValueError("OpenAI Api Key not found!")

# Chat model client shared by the rest of the notebook; the key is wrapped in
# SecretStr so it is not shown in plain text when the client is printed.
openai_client = ChatOpenAI(
    api_key=SecretStr(openai_api_key)
)

In [8]:
from langsmith import Client

# Look the key up with .get() and fail with a descriptive message; a plain
# subscript would raise an opaque KeyError when the entry is absent from .env.
langsmith_api_key = environment_secrets.get("LANGSMITH_API_KEY")

if not langsmith_api_key:
    raise ValueError("LangSmith Api Key not found!")

# LangSmith client that the tracing context manager uses to upload run traces.
langsmith_client = Client(api_key=langsmith_api_key)

In [9]:
from langchain.prompts import ChatPromptTemplate
from langchain.document_loaders import PyMuPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Qdrant
from langchain_openai import OpenAIEmbeddings
from langchain.schema.runnable import RunnableParallel

from operator import itemgetter

from tiktoken import encoding_for_model

def main(query: str = "Where does Arthur Dent meet Marvin?"):
	"""Build an in-memory RAG pipeline over the Hitchhiker's Guide PDF and answer one query.

	Steps: download and parse the PDF, split it into chunks of at most 200
	gpt-3.5-turbo tokens (no overlap), embed the chunks into an in-memory
	Qdrant collection, retrieve context for the query, and ask the
	module-level ``openai_client`` to answer from that context.

	Args:
		query: The question to ask. Defaults to the question the notebook
			originally hard-coded.

	Returns:
		None. The result dict (keys ``"response"`` and ``"context"``) is printed.
	"""
	# Pass the key explicitly, the same way the chat client receives it, so the
	# embedding calls do not depend on OPENAI_API_KEY being exported in the
	# process environment separately from the .env file.
	embedding_model = OpenAIEmbeddings(
		model="text-embedding-3-small",
		api_key=SecretStr(openai_api_key)
	)
	gpt_3_5_turbo_encoding = encoding_for_model("gpt-3.5-turbo")

	def token_len(text: str) -> int:
		"""Return the number of gpt-3.5-turbo tokens in ``text``."""
		return len(gpt_3_5_turbo_encoding.encode(text))
	
	hitchhikers_guide = PyMuPDFLoader(
		"https://www.deyeshigh.co.uk/downloads/literacy/world_book_day/the_hitchhiker_s_guide_to_the_galaxy.pdf"
	).load()

	# Chunk size is measured in tokens (via token_len), not characters.
	chunks = RecursiveCharacterTextSplitter(
		chunk_size=200,
		chunk_overlap=0,
		length_function=token_len
	).split_documents(hitchhikers_guide)

	# ":memory:" keeps the collection in-process, so it is rebuilt on every call.
	vector_store = Qdrant.from_documents(
		chunks,
		embedding_model,
		location=":memory:",
		collection_name="Hitchhiker's Guide To The Galaxy"
	)

	qdrant_retriever = vector_store.as_retriever()

	rag_prompt = ChatPromptTemplate.from_template(
"""
CONTEXT:
{context}

QUERY:
{query}
"""
	)

	get_query = itemgetter("query")

	# Stage 1 fans the query out to the retriever and passes it through unchanged;
	# stage 2 generates the answer and also forwards the retrieved documents so
	# the caller can inspect what the answer was based on.
	rag_qa_chain = (
		RunnableParallel(
			{"context": get_query | qdrant_retriever, "query": get_query}
		) | 
		{"response": rag_prompt | openai_client, "context": itemgetter("context")}
	)

	response = rag_qa_chain.invoke({"query" : query})

	print(response)

In [10]:

from langchain_core.tracers.context import tracing_v2_enabled
from uuid import uuid4

# Entry point: run the pipeline with LangSmith tracing switched on.
if __name__ == "__main__":
	# The random 8-hex-character suffix gives every run its own project name,
	# so traces from separate runs are not mixed together.
	with tracing_v2_enabled(
		client=langsmith_client,
		project_name=f"Suds Ai - LangGraph - {uuid4().hex[0:8]}",
	):
		main()

{'response': AIMessage(content='Arthur Dent meets Marvin in a corridor as Marvin trudges on down the corridor, still moaning about the pain in all the diodes down his left-hand side. Arthur walks along beside him, engaging in conversation with Marvin.', additional_kwargs={'refusal': None}, response_metadata={'token_usage': {'completion_tokens': 46, 'prompt_tokens': 1822, 'total_tokens': 1868}, 'model_name': 'gpt-3.5-turbo-0125', 'system_fingerprint': None, 'finish_reason': 'stop', 'logprobs': None}, id='run-4e03165e-ab09-45ba-80ff-13b3a5e395ee-0', usage_metadata={'input_tokens': 1822, 'output_tokens': 46, 'total_tokens': 1868}), 'context': [Document(metadata={'source': 'https://www.deyeshigh.co.uk/downloads/literacy/world_book_day/the_hitchhiker_s_guide_to_the_galaxy.pdf', 'file_path': 'https://www.deyeshigh.co.uk/downloads/literacy/world_book_day/the_hitchhiker_s_guide_to_the_galaxy.pdf', 'page': 160, 'total_pages': 227, 'format': 'PDF 1.3', 'title': "Hitchhiker's Guide to the Galaxy"