In [1]:
!pip install -qU langchain langchain-core langchain-community langchain-openai
!pip install -qU langchain-groq
!pip install -qU langchain-qdrant
!pip install -qU transformers

In [2]:
import getpass
import os

# The OpenAI and Groq keys are needed in os.environ. Copying os.getenv() back into
# os.environ raises TypeError when a variable is unset (None is not a valid value),
# so prompt for any key that is missing or empty instead.
for _secret_name in ("OPENAI_API_KEY", "GROQ_API_KEY"):
    if not os.environ.get(_secret_name):
        os.environ[_secret_name] = getpass.getpass(f"Enter {_secret_name}: ")

# Qdrant connection settings; each is None when its variable is not set.
QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
QDRANT_URL = os.getenv("QDRANT_URL")

In [3]:
from langchain import hub
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_qdrant import Qdrant
from transformers import AutoTokenizer, AutoModel
import torch
import tqdm as notebook_tqdm

# OpenAI embedding model; passed to the Qdrant store to embed the document chunks.
EMBEDDING_MODEL_NAME = "text-embedding-3-small"
embedding_model = OpenAIEmbeddings(model=EMBEDDING_MODEL_NAME)

# Llama 3 tokenizer; used to measure chunk sizes in tokens.
TOKENIZER_REPO = "meta-llama/Meta-Llama-3-70B"
tokenizer = AutoTokenizer.from_pretrained(TOKENIZER_REPO)

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [4]:
# Source document: PDF transcript of the 2007 iPhone presentation.
TRANSCRIPT_PDF_URL = "https://singjupost.com/wp-content/uploads/2014/07/Steve-Jobs-iPhone-2007-Presentation-Full-Transcript.pdf"

pdf_loader = PyMuPDFLoader(TRANSCRIPT_PDF_URL)
docs = pdf_loader.load()

In [5]:
# Define the token length function
def llama3_token_len(text):
    """Return the number of tokens the module-level ``tokenizer`` produces for ``text``.

    Used as the ``length_function`` of the text splitter, so chunk sizes are
    measured in tokens instead of characters.
    """
    tokens = tokenizer.tokenize(text)
    return len(tokens)

In [6]:
# Chunk sizes are measured with llama3_token_len, i.e. in tokens, not characters.
splitter_settings = {
    "chunk_size": 200,
    "chunk_overlap": 0,
    "length_function": llama3_token_len,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_settings)

# Split the loaded documents into chunks.
split_chunks = text_splitter.split_documents(docs)

In [7]:
##### FOR INITIAL ANALYSIS ONLY:

# Number of chunks produced by the splitter (displayed as the cell's output).
len(split_chunks)

85

In [8]:
##### FOR INITIAL ANALYSIS ONLY:

# Token count of the longest chunk; stays 0 when there are no chunks.
chunk_token_counts = (llama3_token_len(piece.page_content) for piece in split_chunks)
max_chunk_length = max(chunk_token_counts, default=0)

print(max_chunk_length)

198


In [9]:
# Qdrant connection settings read from the environment earlier in the notebook.
url, api_key = QDRANT_URL, QDRANT_API_KEY

# Options forwarded to Qdrant.from_documents.
qdrant_options = dict(
    url=url,
    prefer_grpc=True,
    api_key=api_key,
    collection_name="iphone_speech",
    force_recreate=True,
)

# Build the "iphone_speech" collection from the chunks using the embedding model.
qdrant = Qdrant.from_documents(split_chunks, embedding_model, **qdrant_options)

In [10]:
# Expose the Qdrant vector store as a retriever; the RAG chain below uses it to fetch context.
qdrant_retriever = qdrant.as_retriever()

In [11]:
# set the LANGCHAIN_API_KEY environment variable (create key in settings)
from langchain import hub
# LangChain API key from the environment; None when the variable is not set.
LANGCHAIN_API_KEY = os.environ.get("LANGCHAIN_API_KEY")

In [12]:
### set up LLM model
#### set up LLAMA3 RAG prompt

from langchain_groq import ChatGroq
from langchain import hub

# Groq-hosted chat model that generates the answers.
GROQ_MODEL_NAME = "llama3-70b-8192"
llm = ChatGroq(model=GROQ_MODEL_NAME)

# RAG prompt for Llama 3, pulled from the LangChain Hub.
RAG_PROMPT_REPO = "rlm/rag-prompt-llama3"
llama3_prompt = hub.pull(RAG_PROMPT_REPO)

In [13]:
# Show which input variables the prompt expects, followed by its metadata.
print(
    f"prompt.input_variables: {llama3_prompt.input_variables}",
    f"\nprompt.metadata: {llama3_prompt.metadata}",
    sep="\n",
)

# For every message in the prompt, show its class name (such as
# HumanMessagePromptTemplate) and then the message itself.
for message in llama3_prompt.messages:
    print(
        f"\nmessage type: {message.__class__.__name__}",
        f"message:\n{message}",
        sep="\n",
    )

prompt.input_variables: ['context', 'question']

prompt.metadata: {'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt-llama3', 'lc_hub_commit_hash': '4bc799d6b3a36adebc6359db85ff42234b4648bd8502c6597d99b5c0dcbb9ed3'}

message type: HumanMessagePromptTemplate
message:
prompt=PromptTemplate(input_variables=['context', 'question'], template="<|begin_of_text|><|start_header_id|>system<|end_header_id|> You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise <|eot_id|><|start_header_id|>user<|end_header_id|> \nQuestion: {question} \nContext: {context} \nAnswer: <|eot_id|><|start_header_id|>assistant<|end_header_id|>\n")


In [14]:
#### verify simple prompt
# Minimal chain without retrieval: the prompt is piped straight into the LLM.
simple_chain = llama3_prompt | llm

In [15]:
# Display the simple chain's graph as ASCII art.
print(simple_chain.get_graph().draw_ascii())

    +-------------+    
    | PromptInput |    
    +-------------+    
           *           
           *           
           *           
+--------------------+ 
| ChatPromptTemplate | 
+--------------------+ 
           *           
           *           
           *           
     +----------+      
     | ChatGroq |      
     +----------+      
           *           
           *           
           *           
  +----------------+   
  | ChatGroqOutput |   
  +----------------+   


In [16]:
# Smoke test with hand-written inputs; the context does not state the capital.
simple_inputs = {
    "context": "France is a country in Europe.",
    "question": "What is the capital of France?",
}
simple_response = simple_chain.invoke(simple_inputs)
print(simple_response)

content="I don't know. The provided context only mentions that France is a country in Europe, but it doesn't provide the capital of France." response_metadata={'token_usage': {'completion_tokens': 28, 'prompt_tokens': 97, 'total_tokens': 125, 'completion_time': 0.080199539, 'prompt_time': 0.018759978, 'queue_time': None, 'total_time': 0.098959517}, 'model_name': 'llama3-70b-8192', 'system_fingerprint': 'fp_753a4aecf6', 'finish_reason': 'stop', 'logprobs': None} id='run-23ce10fe-4594-44c5-992b-584361af34d0-0'


In [17]:
from operator import itemgetter
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

# Sub-chains named separately so that each stage of the pipeline reads on its own.
retrieve_context = itemgetter("question") | qdrant_retriever
generate_answer = llama3_prompt | llm

retrieval_augmented_qa_chain = (
    # 1. Retrieve context for the question and carry the question forward.
    {"context": retrieve_context, "question": itemgetter("question")}
    # 2. Re-assign "context" from the existing "context" value.
    | RunnablePassthrough.assign(context=itemgetter("context"))
    # 3. Generate the response and return it together with the retrieved context.
    | {"response": generate_answer, "context": itemgetter("context")}
)

In [18]:
# Display the RAG chain's graph as ASCII art.
print(retrieval_augmented_qa_chain.get_graph().draw_ascii())

                      +---------------------------------+                        
                      | Parallel<context,question>Input |                        
                      +---------------------------------+                        
                           ****                   ****                           
                       ****                           ***                        
                     **                                  ****                    
+--------------------------------+                           **                  
| Lambda(itemgetter('question')) |                            *                  
+--------------------------------+                            *                  
                 *                                            *                  
                 *                                            *                  
                 *                                            *                  
     +----------

In [19]:
# Ask the RAG chain a question about the iPhone.
iphone_question = "What is the most important thing about the iPhone?"
response = retrieval_augmented_qa_chain.invoke({"question": iphone_question})

In [20]:
# Answer text from the chain's "response" entry (displayed as the cell's output).
response["response"].content

'The most important thing about the iPhone is that it runs on the OSX operating system, which provides a strong foundation and enables features like multi-tasking, networking, security, and desktop-class applications.'

In [21]:
# Print every item from the chain's "context" entry, each followed by a separator line.
for retrieved_doc in response["context"]:
    print("Context:", retrieved_doc, "----", sep="\n")

Context:
page_content='what’s on any other phone. Now how do we do this? Well, we start with a strong foundation.\niPhone runs OSX.\nNow, why would we want to run such a sophisticated operating system on a mobile\ndevice? Well, because it’s got everything we need. It’s got multi-tasking. It’s got the best\nnetworking. It already knows how to power manage. We’ve been doing this on mobile\ncomputers for years. It’s got awesome security. And the right apps. It’s got everything from\nCocoa and the graphics and it’s got core animation built in and it’s got the audio and video\nthat OSX is famous for. It’s got all the stuff we want. And it’s built right in to iPhone. And\nthat has let us create desktop class applications and networking. Not the crippled stuff that\nyou find on most phones. This is real, desktop-class applications.' metadata={'subject': '', 'creator': '', 'total_pages': 22, 'keywords': '', 'modDate': "20200415062431+00'00'", 'trapped': '', 'format': 'PDF 1.4', 'creationDate':

In [22]:
# Ask a question that has nothing to do with the iPhone.
unrelated_question = "What is the airspeed velocity of an unladen swallow?"
response = retrieval_augmented_qa_chain.invoke({"question": unrelated_question})

In [23]:
# Print the answer text from the most recent chain invocation.
print(response["response"].content)

I don't know.
