In [33]:
import openai 
from dotenv import load_dotenv, find_dotenv
# Locate a .env file and load it; binding the return value to `_` keeps the
# cell from rendering an output.
_ = load_dotenv(find_dotenv())

**Set up logging in LangSmith**

In [34]:
import os
# Turn on LangSmith tracing and point the client at the tracing endpoint.
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"
# The API key must already be present in the environment (for example via the
# .env file loaded in the first cell). Fail fast with an actionable message
# rather than a bare KeyError from a self-assignment.
if "LANGCHAIN_API_KEY" not in os.environ:
    raise KeyError(
        "LANGCHAIN_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

### Load Documents

In [35]:
from langchain.document_loaders import PyPDFLoader

In [36]:
# Location of the PDF to index. Defaults to the original local path; set the
# PDF_PATH environment variable to run the notebook on another machine without
# editing this cell. (`os` is imported in the LangSmith setup cell.)
PDF_PATH = os.environ.get(
    "PDF_PATH",
    "/Users/ingrid/Downloads/compact-guide-to-large-language-models.pdf",
)
loader = PyPDFLoader(PDF_PATH)
# One entry per loaded page is assumed here — TODO confirm against the loader docs.
pages = loader.load()

### Split Documents


In [37]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [38]:
# Text splitter configuration: chunk_size=1500 with chunk_overlap=200, and
# separators listed from coarsest (blank line) to finest (empty string).
r_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n", " ", ""],
    chunk_overlap=200,
    chunk_size=1500,
)

In [39]:
# Create our splits from the PDF: run the loaded pages through the splitter
# configured in the previous cell.
docs = r_splitter.split_documents(pages)

### Create Embeddings & Vectorstore

In [40]:
from langchain.vectorstores import Qdrant

In [41]:
from langchain.embeddings.openai import OpenAIEmbeddings
# Embedding client built with default settings.
# NOTE(review): presumably picks up the OpenAI API key from the environment — confirm.
embeddings = OpenAIEmbeddings()

In [42]:
# Embed the splits and index them in a Qdrant collection named "my_documents".
# location=":memory:" selects local mode with in-memory storage only.
qdrant = Qdrant.from_documents(
    docs,
    embeddings,
    collection_name="my_documents",
    location=":memory:",
)

In [43]:
# We can test different types of searches (similarity search, MMR, etc.)
question = "What are the top in demand skills for data professionals?"
found_docs = qdrant.similarity_search(question)
# Alternative search (max marginal relevance):
# found_docs = qdrant.max_marginal_relevance_search(question, k=2, fetch_k=10)
# NOTE(review): found_docs is not referenced again in the visible part of the notebook.

### Set up the LLM 

In [44]:
##### Use this code to use Ollama with llama2 or mistral models
# NOTE(review): ChatOllama appears to select the model via `model`, not
# `model_name` — confirm against the installed LangChain version.
# from langchain.chat_models import ChatOllama
# llm = ChatOllama(model="llama2", temperature=0)

##### Use this code to connect with OpenAI API
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-3.5-turbo-1106", temperature=0)

**1: RetrievalQA Chain**

In [45]:
from langchain.chains import RetrievalQA

**Prompting**

In [46]:
from langchain.prompts import PromptTemplate

# Build prompt
# The template has two placeholders, {context} and {question}. It instructs the
# model to admit when it does not know the answer, to use at most three
# sentences, and to end every answer with "thanks for asking!".
template = """Use the following pieces of context to answer the question at the end. 
If you don't know the answer, just say that you don't know, don't try to make up an answer. 
Use three sentences maximum. Keep the answer as concise as possible. 
Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
# Wrap the raw string in a PromptTemplate so it can be handed to the chain.
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [47]:
# Build the RetrievalQA chain (it is invoked in a later cell). It uses the
# Qdrant retriever, the custom QA prompt, and returns the source documents
# alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT},
    return_source_documents=True,
    retriever=qdrant.as_retriever(),
)

In [48]:
# Question to ask the chain; this replaces the earlier similarity-search test question.
question = "What big breakthroughs happened with LLMs in 2023?"

In [49]:
# Invoke the chain, passing the question under the "query" key.
result = qa_chain({"query": question})

In [50]:
# Display the text stored under the "result" key of the chain output.
result["result"]

'In 2023, open source LLMs showed impressive results with releases like Dolly 2.0, LLaMA, Alpaca, and Vicuna. GPT-4 was also released, setting a new benchmark for parameter size and performance. Thanks for asking!'

In [51]:
# Inspect the first source document; the chain was built with
# return_source_documents=True, so the output carries them.
result["source_documents"][0]

Document(page_content='a service that is widely accessible to users through a web interface  \nand kicks off a huge increase in public awareness of LLMs and  \ngenerative AI.\n \n2023   \nOpen source LLMs begin showing increasingly impressive results  \nwith releases such as Dolly 2.0, LLaMA, Alpaca and Vicuna.  \nGPT-4 is also released, setting a new benchmark for both parameter  \nsize and performance.', metadata={'source': '/Users/ingrid/Downloads/compact-guide-to-large-language-models.pdf', 'page': 2})

**Optional: Alternative Chain Types - Map Reduce**

In [52]:
# Variant of the QA chain using the "map_reduce" chain type; no custom prompt
# is passed here.
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    chain_type="map_reduce",
    retriever=qdrant.as_retriever(),
)
# Re-ask the same question (this overwrites `result`) and display the answer.
result = qa_chain_mr({"query": question})
result["result"]

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


'In 2023, there were significant breakthroughs with LLMs, including the release of open source LLMs such as Dolly 2.0, LLaMA, Alpaca, and Vicuna. Additionally, GPT-4 was released, setting a new benchmark for both parameter size and performance. The launch of ChatGPT also turned GPT-3 and similar models into a widely accessible service for users through a web interface, leading to a huge increase in public awareness of Large Language Models (LLMs) and generative AI.'

**Optional: Alternative Chain Types - Refine**

In [None]:
# Variant of the QA chain using the "refine" chain type; no custom prompt is
# passed here.
qa_chain_r = RetrievalQA.from_chain_type(
    llm,
    chain_type="refine",
    retriever=qdrant.as_retriever(),
)
# Re-ask the same question (this overwrites `result`) and display the answer.
result = qa_chain_r({"query": question})
result["result"]

### Memory

In [27]:
from langchain.memory import ConversationBufferMemory
# Conversation memory keyed as "chat_history", configured with
# return_messages=True.
memory = ConversationBufferMemory(
    return_messages=True,
    memory_key="chat_history",
)

### Conversational Retrieval Chain

In [28]:
from langchain.chains import ConversationalRetrievalChain
# Conversational chain: the same LLM and a Qdrant retriever, plus the memory
# object created in the Memory section.
retriever = qdrant.as_retriever()
qa = ConversationalRetrievalChain.from_llm(
    llm,
    memory=memory,
    retriever=retriever,
)

In [None]:
# First turn of the conversation; this chain takes its input under the
# "question" key.
question = "What big breakthroughs happened with LLMs in 2023?"
result = qa({"question": question})

In [30]:
# Display the text stored under the "answer" key of the chain output.
result['answer']

'In 2023, there were several significant breakthroughs with LLMs (Large Language Models). Open source LLMs such as Dolly 2.0, LLaMA, Alpaca, and Vicuna showed increasingly impressive results. Additionally, GPT-4 was released, setting a new benchmark for both parameter size and performance. These advancements contributed to a significant increase in public awareness of LLMs and generative AI.'

In [31]:
# Ask a follow-up question
question = "Can you tell me more about these types of models?"
result = qa({"question": question})

In [32]:
# Display the answer to the follow-up question.
result['answer']

"There are two main types of large language models: proprietary services and open source models.\n\nProprietary services, like OpenAI's ChatGPT, provide a user interface or API where users can input prompts and receive fast responses from high-performing models like GPT-3.5 and GPT-4. These models are trained on enormous datasets and can handle complex tasks, both technical (like code generation) and creative (like writing poetry). However, these services require a significant amount of compute power to train and serve the models, making them expensive to operate at scale. Users also have to send their data to the service's servers, raising privacy and security concerns.\n\nOn the other hand, open source models are available in the open source community, with platforms like Hugging Face gathering hundreds of thousands of models from contributors. While the open source community has been rapidly catching up to the performance of proprietary models, it has not yet matched the performance