In [43]:
import openai 
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())

### Load Documents

In [10]:
from langchain.document_loaders import PyPDFLoader

In [11]:
loader = PyPDFLoader('/Users/ingrid/Downloads/compact-guide-to-large-language-models.pdf')
pages = loader.load()

### Split Documents


In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [95]:
# define the text splitter
r_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200, 
    separators=["\n\n", "\n", " ", ""]
)

In [96]:
# Create our splits from the PDF
docs = r_splitter.split_documents(pages)

### Create Embeddings & Vectorstore

In [97]:
from langchain.vectorstores import Qdrant

In [98]:
from langchain.embeddings.openai import OpenAIEmbeddings
embeddings = OpenAIEmbeddings()

In [99]:
qdrant = Qdrant.from_documents(
    docs,
    embeddings,
    location=":memory:",  # Local mode with in-memory storage only
    collection_name="my_documents",
)

In [100]:
# We can test different types of searches (similartiy search, mmr, etc.)
question = "What are the top in demand skills for data professionals?"
found_docs = qdrant.similarity_search(question)
# found_docs = qdrant.max_marginal_relevance_search(query, k=2, fetch_k=10)

### Set up the LLM 

In [129]:
##### Use this code to use Ollama with llama2 or mistral models
# from langchain.chat_models import ChatOllama
# llm = ChatOllama(model_name="llama2", temperature=0)

##### Use this code to connect with OpenAI API
from langchain.chat_models import ChatOpenAI
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

**1: RetreivalQA Chain**

In [116]:
from langchain.chains import RetrievalQA

In [117]:
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=qdrant.as_retriever()
)

In [118]:
question="What is a breif historical background on the development of LLMs?"
result = qa_chain({"query": question})
result['result']

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


'The development of Large Language Models (LLMs) can be traced back to the 1950s when initial attempts were made to map hard rules around languages and follow logical steps to accomplish tasks like translation. However, these strictly defined rules only worked for concrete, well-defined tasks that the system had knowledge about.\n\nIn the 1990s, language models started evolving into statistical models, and language patterns began to be analyzed. However, larger-scale projects were limited by computing power.\n\nAdvancements in machine learning in the 2000s increased the complexity of language models, and the wide adoption of the internet provided a vast amount of training data.\n\nIn 2012, advancements in deep learning architectures and larger data sets led to the development of GPT (Generative Pre-trained Transformer). In 2018, Google introduced BERT (Bidirectional Encoder Representations from Transformers), which was a significant leap in architecture and paved the way for future lar

**Prompting**

In [119]:
from langchain.prompts import PromptTemplate

# Build prompt
template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer. Use three sentences maximum. Keep the answer as concise as possible. Always say "thanks for asking!" at the end of the answer. 
{context}
Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate.from_template(template)


In [120]:
# Run chain
qa_chain = RetrievalQA.from_chain_type(
    llm,
    retriever=qdrant.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": QA_CHAIN_PROMPT}
)

In [121]:
question = "What big breakthroughs happened with LLMs in 2023?"

In [122]:
result = qa_chain({"query": question})

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised Timeout: Request timed out: HTTPSConnectionPool(host='api.openai.com', port=443): Read timed out. (read timeout=600).


In [123]:
result["result"]


'In 2023, open source LLMs such as Dolly 2.0, LLaMA, Alpaca, and Vicuna showed increasingly impressive results. Additionally, GPT-4 was released, setting a new benchmark for both parameter size and performance. Thanks for asking!'

In [124]:
result["source_documents"][0]

Document(page_content='a service that is widely accessible to users through a web interface  \nand kicks off a huge increase in public awareness of LLMs and  \ngenerative AI.\n \n2023   \nOpen source LLMs begin showing increasingly impressive results  \nwith releases such as Dolly 2.0, LLaMA, Alpaca and Vicuna.  \nGPT-4 is also released, setting a new benchmark for both parameter  \nsize and performance.', metadata={'source': '/Users/ingrid/Downloads/compact-guide-to-large-language-models.pdf', 'page': 2})

In [128]:
import os
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_ENDPOINT"] = "https://api.langchain.plus"
os.environ["LANGCHAIN_API_KEY"]

'ls__adf6707f28f14e0a84b131fd1c35d43d'

In [126]:
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=qdrant.as_retriever(),
    chain_type="map_reduce"
)
result = qa_chain_mr({"query": question})
result["result"]

'There is no information provided in the given portion of the document about any specific breakthroughs or advancements related to LLMs in 2023.'

In [127]:
qa_chain_mr = RetrievalQA.from_chain_type(
    llm,
    retriever=qdrant.as_retriever(),
    chain_type="refine"
)
result = qa_chain_mr({"query": question})
result["result"]

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 4.0 seconds as it raised ServiceUnavailableError: The server is overloaded or not ready yet..


'In 2023, the field of Language Model Models (LLMs) witnessed several groundbreaking advancements that pushed the boundaries of language generation and understanding. These breakthroughs had a profound impact on the capabilities and accessibility of LLMs. Here are some notable developments:\n\n1. Enhanced Open Source LLMs: Open source LLMs like Dolly 2.0, LLaMA, Alpaca, and Vicuna underwent significant refinements. Researchers and developers dedicated their efforts to improving these models, resulting in enhanced language generation, comprehension, and overall effectiveness. These advancements contributed to more accurate and coherent language output.\n\n2. Introduction of GPT-4: The release of GPT-4 marked a major milestone in the LLM landscape. This highly anticipated model surpassed its predecessors in both parameter size and performance. With a larger model size, GPT-4 demonstrated superior language generation capabilities, producing more nuanced and contextually appropriate respon