## RAG Workflow

### Install packages

In [1]:
# Uncomment to install the pinned versions this notebook was written against.
# `%pip` (rather than `! pip`) installs into the environment of the running kernel.
# NOTE(review): later cells use PyPDFLoader, Chroma and OpenAIEmbeddings, which
# presumably also need pypdf, chromadb and tiktoken installed — confirm and pin them.
# %pip install langchain==0.0.343
# %pip install openai==1.3.6

### Set up the environment

In [2]:
import os
import openai

# Read the API key from the environment instead of hardcoding it in the notebook.
# Fail fast, with an actionable message, when the variable is unset or empty;
# otherwise the problem only surfaces later at the first OpenAI request.
# KeyError is kept as the exception type so the unset-variable failure is unchanged.
if not os.environ.get('OPENAI_API_KEY'):
    raise KeyError(
        "OPENAI_API_KEY is not set (or is empty); "
        "export it before starting the Jupyter kernel."
    )

openai.api_key = os.environ['OPENAI_API_KEY']

### Load file

In [3]:
from langchain.document_loaders import PyPDFLoader

# Path is relative to the directory the notebook is run from.
file_path = "docs/MachineLearning-Lecture01.pdf"

# Load the PDF into a list of LangChain documents; the messages below treat
# each entry as one page of the file.
loader = PyPDFLoader(file_path)
docs = loader.load()

# Quick sanity check on what was loaded.
document_count = len(docs)
print(f'There are {document_count} document(s) in the file.')

first_page_length = len(docs[0].page_content)
print(f'There are {first_page_length} characters in the first page of your document.')

There are 22 document(s) in the file.
There are 3131 characters in the first page of your document.


### Split documents and store the vector embeddings of the text

In [4]:
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

# Break the loaded pages into overlapping chunks so that each chunk is small
# enough to embed and to fit, alongside others, into a single prompt.
# The 200-character overlap keeps context that straddles a chunk boundary.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
split_docs = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()

# Give every chunk a stable id derived from its position. Without explicit ids
# the store generates fresh random ones on every call, so re-running this cell
# in the same kernel can append a second copy of every chunk to the same named
# collection and make similarity search return duplicates. With stable ids a
# re-run overwrites the existing entries instead.
# NOTE(review): if the splitter settings change so that fewer chunks are
# produced, stale higher-numbered chunks from an earlier run may remain —
# restart the kernel (or delete the collection) after changing them.
chunk_ids = [f"chunk-{index}" for index in range(len(split_docs))]

# NOTE(review): the collection name "serverless_guide" does not match the
# machine-learning lecture being indexed; it looks like a leftover from another
# notebook. It is kept unchanged here to avoid altering behavior — consider renaming.
vectorstore = Chroma.from_documents(
    split_docs,
    embeddings,
    ids=chunk_ids,
    collection_name="serverless_guide",
)

### Create Chain

In [5]:
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain

# Chat model used to write the final answer; temperature=0 asks for the least
# random sampling the model offers.
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)

# Question-answering chain of type "stuff": the retrieved documents are all
# placed into one prompt together with the question.
chain = load_qa_chain(llm=llm, chain_type="stuff")

### Similarity Search

In [6]:
query = "What is the basic concept of machine learning?"

# Retrieve the 3 chunks whose embeddings are closest to the query.
# The former `include_metadata=True` argument was removed: it is not a parameter
# of Chroma's similarity_search (it was only absorbed by **kwargs), and the
# returned documents already carry their metadata. `k` is passed by keyword so
# the meaning of the number is explicit.
similar_docs = vectorstore.similarity_search(query, k=3)

### Generate the answer

In [7]:
# Ask the model to answer the question using only the retrieved chunks as
# context; the returned answer string is displayed as the cell output.
chain.run(
    question=query,
    input_documents=similar_docs,
)

'The basic concept of machine learning is that a computer program is designed to learn from experience (E) in order to improve its performance on a specific task (T) based on a performance measure (P). This means that the program can adapt and improve its performance without being explicitly programmed for every possible scenario.'