### LangChain - QA with Embeddings

In [5]:
import os

from dotenv import load_dotenv
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [6]:
# Read a local .env file (if any) into the process environment first, so the
# API key lookup below can see it.
load_dotenv()

# Generation settings for the chat model.
model_name = "gpt-3.5-turbo-0125"
max_tokens = 512
temperature = 1.0

# Index os.environ directly: a missing key raises KeyError here instead of
# surfacing later as an authentication failure.
openai_api_key = os.environ["OPENAI_API_KEY"]

llm = ChatOpenAI(
    openai_api_key=openai_api_key,
    model=model_name,
    max_tokens=max_tokens,
    temperature=temperature,
)

In [None]:
# Load the source text. Despite the singular name, `document` is the list
# returned by TextLoader.load().
loader = TextLoader("./work.txt")
document = loader.load()

# Separators are tried in list order, so the coarsest boundary must come
# first: split on paragraph breaks ("\n\n") and only fall back to single
# newlines for pieces that are still larger than chunk_size. With "\n" listed
# first, the paragraph separator could never take effect.
text_splitter = RecursiveCharacterTextSplitter(
    separators=["\n\n", "\n"],
    chunk_size=3000,
    chunk_overlap=300,
)
documents = text_splitter.split_documents(document)

# Total characters across all chunks; text inside an overlap region is counted
# once for every chunk that contains it.
number_of_characters = sum(len(doc.page_content) for doc in documents)

print(f"Now you have {len(documents)} documents with {number_of_characters} characters")

In [7]:
# Embedding client used to vectorize the text chunks; authenticated with the
# same API key as the chat model.
embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)

In [8]:
# Embed every chunk and build a FAISS vector index over the results.
db = FAISS.from_documents(documents=documents, embedding=embeddings)

In [9]:
# Retrieval-augmented QA chain: the retriever pulls chunks from the FAISS
# index and the "stuff" chain type places them all into a single prompt for
# the LLM.
chain = RetrievalQA.from_chain_type(
    retriever=db.as_retriever(),
    chain_type="stuff",
    llm=llm,
)

In [None]:
# Question put to the retrieval QA chain.
query = """summarize the answer in a line.
what does author describe as good work?
"""

# Chain.run() is deprecated; invoke() takes the inputs as a dict and returns a
# dict of outputs. RetrievalQA reads its question from "query" and writes the
# generated text to "result", so `answer` is still a plain string.
answer = chain.invoke({"query": query})["result"]

In [None]:
# Bare expression as the last line of the cell: the notebook renders its value.
answer