In [1]:
from dotenv import load_dotenv
import os

# Read variables from a local .env file (if one exists) into the process
# environment so the key never has to be hardcoded in the notebook.
load_dotenv()

# "OPEN_API_KEY" is the variable name this notebook has always used; fall back
# to the conventional "OPENAI_API_KEY" so a standard .env file works as well.
OPEN_API_KEY = os.getenv("OPEN_API_KEY") or os.getenv("OPENAI_API_KEY")
if not OPEN_API_KEY:
    # Fail here with an actionable message instead of later, deep inside the
    # first OpenAI call.
    raise RuntimeError(
        "No OpenAI API key found: set OPEN_API_KEY (or OPENAI_API_KEY) in the "
        "environment or in a .env file."
    )

In [2]:
from openai import OpenAI
# OpenAI SDK client, authenticated with the API key read from the environment
# in the first cell.
client = OpenAI(
    api_key=OPEN_API_KEY,
)

In [None]:
import requests
from IPython.display import display, Markdown

In [None]:
# Source document for the demo: a PDF hosted in the LangChain GitHub
# repository under docs/docs/example_data (file name: nke-10k-2023.pdf).
url = (
    "https://raw.githubusercontent.com/langchain-ai/langchain/master/docs/docs/example_data/nke-10k-2023.pdf"
)

In [5]:
# Keep downloaded data in a "data" directory under the current working
# directory instead of one user's absolute Windows path, so the notebook runs
# unchanged on any machine or operating system. The path is made absolute up
# front so it stays valid even if the working directory changes later.
folder_path = os.path.join(os.getcwd(), "data")
# exist_ok=True makes this cell safe to re-run.
os.makedirs(folder_path, exist_ok=True)
# Local destination of the downloaded PDF.
file_path = os.path.join(folder_path, "nke-10k-2023.pdf")

In [8]:
# Download the PDF and store it at file_path.
# timeout: requests waits indefinitely by default, so without it a stalled
# connection would hang the notebook.
response = requests.get(url, timeout=30)
# Abort on HTTP errors (404, 5xx, ...) so an error page is never written to
# disk under a .pdf name and handed to the PDF loader.
response.raise_for_status()
with open(file_path, "wb") as f:
    f.write(response.content)

In [None]:
from langchain.document_loaders import PyPDFLoader

# Parse the downloaded PDF into a list of documents; each one exposes its
# text through `.page_content`.
loader = PyPDFLoader(file_path=file_path)
documents = loader.load()

In [None]:
# Sanity check: print the raw text of the first loaded document.
print(documents[0].page_content)

In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Splitter configuration: target chunk size of 512 with an overlap of 50
# between neighbouring chunks (both measured in characters by default), so
# text cut at a chunk boundary still appears with some surrounding context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=512,
    chunk_overlap=50,
)

In [12]:
# Split the loaded documents into chunks with the splitter configured in the
# previous cell; these chunks are what gets embedded and indexed later.
text_chunks = text_splitter.split_documents(documents)

In [None]:
# Render the first chunk as Markdown to eyeball the splitter's output.
display(Markdown(str(text_chunks[0].page_content)))


In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Embedding model used to turn text into vectors, identified by its
# Hugging Face model name.
embeddings = HuggingFaceEmbeddings(
    model_name="BAAI/bge-small-en-v1.5",
)

In [None]:
from langchain.vectorstores import FAISS

# Embed every chunk with the embedding model and build a FAISS vector store
# from the result.
vector_store = FAISS.from_documents(
    documents=text_chunks,
    embedding=embeddings,
)

In [33]:
# Wrap the vector store in a retriever (default settings); the RAG chain
# below uses it to fetch context for each question.
retriever = vector_store.as_retriever()

In [36]:
from langchain.prompts import ChatPromptTemplate

In [35]:
# Prompt text for the RAG chain. The {question} and {context} placeholders
# are filled in when the prompt built from this template is formatted.
template = (
    "You are an assistant for question-answering tasks.\n"
    "Use the following pieces of retrieved context to answer the question.\n"
    "If you don't know the answer, just say that you don't know.\n"
    "Use ten sentences maximum and keep the answer concise.\n"
    "Question: {question}\n"
    "Context: {context}\n"
    "Answer:\n"
)

In [37]:
# Build the chat prompt from the template string; its {question} and
# {context} placeholders become the prompt's input variables.
prompt = ChatPromptTemplate.from_template(template)

In [38]:
from langchain.chat_models import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser

In [42]:
# Final chain step: turns the chat model's reply into a plain string.
output_parser = StrOutputParser()

# Chat model that writes the answer, authenticated with the key loaded in the
# first cell.
llm_model = ChatOpenAI(
    openai_api_key=OPEN_API_KEY,
    model_name="gpt-3.5-turbo",
)

In [44]:
def format_docs(docs):
    """Join the text of the retrieved documents into one context string.

    Args:
        docs: Iterable of documents returned by the retriever; each must
            expose its text as `page_content`.

    Returns:
        The documents' text, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# RAG pipeline: the input question is sent to the retriever and also passed
# through unchanged; both values fill the prompt, which goes to the chat
# model, whose reply is parsed into a plain string.
# The retriever output is piped through format_docs so the prompt receives
# clean text; without it the list of Document objects would be stringified
# into the prompt, reprs and metadata included.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm_model
    | output_parser
)

In [None]:
# Ask a sample question; as the last expression in the cell, the chain's
# string answer is shown as the cell output.
rag_chain.invoke("What is the business strategy of Nike?")