In [69]:
from dotenv import load_dotenv
import os 
from langchain_openai.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS,DocArrayInMemorySearch
from langchain_core.runnables import RunnablePassthrough, RunnableParallel

Code from: https://github.com/Tech-Watt/RAG-Application-Using-OpenAI-Model/blob/main/main.ipynb 

In [70]:
# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# The OpenAI key is read from the environment so it is never hard-coded in the notebook.
API_KEY = os.getenv('OPENAI_API_KEY')
if not API_KEY:
    # Fail here with an actionable message instead of on the first API call further down.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; add it to your .env file or export it "
        "before running this notebook."
    )

# Chat model used by the rest of the notebook ('gpt-3.5-turbo' was the alternative tried earlier).
# Model list: https://platform.openai.com/docs/models/gpt-4o
Model = 'gpt-4o'

In [71]:
# Build the chat client from the configured key and model name.
model = ChatOpenAI(
    model=Model,
    api_key=API_KEY,
)
# Pass the question straight to the model; as the cell's last expression the
# returned AIMessage is displayed in full.
model.invoke("what is the meaning of life?")

AIMessage(content='The meaning of life is a question that has intrigued humanity for centuries and has various interpretations depending on philosophical, religious, scientific, and personal perspectives. Here are a few approaches to understanding the meaning of life:\n\n1. **Philosophical Perspectives**: Philosophers like Socrates, Aristotle, and existentialists such as Jean-Paul Sartre and Albert Camus have different takes. For example, existentialists often argue that life has no inherent meaning, and it’s up to individuals to create their own purpose through their choices and actions.\n\n2. **Religious Perspectives**: Many religious traditions offer their own explanations. For example, in Christianity, the meaning of life is often associated with serving God and following the teachings of Jesus Christ. In Buddhism, it might involve seeking enlightenment and escaping the cycle of suffering and rebirth.\n\n3. **Scientific Perspectives**: From a scientific standpoint, meaning might be

In [72]:
# Piping the model into a StrOutputParser makes the chain return only the reply
# text (a plain string) instead of the more verbose message object.
parser = StrOutputParser()
chain = model | parser
chain.invoke("what is Chalmers in Sweden?")

"Chalmers University of Technology, commonly known simply as Chalmers, is a prestigious technical university located in Gothenburg, Sweden. It was founded in 1829 and is named after its benefactor, William Chalmers, a director of the Swedish East India Company who bequeathed part of his fortune to establish the school. \n\nChalmers is renowned for its research and education in technology, natural sciences, architecture, maritime management, and other engineering disciplines. The university emphasizes innovation, sustainability, and collaboration with industry. It offers a range of undergraduate, master's, and doctoral programs and is known for its strong international presence and partnerships.\n\nThe university is also involved in various cutting-edge research projects and has a number of specialized research centers and labs. It operates two campuses in Gothenburg: Johanneberg and Lindholmen."

In [73]:
# Read the guideline PDF. load() returns one Document per PDF page, so the length
# below really is the page count. load_and_split() is avoided on purpose: it would
# pre-chunk the text with the loader's default splitter, while this notebook does its
# own chunking (with its own settings) in a later cell.
file_loader = PyPDFLoader('Riktlinje för inköp och upphandling vid Chalmers tekniska högskola 211008.pdf')
page = file_loader.load()
len(page) # number of pages read

7

In [74]:
# The loaded documents are still too large, so cut them into small overlapping chunks.
# Author's tuning note, kept from the original: "experiment here? e.g. 10 -> destroys
# the understanding, 50+ works" (the note does not say which parameter it refers to).
spliter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=200,
)
pages = spliter.split_documents(page)
# Show the first chunk as a sanity check.
pages[0]

Document(page_content='STYRDOKUMENT: Riktlinje för inköp och upphandling vid Chalmers tekniska högskola . Dnr C 20 2 1 -\n1529 . Beslut av CPO Tommy Bothin , 20 2 1 - 10 - 08 .Dokumentets metadata:\nBeslut av:', metadata={'source': 'Riktlinje för inköp och upphandling vid Chalmers tekniska högskola 211008.pdf', 'page': 0})

In [75]:
# Embed every chunk with OpenAI embeddings and index the vectors in FAISS
# (the vector store from Facebook), then expose that index as a retriever.
vector_storage = FAISS.from_documents(
    documents=pages,
    embedding=OpenAIEmbeddings(),
)
retriever = vector_storage.as_retriever()

In [76]:
# Prompt skeleton with two placeholders: {context} (retrieved text) and {question}.
# NOTE(review): "your" should read "you are"; the wording is left verbatim here so the
# prompt sent to the model is unchanged.
question_template = (
    "\n"
    "your a smart bot that answers questions based on the context given to you only.\n"
    "You don't make things up.\n"
    "context:{context}\n"
    "question:{question}\n"
    "\n"
)

In [77]:
# Wrap the raw template string in a PromptTemplate.
prompt = PromptTemplate.from_template(template=question_template)

# Render it once with placeholder values to eyeball the final prompt layout.
print(
    prompt.format(
        question=' Answer this question based on the context',
        context=' Here is the context to use',
    )
)


your a smart bot that answers questions based on the context given to you only.
You don't make things up.
context: Here is the context to use
question: Answer this question based on the context




In [78]:
def format_docs(docs) -> str:
    """Join the text of retrieved documents into one plain-text context string.

    Args:
        docs: Iterable of retrieved documents; each item must expose ``page_content``.

    Returns:
        The ``page_content`` of every document, separated by blank lines
        (an empty string when nothing was retrieved).
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Fan the incoming question out to two branches that together form the prompt inputs:
#   context  -> the retriever fetches matching chunks, which are flattened to plain text
#               (otherwise the prompt would contain the Python repr of Document objects)
#   question -> the question itself, passed through unchanged
result = RunnableParallel(context=retriever | format_docs, question=RunnablePassthrough())

In [79]:
# Full RAG pipeline: gather prompt inputs -> fill the prompt -> ask the model -> plain string.
# NOTE: this rebinds `chain`, which an earlier cell defined as just model | parser.
chain = (
    result
    | prompt
    | model
    | parser
)

In [80]:
# Question that the indexed PDF does not cover: the chain is expected to say that
# the context holds no answer rather than make one up.
chain.invoke("What is a LLMs?")

'The provided context does not contain any information about what "LLM" or "LLMs" refers to. Therefore, based on the given context, I cannot provide an answer to your question.'

In [81]:
# Question that the indexed PDF does cover (asked in Swedish, the document's language).
chain.invoke("Vem får ingå avtal för Chalmers räkning?")

'Enligt den givna kontexten får endast den som har delegation ingå avtal för Chalmers räkning.'

In [83]:
# To find the source page, query the retriever directly (no LLM involved) and read
# the metadata of the returned chunks.
# In the recorded run the first hit is the correct one: metadata 'page': 1, containing
# "Endast den som har delegation kan ingå avtal för Chalmers räkning."
retriever.invoke("Vem får ingå avtal för Chalmers räkning?")

[Document(page_content='Chalmers, 201 8 - 06 - 25 , C201 8 - 0 262 i dess senaste version) får företräda Chalmers. Endast den som\nhar delegation kan ingå avtal för Chalmers räkning.', metadata={'source': 'Riktlinje för inköp och upphandling vid Chalmers tekniska högskola 211008.pdf', 'page': 1}),
 Document(page_content='3 Grundläggande förutsättningar\nChalmers är en upphandlande enhet och omfattas av lagstiftningen avseende offentlig upphandling.\nVid all upphandling ska särskilt följande principer beaktas:', metadata={'source': 'Riktlinje för inköp och upphandling vid Chalmers tekniska högskola 211008.pdf', 'page': 0}),
 Document(page_content='– Styrdokument vid Chalmers , den 31 oktober 201 6, C 2016 - 1759 .\n2 Mål\nMål et är att visa att Chalmers gör goda affärer .\n3 Grundläggande förutsättningar', metadata={'source': 'Riktlinje för inköp och upphandling vid Chalmers tekniska högskola 211008.pdf', 'page': 0}),
 Document(page_content='Institutionerna ansvarar utifrån sin