# **Retrieval Augmented Generation Question Answering System**

---



In [None]:
import os
from google.colab import userdata
# Copy the OpenAI key from the Colab secret store into the process
# environment under the same name.
os.environ.update({"OPENAI_API_KEY": userdata.get('OPENAI_API_KEY')})

In [None]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain_huggingface import HuggingFaceEmbeddings
import warnings
warnings.filterwarnings('ignore')

import os

# --- 1. Ingest: read the PDF into a list of documents --------------------
loader = PyPDFLoader('/content/EMAI.pdf')
pages = loader.load()

# --- 2. Chunk: 500-character pieces with a 50-character overlap ----------
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
splits = text_splitter.split_documents(pages)

# --- 3. Index: embed the chunks with a Hugging Face model and store them
#        in a Chroma vector store ---------------------------------------
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-small-en-v1.5")
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# --- 4. Retrieve + generate: the retriever is configured with k=3, and the
#        "stuff" chain type is used with the chat model ------------------
llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
retriever = vectorstore.as_retriever(search_kwargs=dict(k=3))
qa_chain = RetrievalQA.from_chain_type(
    llm=llm, chain_type="stuff", retriever=retriever
)

# --- 5. Smoke test: ask one question about the PDF content ---------------
response = qa_chain.invoke(
    {"query": "What are the minimum ECTS requirements for mathematics courses?"}
)
print(f"Answer: {response['result']}")


Answer: The minimum ECTS requirements for mathematics courses are 12 ECTS credit points.


# **Evaluation**
- Question -> Query sent to the QA chain
- Response -> Answer returned by the LLM
- Expected -> Reference answer the response is compared against

In [None]:
# Hand-written evaluation set mapping each question to a short reference
# answer. The binding must be named `test_questions` because the loop below
# iterates over that name (it was previously misspelled `stest_questions`,
# which made the loop fail with NameError in a fresh kernel).
test_questions = {
    "What ECTS are required for math courses?": "12 ECTS",
    "List mandatory documents": "Identity Card, Degree, Transcript, CV, etc."
}

# Run every question through the QA chain and print the model's answer next
# to the reference answer so the two can be compared by eye.
for question, expected in test_questions.items():
    response = qa_chain.invoke(question)
    print(f"Q: {question}\nA: {response['result']}\nExpected: {expected}\n")

Q: What ECTS are required for math courses?
A: Math courses require a minimum of 12 ECTS credit points in topics such as calculus, linear algebra, logic, discrete mathematics, combinatorics, probability and statistics, information theory, graph theory, and number theory.
Expected: 12 ECTS

Q: List mandatory documents
A: The mandatory documents required are:
1. Identity Card or Passport. If you hold more than one nationality, please attach a copy of all the documents providing those nationalities.
2. Official university bachelor's or master's degree from the European Higher Education Area (EHEA) or equivalent foreign degree allowing access to a postgraduate degree in the country that issued the degree. Should the applicant have not from providing the certificate: Australia, Canada (excluding Quebec), New Zealand, the United Kingdom, or the United States of America, or in one of the four member states of the EMAI Consortium.
Expected: Identity Card, Degree, Transcript, CV, etc.

