In [1]:
!pip install -q torch transformers transformers accelerate gradio bitsandbytes langchain sentence-transformers faiss-gpu openpyxl pacmap datasets langchain-community ragatouille

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m647.5/647.5 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.7/86.7 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m7.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m52.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.5/85.5 MB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m471.6/471.6 kB[0m [31m33.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import faiss
import pickle
import torch

from tqdm import tqdm
from langchain.docstore import InMemoryDocstore
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from ragatouille import RAGPretrainedModel
from typing import List, Tuple, Optional
from langchain.docstore.document import Document as LangchainDocument

EMBEDDING_MODEL_NAME = "thenlper/gte-small"

tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
index = faiss.read_index('knowledge_vector_1.index')
with open('docs_processed.pkl', 'rb') as f:
    docs_processed = pickle.load(f)

lengths = [len(tokenizer.encode(doc.page_content)) for doc in tqdm(docs_processed, desc="Calculating token lengths")]

embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True}
)

docstore = InMemoryDocstore({i: doc for i, doc in enumerate(docs_processed)})

KNOWLEDGE_VECTOR_DATABASE = FAISS(
    index=index,
    docstore=docstore,
    index_to_docstore_id={i: i for i in range(len(docs_processed))},
    embedding_function=embedding_model
)

READER_MODEL_NAME = "RJ1200/llama-3_3b-fine_tuned"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

READER_LLM = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=1000,
)

RERANKER = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

prompt_in_chat_format = [
    {
        "role": "system",
        "content": """You are an AI assistant specializing in analyzing PDF documents. Your task is to generate a comprehensive question paper based on the provided PDF context.The question paper should include the following header:

**Question Paper**

Reg. No. ____________________

End Semester Examination – Date

Code: 18CS1004
Duration: 3 hrs
Sub. Name: PROGRAMMING FOR PROBLEM SOLVING
Max. Marks: 100 .
 For each section mentioned, generate the exact number of questions as specified. Ensure that the questions are relevant, clear, and cover the key topics within the section. Reference specific page numbers or sections from the PDF whenever applicable. If the information needed to create questions is not available in the PDF context, clearly state that."""
    },
    {
        "role": "user",
        "content": """PDF Context:
        {context}
        ---
        For the following sections, generate the striclty required number of questions:
        section_requirements
    part A-10,
    part B- 5,
    part C- 4


        ---
        Question: {question}""",
    },
]

RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
)

def answer_with_rag(
    question: str,
    llm: pipeline,
    knowledge_index: FAISS,
    reranker: Optional[RAGPretrainedModel] = None,
    num_retrieved_docs: int = 30,
    num_docs_final: int = 5,
) -> Tuple[str, List[str]]:
    relevant_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=question, k=num_retrieved_docs)
    relevant_docs = [doc.page_content for doc in relevant_docs]

    if reranker:
        relevant_docs = reranker.rerank(question, relevant_docs, k=num_docs_final)
        relevant_docs = [doc["content"] for doc in relevant_docs]
        relevant_docs = relevant_docs[:num_docs_final]

    context = "\nExtracted PDF content:\n" + "".join([f"Section {str(i+1)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
    final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)
    answer = llm(final_prompt)[0]["generated_text"]
    return answer, relevant_docs

question = "generate end-sem question paper?"
answer, relevant_docs = answer_with_rag(question, READER_LLM, KNOWLEDGE_VECTOR_DATABASE, reranker=RERANKER)

print("==================================Answer==================================")
print(f"{answer}")


Calculating token lengths: 100%|██████████| 45/45 [00:00<00:00, 1138.86it/s]
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:00<00:00,  5.18it/s]


Here is the question paper for PART A, with 10 questions.

**Question Paper**

Reg. No. ____________________

End Semester Examination – April / May – 2019 

Course Code           18CS1004 
Course Name            PROGRAMMING FOR PROBLEM SOLVING 
Duration              3hrs 
Sub. Name             PROGRAMMING FOR PROBLEM SOLVING 
Max. Marks            100 

**Part A (10 x 1 = 10 MARKS)**

1. Define algorithm. CO1 1 
2. Identify the invalid variable(s) in the following declarations. 
(a) int number;         (b) float for;        (c) invariable_count;          (d) int $main;  CO1 1 
3. Predict the output of the following program. 
int a=10; 
int  *ptr=&a; 
printf(“%d”,*ptr); 
printf(“%d”, ++(*ptr));  CO6 U 1 
4. __________ loop is called as exit  controlled loop.  CO3 1 
5. State the string termination character. CO5 R 1 
6. Predict the value at num[1][2] if 
int num[3][4]={1,3,4,2,4,5,6,7,8};  CO5 1 
7. Define a function. CO4 1 
8. Define function prototype. CO4 1 
9. Predict the output of

In [3]:
!pip install FPDF


Collecting FPDF
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: FPDF
  Building wheel for FPDF (setup.py) ... [?25l[?25hdone
  Created wheel for FPDF: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=9cdee3622583c70b60d0cf4a72f301d9217338e78554c3691af959a2b198577c
  Stored in directory: /root/.cache/pip/wheels/f9/95/ba/f418094659025eb9611f17cbcaf2334236bf39a0c3453ea455
Successfully built FPDF
Installing collected packages: FPDF
Successfully installed FPDF-1.7.2


In [7]:
from fpdf import FPDF
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import faiss
import pickle
import torch
from langchain.docstore import InMemoryDocstore
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from ragatouille import RAGPretrainedModel
from typing import List, Tuple, Optional

# Initialize model components
EMBEDDING_MODEL_NAME = "thenlper/gte-small"
tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
index = faiss.read_index('knowledge_vector_1.index')
with open('docs_processed.pkl', 'rb') as f:
    docs_processed = pickle.load(f)

embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True}
)

docstore = InMemoryDocstore({i: doc for i, doc in enumerate(docs_processed)})

KNOWLEDGE_VECTOR_DATABASE = FAISS(
    index=index,
    docstore=docstore,
    index_to_docstore_id={i: i for i in range(len(docs_processed))},
    embedding_function=embedding_model
)

READER_MODEL_NAME = "RJ1200/llama-3_3b-fine_tuned"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

READER_LLM = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=1000,
)

RERANKER = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

# Prompt template
prompt_in_chat_format = [
    {
        "role": "system",
        "content": """You are an AI assistant specializing in analyzing PDF documents. Your task is to generate a comprehensive question paper based on the provided PDF context. The question paper should include the following header:
        **Question Paper**
        Reg. No. ____________________
        End Semester Examination – Date
        Code: 18CS1004
        Duration: 3 hrs
        Sub. Name: PROGRAMMING FOR PROBLEM SOLVING
        Max. Marks: 100 .
        For each section mentioned, generate the exact number of questions as specified. Ensure that the questions are relevant, clear, and cover the key topics within the section. Reference specific page numbers or sections from the PDF whenever applicable. If the information needed to create questions is not available in the PDF context, clearly state that."""
    },
    {
        "role": "user",
        "content": """PDF Context:
        {context}
        ---
        For the following sections, generate the required number of questions:
        part A-10, part B- 5, part C- 4
        ---
        Question: {question}""",
    },
]

# Function for RAG-based question paper generation
def answer_with_rag(
    question: str,
    llm: pipeline,
    knowledge_index: FAISS,
    reranker: Optional[RAGPretrainedModel] = None,
    num_retrieved_docs: int = 30,
    num_docs_final: int = 5,
) -> Tuple[str, List[str]]:
    relevant_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=question, k=num_retrieved_docs)
    relevant_docs = [doc.page_content for doc in relevant_docs]

    if reranker:
        relevant_docs = reranker.rerank(question, relevant_docs, k=num_docs_final)
        relevant_docs = [doc["content"] for doc in relevant_docs]
        relevant_docs = relevant_docs[:num_docs_final]

    context = "\nExtracted PDF content:\n" + "".join([f"Section {str(i+1)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
    final_prompt = tokenizer.apply_chat_template(prompt_in_chat_format, tokenize=False, add_generation_prompt=True).format(
        question=question, context=context
    )

    answer = llm(final_prompt)[0]["generated_text"]
    return answer, relevant_docs

# Generate PDF using FPDF
def generate_pdf(text: str, filename: str = "generated_question_paper.pdf"):
    # Replace unsupported characters with supported equivalents
    text = text.replace('–', '-').replace('“', '"').replace('”', '"').replace('’', "'")

    pdf = FPDF()
    pdf.add_page()

    pdf.set_font("Arial", 'B', 16)
    pdf.cell(200, 10, txt="Question Paper", ln=True, align='C')

    pdf.set_font("Arial", '', 12)
    pdf.ln(10)

    # Write each line in the PDF, wrapping as necessary
    for line in text.split('\n'):
        pdf.multi_cell(0, 10, line)

    pdf.output(filename)



# Main code to generate question paper and PDF
question = "generate end-sem question paper?"
answer, relevant_docs = answer_with_rag(question, READER_LLM, KNOWLEDGE_VECTOR_DATABASE, reranker=RERANKER)

# Print the answer
print("==================================Answer==================================")
print(f"{answer}")

# Generate the PDF
generate_pdf(answer, "end_sem_question_paper.pdf")


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  self.scaler = torch.cuda.amp.GradScaler()
  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 1/1 [00:00<00:00,  4.88it/s]


Here is the question paper for the course "Programming for Problem Solving" with the specified format:

**Question Paper**

**Reg. No.** _______________________________________
**End Semester Examination – June / July 2024**
**Course Code:** 18CS1004
**Duration:** 3 hours
**Course Name:** PROGRAMMING FOR PROBLEM SOLVING
**Max. Marks:** 100

**Part A (10 x 1 = 10 MARKS)**

1. Define the concept of a stack. CO1 1
2. Write a program in C to implement a stack using an array. CO2 1
3. Predict the output of the following program: int a=10; int *ptr=&a; printf("%d",*ptr); CO3 U 1
4. ____________ is called as exit controlled loop. CO3 1
5. List the two ways a string can be declared. CO4 R 1
6. Identify the value of num[6] from the below line of code int num[]={9,6,4,2,3,5,1,7,8}; CO4 U 1
7. State the string termination character. CO5 R 1
8. Predict the output for the following code: int a=10; int *ptr=&a; printf("%d",*ptr); printf("%d",++(*ptr)); CO6 U 1
9. Write a simple program to find wheth

UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 436: ordinal not in range(256)

In [1]:
import gradio as gr
from fpdf import FPDF
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
import faiss
import pickle
import torch
from langchain.docstore import InMemoryDocstore
from tqdm import tqdm
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from ragatouille import RAGPretrainedModel
from typing import List, Tuple, Optional

# Initialize model components
EMBEDDING_MODEL_NAME = "thenlper/gte-small"
tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
index = faiss.read_index('knowledge_vector_1.index')
with open('docs_processed.pkl', 'rb') as f:
    docs_processed = pickle.load(f)

embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True}
)

docstore = InMemoryDocstore({i: doc for i, doc in enumerate(docs_processed)})

KNOWLEDGE_VECTOR_DATABASE = FAISS(
    index=index,
    docstore=docstore,
    index_to_docstore_id={i: i for i in range(len(docs_processed))},
    embedding_function=embedding_model
)

READER_MODEL_NAME = "RJ1200/llama-3_3b-fine_tuned"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

READER_LLM = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=1000,
)

RERANKER = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

# Prompt template
prompt_in_chat_format = [
    {
        "role": "system",
        "content": """You are an AI assistant specializing in analyzing PDF documents. Your task is to generate a comprehensive question paper based on the provided PDF context. The question paper should include the following header:
        **Question Paper**
        Reg. No. ____________________
        End Semester Examination – Date
        Code: 18CS1004
        Duration: 3 hrs
        Sub. Name: PROGRAMMING FOR PROBLEM SOLVING
        Max. Marks: 100 .
        For each section mentioned, generate the exact number of questions as specified. Ensure that the questions are relevant, clear, and cover the key topics within the section. Reference specific page numbers or sections from the PDF whenever applicable. If the information needed to create questions is not available in the PDF context, clearly state that."""
    },
    {
        "role": "user",
        "content": """PDF Context:
        {context}
        ---
        For the following sections, generate the required number of questions:
        part A-10, part B- 5, part C- 4
        ---
        Question: {question}""",
    },
]

# Function for RAG-based question paper generation
def answer_with_rag(
    question: str,
    llm: pipeline,
    knowledge_index: FAISS,
    reranker: Optional[RAGPretrainedModel] = None,
    num_retrieved_docs: int = 30,
    num_docs_final: int = 5,
) -> Tuple[str, List[str]]:
    relevant_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=question, k=num_retrieved_docs)
    relevant_docs = [doc.page_content for doc in relevant_docs]

    if reranker:
        relevant_docs = reranker.rerank(question, relevant_docs, k=num_docs_final)
        relevant_docs = [doc["content"] for doc in relevant_docs]
        relevant_docs = relevant_docs[:num_docs_final]

    context = "\nExtracted PDF content:\n" + "".join([f"Section {str(i+1)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
    final_prompt = tokenizer.apply_chat_template(prompt_in_chat_format, tokenize=False, add_generation_prompt=True).format(
        question=question, context=context
    )

    answer = llm(final_prompt)[0]["generated_text"]
    return answer, relevant_docs

# Generate PDF using FPDF
def generate_pdf(text: str, filename: str = "generated_question_paper.pdf"):
    # Replace unsupported characters with supported equivalents
    text = text.replace('–', '-').replace('“', '"').replace('”', '"').replace('’', "'")

    pdf = FPDF()
    pdf.add_page()

    pdf.set_font("Arial", 'B', 16)
    pdf.cell(200, 10, txt="Question Paper", ln=True, align='C')

    pdf.set_font("Arial", '', 12)
    pdf.ln(10)

    # Write each line in the PDF, wrapping as necessary
    for line in text.split('\n'):
        pdf.multi_cell(0, 10, line)

    pdf.output(filename)

# Gradio function to generate question paper and return PDF
def gradio_generate_question_paper(question):
    answer, relevant_docs = answer_with_rag(question, READER_LLM, KNOWLEDGE_VECTOR_DATABASE, reranker=RERANKER)
    pdf_filename = "end_sem_question_paper.pdf"
    generate_pdf(answer, pdf_filename)
    return pdf_filename

# Define Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# C Question Paper Generator")

    with gr.Row():
        question_input = gr.Textbox(label="Enter your question (e.g., 'generate end-sem question paper')")

    generate_button = gr.Button("Generate Question Paper")

    pdf_output = gr.File(label="Download Question Paper")

    generate_button.click(gradio_generate_question_paper, inputs=question_input, outputs=pdf_output)

# Launch Gradio app
demo.launch()


  embedding_model = HuggingFaceEmbeddings(
`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  self.scaler = torch.cuda.amp.GradScaler()


Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0abc032dcac22310eb.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:

# Assuming KNOWLEDGE_VECTOR_DATABASE is already created with your PDF content

# Model initialization
READER_MODEL_NAME = "RJ1200/llama-3_3b-fine_tuned"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config)
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

READER_LLM = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.2,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=1000,
)

# Initialize reranker
RERANKER = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")

prompt_in_chat_format = [
    {
        "role": "system",
        "content": """You are an AI assistant specializing in analyzing PDF documents. Your task is to generate a comprehensive question paper based on the provided PDF context.
        For each section mentioned, generate the exact number of questions as specified.
        Ensure that the questions are relevant, clear, and cover the key topics within the section.
        Reference specific page numbers or sections from the PDF whenever applicable.
        If the information needed to create questions is not available in the PDF context, clearly state that.
        """,
    },
    {
        "role": "user",
        "content": """PDF Context:
        {context}
        ---
        For the following sections, generate the required number of questions:
        section_requirements
    part A-10,
    part B- 5,
    part C- 4


        ---
        Question: {question}""",
    },
]

RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
    prompt_in_chat_format, tokenize=False, add_generation_prompt=True
)

def answer_with_rag(
    question: str,
    llm: pipeline,
    knowledge_index: FAISS,
    reranker: Optional[RAGPretrainedModel] = None,
    num_retrieved_docs: int = 30,
    num_docs_final: int = 5,
) -> Tuple[str, List[str]]:
    # Gather documents with retriever
    print("=> Retrieving documents...")
    relevant_docs = KNOWLEDGE_VECTOR_DATABASE.similarity_search(query=question, k=num_retrieved_docs)
    relevant_docs = [doc.page_content for doc in relevant_docs]  # Keep only the text

    # Optionally rerank results
    if reranker:
        print("=> Reranking documents...")
        relevant_docs = reranker.rerank(question, relevant_docs, k=num_docs_final)
        relevant_docs = [doc["content"] for doc in relevant_docs]
        relevant_docs = relevant_docs[:num_docs_final]

    # Build the final prompt
    context = "\nExtracted PDF content:\n"
    context += "".join([f"Section {str(i+1)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
    final_prompt = RAG_PROMPT_TEMPLATE.format(question=question, context=context)
    # Generate an answer
    print("=> Generating answer...")
    answer = llm(final_prompt)[0]["generated_text"]
    return answer, relevant_docs

# Example usage
question = "generate end-sem question paper?"
answer, relevant_docs = answer_with_rag(question, READER_LLM, KNOWLEDGE_VECTOR_DATABASE, reranker=RERANKER)

print("==================================Answer==================================")
print(f"{answer}")


`low_cpu_mem_usage` was None, now set to True since model is quantized.


Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

=> Retrieving documents...


  self.scaler = torch.cuda.amp.GradScaler()


=> Reranking documents...


  return torch.cuda.amp.autocast() if self.activated else NullContextManager()
100%|██████████| 1/1 [00:00<00:00,  4.44it/s]


=> Generating answer...
Here is the question paper for End-Semester exam in Programming for Problem Solving:

**Part A (10 x 1 = 10 MARKS)**

1. Define algorithm. CO1 1
2. Identify the invalid variable(s) in the following declarations. CO1 1
3. Predict the output of the following program. CO1 1
4. Compare and contrast 'keyword' and 'identifier' in C with examples. CO2 1
5. Write a simple program to find whether a number is odd or even. CO3 1
6. Write the algorithm for bubble sort program. CO4 1
7. Describe a function prototype along with its syntax. CO5 1
8. What is self-referential structure? Explain. CO6 1
9. Predict the output for the following code. CO6 1
10. Write a recursive function to return the factorial of all the numbers in a one-dimensional array. CO5 1

**Part B (6 x 3 = 18 MARKS)**

1. Draw the block diagram of computer and describe its components. CO1 U 3
2. Compare and contrast 'keyword' and 'identifier' in C with examples. CO2 U 3
3. Write a simple program to find whet