In [None]:
import os
from xml.sax.saxutils import escape as xml_escape

import PyPDF2
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq
from reportlab.lib.enums import TA_JUSTIFY
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer


In [None]:
# Set up Groq API key.
# Keep a key that is already exported in the environment; the placeholder below
# is only a fallback for when none is set. Prefer exporting GROQ_API_KEY outside
# the notebook so a real secret is never saved (or committed) with this file.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = "Replace with your actual Groq API key"  # Replace with your actual Groq API key

In [None]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF as a single string.

    Pages are joined with a newline so the last word of one page is not fused
    with the first word of the next. A page whose ``extract_text()`` result is
    empty or missing contributes an empty string instead of raising.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined by ``"\\n"``; ``""`` when the reader reports no pages.

    Raises:
        OSError: If the file cannot be opened (propagated from ``open``).
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Extract inside the `with` block (as before) so the file handle is
        # still open while pages are read. Collecting into a list and joining
        # once also avoids repeated string concatenation.
        page_texts = [page.extract_text() or "" for page in reader.pages]
    return "\n".join(page_texts)

In [None]:
def create_qa_system(
    past_paper_paths,
    subject_book_paths,
    chunk_size=1000,
    chunk_overlap=0,
    model="llama-3.1-70b-versatile",
    temperature=0.7,
    k=3,
):
    """Build a retrieval-augmented QA chain over the given PDFs.

    The text of all PDFs is extracted, split into chunks, embedded into a
    Chroma vector store, and wrapped in a "stuff" ``RetrievalQA`` chain backed
    by a Groq chat model.

    Args:
        past_paper_paths: Iterable of paths to past-paper PDFs.
        subject_book_paths: Iterable of paths to subject-book PDFs.
        chunk_size: Target maximum number of characters per chunk.
        chunk_overlap: Number of characters shared between adjacent chunks.
        model: Groq model identifier passed to ``ChatGroq``.
        temperature: Sampling temperature passed to ``ChatGroq``.
        k: Number of chunks the retriever returns per query.

    Returns:
        The ``RetrievalQA`` chain.

    Raises:
        ValueError: If no text could be extracted from any of the PDFs.
    """
    pdf_paths = list(past_paper_paths) + list(subject_book_paths)
    all_texts = "\n".join(extract_text_from_pdf(pdf_path) for pdf_path in pdf_paths)

    # CharacterTextSplitter splits on "\n\n" by default. Text extracted from
    # PDFs rarely contains blank lines, so with the default separator the
    # splitter cannot cut the text and emits chunks far larger than chunk_size.
    # Splitting on single newlines lets chunk_size actually be honoured.
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    texts = text_splitter.split_text(all_texts)
    if not texts:
        # Fail here with an actionable message rather than inside the vector store.
        raise ValueError(
            "No text could be extracted from the supplied PDFs; nothing to index."
        )

    embeddings = HuggingFaceEmbeddings()
    # NOTE(review): depending on the installed chromadb version, repeated calls
    # in the same kernel may add to the same default collection — confirm, and
    # pass a unique collection_name if duplicates show up in retrieval.
    db = Chroma.from_texts(texts, embeddings)

    # NOTE(review): hosted model identifiers get retired over time — confirm
    # the default `model` is still served, or pass a current one.
    llm = ChatGroq(
        model=model,
        temperature=temperature,
    )

    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=db.as_retriever(search_kwargs={"k": k}),
    )

    return qa

In [None]:
def generate_question_paper(qa_system, num_questions=10):
    """Ask the QA chain for ``num_questions`` exam questions, one call each.

    Args:
        qa_system: A chain exposing ``invoke`` (preferred) or the legacy
            ``run`` method, e.g. the object returned by ``create_qa_system``.
        num_questions: How many questions to request; values <= 0 yield ``[]``.

    Returns:
        A list with one response per request, in request order.
    """
    # Chain.run is deprecated in LangChain in favour of invoke. Use invoke
    # when the object provides it and keep run as a fallback so older or
    # duck-typed chains still work.
    invoke = getattr(qa_system, "invoke", None)
    # RetrievalQA names its input "query" and its output "result" by default;
    # honour the chain's own attributes when it declares different keys.
    input_key = getattr(qa_system, "input_key", "query")
    output_key = getattr(qa_system, "output_key", "result")

    questions = []
    for i in range(num_questions):
        prompt = f"Generate a new exam question (question {i+1} of {num_questions}) based on the content of the past papers, ensuring it's within the scope of the subject books. The question should be challenging but fair. Format the question in a structured manner suitable for an exam paper, including clear instructions, sub-parts if applicable, and mark allocation."
        if callable(invoke):
            output = invoke({input_key: prompt})
            # invoke returns a dict of outputs; unwrap the answer text.
            response = output[output_key] if isinstance(output, dict) else output
        else:
            response = qa_system.run(prompt)
        questions.append(response)
    return questions

In [None]:
def create_reformatted_question_paper_pdf(questions, output_path):
    """Write the generated questions to a PDF file.

    Each question gets a "Question N" heading. Within a question, blank lines
    are dropped, lines that start with a ``*`` or ``-`` marker followed by
    whitespace are rendered as bullets, and all other lines are rendered as
    justified paragraphs.

    Args:
        questions: Iterable of question strings (may contain newlines).
        output_path: Destination path of the PDF.
    """
    doc = SimpleDocTemplate(output_path, pagesize=letter)
    styles = getSampleStyleSheet()
    styles.add(ParagraphStyle(name='Justify', alignment=TA_JUSTIFY))

    content = [
        Paragraph("Generated Question Paper", styles['Title']),
        Spacer(1, 12),
    ]

    for i, question in enumerate(questions, 1):
        content.append(Paragraph(f"Question {i}", styles['Heading2']))
        for line in question.splitlines():
            stripped = line.strip()
            if not stripped:
                continue
            # A bullet is a marker followed by whitespace. Requiring the
            # whitespace keeps "**bold**" and "---" from being mistaken for
            # bullets; checking the stripped line catches indented bullets.
            is_bullet = (
                stripped[0] in "*-"
                and len(stripped) > 1
                and stripped[1].isspace()
            )
            # Paragraph parses its text as markup, so model output containing
            # "<", ">" or "&" (e.g. "a < b") must be escaped before rendering.
            if is_bullet:
                # Drop only the leading marker; trailing characters are content.
                bullet_text = xml_escape(stripped[1:].lstrip())
                content.append(Paragraph(f"• {bullet_text}", styles['BodyText']))
            else:
                content.append(Paragraph(xml_escape(stripped), styles['Justify']))
        content.append(Spacer(1, 12))

    doc.build(content)

In [None]:
def main(
    past_paper_paths=None,
    subject_book_paths=None,
    num_questions=10,
    output_pdf_path="generated_question_paper.pdf",
):
    """Run the full pipeline: index the PDFs, generate questions, write a PDF.

    Called with no arguments it behaves as before, using the default input
    files and settings below.

    Args:
        past_paper_paths: Paths of past-paper PDFs; defaults to
            ``["output.pdf", "output1.pdf"]``.
        subject_book_paths: Paths of subject-book PDFs; defaults to
            ``["cs10.pdf"]``.
        num_questions: Number of questions to generate.
        output_pdf_path: Where the generated question paper is written.

    Raises:
        FileNotFoundError: If any input PDF does not exist.
    """
    if past_paper_paths is None:
        past_paper_paths = ["output.pdf", "output1.pdf"]  # Your past paper PDFs
    if subject_book_paths is None:
        subject_book_paths = ["cs10.pdf"]  # Add your subject book PDFs here

    # Normalise to lists so any iterable of paths is accepted.
    past_paper_paths = list(past_paper_paths)
    subject_book_paths = list(subject_book_paths)

    # Report every missing input up front, before the expensive embedding and
    # LLM steps start, instead of failing on the first unreadable file.
    missing = [
        str(path)
        for path in past_paper_paths + subject_book_paths
        if not os.path.isfile(path)
    ]
    if missing:
        raise FileNotFoundError(f"Input PDF(s) not found: {', '.join(missing)}")

    qa_system = create_qa_system(past_paper_paths, subject_book_paths)
    questions = generate_question_paper(qa_system, num_questions)

    create_reformatted_question_paper_pdf(questions, output_pdf_path)
    print(f"Generated question paper has been saved to {output_pdf_path}")

# Run the pipeline only when this code is executed directly (as a notebook cell
# or script), not when it is imported as a module.
if __name__ == "__main__":
    main()