In [1]:
import os
import PyPDF2
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
import cv2
import pytesseract
from PIL import Image




In [2]:
# Set up Groq API key.
# Prefer exporting GROQ_API_KEY in the environment before starting the kernel.
# `setdefault` only writes the placeholder when the variable is not already
# set, so a real key configured outside the notebook is never overwritten and
# never has to be saved inside the notebook itself.
os.environ.setdefault("GROQ_API_KEY", "Replace with your actual Groq API key")  # Replace with your actual Groq API key


In [3]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A single string in which each page's extracted text directly follows
        the previous page's text (no separator is inserted). A page that
        yields no text contributes an empty string.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        # Extraction happens while the file is still open because the reader
        # was built on this file handle.
        # `or ""` keeps a page without extractable text from breaking the
        # join, and join avoids rebuilding the string once per page.
        return "".join(page.extract_text() or "" for page in reader.pages)

In [4]:
def create_qa_system(question_paper_path):
    """Build a retrieval QA chain over the text of a question-paper PDF.

    The PDF text is split into chunks, embedded with ``HuggingFaceEmbeddings``,
    indexed in a Chroma vector store, and connected to a Groq chat model
    through a "stuff" ``RetrievalQA`` chain that retrieves 3 chunks per query.

    Args:
        question_paper_path: Path of the question-paper PDF.

    Returns:
        The ``RetrievalQA`` chain built over the question paper.

    Raises:
        ValueError: If no text could be extracted from the PDF (for example
            when it is empty or contains only scanned images).
    """
    question_paper_text = extract_text_from_pdf(question_paper_path)

    # Fail early with a clear message: without text there is nothing to split
    # or index, and the failure would otherwise surface later in the pipeline.
    if not question_paper_text or not question_paper_text.strip():
        raise ValueError(
            f"No extractable text found in {question_paper_path!r}; "
            "the PDF may be empty or contain only scanned images."
        )

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    texts = text_splitter.split_text(question_paper_text)

    embeddings = HuggingFaceEmbeddings()
    db = Chroma.from_texts(texts, embeddings)

    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0.2,  # Lower temperature for more consistent evaluations
    )

    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever(search_kwargs={"k": 3}))

    return qa

In [5]:
def preprocess_image(image_path):
    """Load an image and binarize it before it is handed to OCR.

    Steps: read the file with OpenCV, convert BGR to grayscale, apply a binary
    threshold with the Otsu flag, then dilate once with a 3x3 rectangular
    kernel.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The dilated, thresholded image produced by OpenCV.

    Raises:
        ValueError: If OpenCV could not read the file (``cv2.imread`` returned
            ``None``), e.g. because it is missing or not a decodable image.
    """
    img = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising; check
    # here so the caller gets the offending path rather than an opaque error
    # from cvtColor.
    if img is None:
        raise ValueError(f"Could not read image file: {image_path!r}")

    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    # threshold() returns (retval, image); only the image is needed.
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (3,3))
    dilated = cv2.dilate(thresh, kernel, iterations=1)
    return dilated


In [6]:
def extract_text_from_image(image_path):
    """OCR an image file and return the recognized text.

    The image is first run through ``preprocess_image`` and the result is
    passed to ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the image file to read.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the preprocessed
        image.
    """
    return pytesseract.image_to_string(preprocess_image(image_path))

In [None]:
def evaluate_answer(qa_system, question_number, student_answer):
    """Ask the QA chain to grade one student answer.

    A grading prompt is built that names the question by number, embeds the
    student's answer, and lists four criteria (correctness, completeness,
    clarity, relevance) plus a request for an overall score out of 10. The
    prompt is sent through ``qa_system.run``.

    Args:
        qa_system: Object exposing ``run(prompt)``.
        question_number: Number of the question being graded.
        student_answer: Text of the student's answer.

    Returns:
        The value returned by ``qa_system.run`` for the grading prompt.
    """
    grading_request = f"""
    Referring to Question {question_number} in the question paper:
    
    Student's Answer: {student_answer}

    Please evaluate the student's answer based on the following criteria:
    1. Correctness: Is the answer factually correct and aligned with the question's requirements?
    2. Completeness: Does the answer address all parts of the question?
    3. Clarity: Is the answer well-expressed and easy to understand?
    4. Relevance: Is the answer directly relevant to the question asked?

    Provide a brief evaluation for each criterion and an overall score out of 10 for the answer.
    """

    return qa_system.run(grading_request)

In [None]:
def main(question_paper_path="generated_question_paper.pdf", num_questions=2):
    """Grade the student's answer images against the question paper.

    Builds the QA system from the question-paper PDF, then for each question
    number from 1 to ``num_questions`` looks for ``answer_<n>.jpg`` in the
    current working directory. Existing images are OCR'd and evaluated, and
    the evaluation is printed; missing images are reported and skipped.

    Args:
        question_paper_path: Path of the question-paper PDF.
        num_questions: How many question numbers to check, starting at 1.
            Defaults to 2, which matches the range this function has always
            iterated over.
    """
    qa_system = create_qa_system(question_paper_path)

    # Assuming the student's answers are in separate image files named
    # answer_1.jpg, answer_2.jpg, ...
    for question_number in range(1, num_questions + 1):
        student_answer_image_path = f"answer_{question_number}.jpg"

        if os.path.exists(student_answer_image_path):
            student_answer_text = extract_text_from_image(student_answer_image_path)
            evaluation = evaluate_answer(qa_system, question_number, student_answer_text)

            print(f"Evaluation for Question {question_number}:")
            print(evaluation)
            print("\n" + "-"*50 + "\n")
        else:
            print(f"No answer image found for Question {question_number}")

# Entry point: run the evaluation when this code executes as the top-level
# program (`__name__` is "__main__"); importing it as a module does not
# trigger a run.
if __name__ == "__main__":
    main()