In [1]:
import os
import PyPDF2
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
import cv2
import pytesseract
from PIL import Image
import gc
import logging
import warnings




In [2]:
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')


In [3]:
# Keep any GROQ_API_KEY that is already exported in the environment; only fall
# back to an empty placeholder when the variable is missing. Provide the real
# key through the environment rather than pasting it into the notebook.
os.environ.setdefault("GROQ_API_KEY", "")
if not os.environ["GROQ_API_KEY"]:
    logging.warning("GROQ_API_KEY is not set; set it in the environment before creating the QA system.")

def extract_text_from_pdf(pdf_path):
    """Return the concatenated text of every page in a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The text of all pages joined in page order with no separator, or an
        empty string if the file cannot be opened or parsed (the error is
        logged, not raised).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Treat a page that yields None/empty text as "" so one blank page
            # does not abort extraction of the whole document; join() also
            # avoids rebuilding the string on every page as `+=` does.
            return "".join(page.extract_text() or "" for page in reader.pages)
    except Exception as e:
        logging.error(f"Error extracting text from PDF: {e}")
        return ""


In [4]:
def create_qa_system(question_paper_path):
    """Build a retrieval QA chain over the text of a question-paper PDF.

    The PDF text is split into 500-character chunks (50-character overlap),
    embedded with the "sentence-transformers/all-mpnet-base-v2" model, stored
    in a Chroma vector store, and wrapped in a "stuff" RetrievalQA chain that
    retrieves the top 2 chunks per query and answers with a Groq chat model.

    Args:
        question_paper_path: Path of the question-paper PDF.

    Returns:
        The RetrievalQA chain.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    question_paper_text = extract_text_from_pdf(question_paper_path)
    # Fail fast with a clear message instead of loading the embedding model
    # and handing the vector store an empty corpus.
    if not question_paper_text.strip():
        raise ValueError(
            f"No text could be extracted from '{question_paper_path}'; cannot build the QA system."
        )

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_text(question_paper_text)

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    db = Chroma.from_texts(texts, embeddings)

    llm = ChatGroq(
        model="llama-3.1-70b-versatile",
        temperature=0.2,
        max_tokens=1000,  # Limit token generation
    )

    qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=db.as_retriever(search_kwargs={"k": 2}))

    return qa

In [5]:
def preprocess_image(image_path):
    """Load an image and binarize it for OCR.

    Pipeline: read the file, convert BGR to grayscale, apply non-local-means
    denoising, then Otsu thresholding to a binary image.

    Args:
        image_path: Path of the image file to load.

    Returns:
        The thresholded image, or None if the file cannot be read or any
        processing step fails (the error is logged, not raised).
    """
    try:
        image = cv2.imread(image_path)
        # cv2.imread reports an unreadable or missing file by returning None
        # rather than raising, so check it here instead of letting cvtColor
        # fail later with an unrelated-looking error.
        if image is None:
            logging.error(f"Error preprocessing image: could not read image file '{image_path}'")
            return None
        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        denoised = cv2.fastNlMeansDenoising(gray)
        # threshold() returns (computed_threshold, image); keep only the image.
        threshold = cv2.threshold(denoised, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)[1]
        return threshold
    except Exception as e:
        logging.error(f"Error preprocessing image: {e}")
        return None

In [6]:
def extract_text_from_image(image_path):
    """OCR the image at ``image_path`` and return the recognized text.

    The image is first run through ``preprocess_image``. An empty string is
    returned when preprocessing yields None or when OCR raises (the OCR error
    is logged).
    """
    prepared = preprocess_image(image_path)
    if prepared is None:
        return ""

    try:
        return pytesseract.image_to_string(prepared)
    except Exception as err:
        logging.error(f"Error extracting text from image: {err}")
        return ""

In [7]:
def evaluate_mcq_answer(qa_system, question_number, student_answer):
    """Ask the QA system to grade one multiple-choice answer.

    Args:
        qa_system: Object exposing ``run(prompt)``; its return value is passed
            back to the caller unchanged.
        question_number: Number of the question being graded (interpolated
            into the prompt).
        student_answer: The student's answer text (interpolated into the
            prompt).

    Returns:
        Whatever ``qa_system.run`` returns, or the fixed string
        "Error occurred during evaluation." if it raises (the error is logged).
    """
    # The prompt is assembled line by line so the whitespace-only lines stay
    # explicit; joined with "\n" it opens with a newline and ends with a
    # four-space line.
    prompt_lines = [
        "",
        f"    Referring to Multiple Choice Question {question_number}:",
        "    ",
        f"    Student's Answer: {student_answer}",
        "",
        "    Evaluate the student's answer based on these criteria:",
        "    1. Correctness (6 points): Is the answer correct?",
        "    2. Response validity (4 points): Did the student provide a valid response (A, B, C, D, etc.)?",
        "",
        "    Provide a brief evaluation for each criterion with points.",
        "    Sum the points for an overall score out of 10.",
        "    Give a short sentence of feedback.",
        "    ",
        "    Limit your response to 100 words.",
        "    ",
    ]
    prompt = "\n".join(prompt_lines)

    try:
        return qa_system.run(prompt)
    except Exception as err:
        logging.error(f"Error during evaluation: {err}")
    return "Error occurred during evaluation."

In [None]:
def main(question_paper_path="multiple choice questions.pdf", student_answer_images=None):
    """Grade scanned MCQ answers against a question-paper PDF and print results.

    Args:
        question_paper_path: Path of the question-paper PDF used to build the
            QA system.
        student_answer_images: Image paths, one per question in question
            order (the first image is Question 1). Defaults to
            ["answer_1.jpg", "answer_2.jpg"].

    Returns:
        None. If the QA system cannot be created the error is logged and the
        function returns without grading anything.
    """
    if student_answer_images is None:
        student_answer_images = [
            "answer_1.jpg",
            "answer_2.jpg",
            # Add more image paths as needed
        ]

    try:
        qa_system = create_qa_system(question_paper_path)
    except Exception as e:
        logging.error(f"Error creating QA system: {e}")
        return

    for question_number, image_path in enumerate(student_answer_images, start=1):
        if os.path.exists(image_path):
            # OCR output may consist solely of whitespace/control characters
            # (e.g. "\n\x0c"); strip it so such output counts as "no answer"
            # instead of being sent for evaluation.
            student_answer_text = extract_text_from_image(image_path).strip()
            if student_answer_text:
                evaluation = evaluate_mcq_answer(qa_system, question_number, student_answer_text)

                print(f"Evaluation for Question {question_number}:")
                print(evaluation)
                print("\n" + "-"*50 + "\n")
            else:
                print(f"Could not extract text from image for Question {question_number}")
        else:
            print(f"No answer image found for Question {question_number}")

        # Clear some memory after each iteration
        gc.collect()

# Run main() only when this code executes as the top-level program, not when
# it is imported as a module.
if __name__ == "__main__":
    main()