In [2]:
import os
import PyPDF2
from langchain_groq import ChatGroq
from langchain.chains import RetrievalQA
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
import gc
import logging
import warnings
import re

# Hide FutureWarning and UserWarning messages for the rest of the session.
for ignored_category in (FutureWarning, UserWarning):
    warnings.filterwarnings("ignore", category=ignored_category)

# Root logger: INFO level, "<timestamp> - <level> - <message>" lines.
LOG_FORMAT = '%(asctime)s - %(levelname)s - %(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

In [3]:
# SECURITY: a real Groq API key used to be hardcoded on this line. Credentials
# must never be stored in a notebook; treat the previously committed key as
# compromised and revoke it. Provide the key through the environment instead,
# e.g. `export GROQ_API_KEY=...` before starting Jupyter.
def groq_api_key_is_set(environ=None):
    """Return True when `environ` holds a non-blank GROQ_API_KEY.

    Args:
        environ: Mapping to inspect; defaults to ``os.environ``.

    Returns:
        bool: False if the variable is missing, None, or only whitespace.
    """
    env = os.environ if environ is None else environ
    return bool((env.get("GROQ_API_KEY") or "").strip())


# Warn early instead of failing later with an opaque authentication error.
if not groq_api_key_is_set():
    logging.warning(
        "GROQ_API_KEY is not set; export it before running the evaluation cells."
    )

In [4]:
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF file.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        str: The page texts joined with newlines, or "" when the file cannot
        be opened or parsed (the error is logged, not raised).
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Guard against a falsy result (None / "") from a page so that one
            # such page does not raise and discard the whole document.
            page_texts = [page.extract_text() or "" for page in reader.pages]
        # Join with "\n" so the last line of one page cannot fuse with the
        # first line of the next; the answer parser matches at line starts.
        return "\n".join(page_texts)
    except Exception as e:
        # Deliberate best-effort: callers treat "" as "nothing extracted".
        logging.error(f"Error extracting text from PDF: {e}")
        return ""

In [5]:
def create_qa_system(question_paper_path, model_name="llama-3.1-70b-versatile", k=2):
    """Build a retrieval QA chain over the text of a question-paper PDF.

    The PDF text is split into 500-character chunks (50 overlap), embedded,
    stored in a Chroma vector store, and wrapped in a "stuff" RetrievalQA chain
    backed by a Groq chat model.

    Args:
        question_paper_path: Path of the question-paper PDF.
        model_name: Groq model identifier passed to ChatGroq.
            NOTE(review): confirm this model id is still served by Groq.
        k: Number of chunks the retriever returns per query.

    Returns:
        The RetrievalQA chain.

    Raises:
        ValueError: If no text could be extracted from the PDF.
    """
    question_paper_text = extract_text_from_pdf(question_paper_path)
    # extract_text_from_pdf returns "" on failure; indexing nothing would only
    # produce meaningless evaluations later, so fail loudly here instead.
    if not (question_paper_text or "").strip():
        raise ValueError(f"No text could be extracted from {question_paper_path!r}")

    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    texts = text_splitter.split_text(question_paper_text)

    embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
    db = Chroma.from_texts(texts, embeddings)

    llm = ChatGroq(
        model=model_name,
        temperature=0.2,
        max_tokens=1000,  # Limit token generation
    )

    retriever = db.as_retriever(search_kwargs={"k": k})
    return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [6]:
def evaluate_mcq_answer(qa_system, question_number, student_answer):
    """Ask the QA chain to grade one multiple-choice answer.

    Args:
        qa_system: Chain exposing ``invoke({"query": ...})`` (e.g. RetrievalQA).
        question_number: Number of the question being graded.
        student_answer: The option the student selected.

    Returns:
        str: The model's "Score / Explanation" text, or
        "Error occurred during evaluation." if the chain call fails.
    """
    prompt = f"""
    Referring to Multiple Choice Question {question_number}:
    
    Student's Answer: {student_answer}

    Criteria:
    1. Correctness: Is the answer correct? (1 point if correct, 0 if incorrect)
    2. Validity: Is the response a valid option (A, B, C, or D)?

    Instructions:
    - Assign 1 point if the answer is correct and valid, 0 points otherwise.
    - Provide a brief explanation (1-2 sentences) for the score.
    - If the response is invalid, explain why and assign 0 points.

    Format your response as follows:
    Score: [0 or 1]
    Explanation: [Your brief explanation]

    Limit your entire response to 50 words.
    """

    try:
        # `Chain.run` is deprecated; `invoke` takes the input dict and returns
        # a dict whose "result" key holds the answer text.
        response = qa_system.invoke({"query": prompt})
        if isinstance(response, dict):
            return response["result"]
        return response
    except Exception as e:
        logging.error(f"Error during evaluation: {e}")
        return "Error occurred during evaluation."

In [7]:
def parse_student_answers(student_answers_text):
    """Parse "<number><sep> <letter>" lines into a question -> answer mapping.

    A line is recognised when, after stripping, it starts with a question
    number, one of the separators ``.``, ``)``, ``:`` or ``-``, and a single
    option letter A-D in either case (e.g. ``1. A``, ``2) b``, ``10.D``).
    The letter must not be the first letter of a longer word, so ``5. Apple``
    is ignored.

    Args:
        student_answers_text: Raw text of the answer sheet; may be empty or None.

    Returns:
        dict[str, str]: Question number (as a string) mapped to the upper-case
        option letter. If a number appears more than once, the last one wins.
    """
    answers = {}
    if not student_answers_text:
        return answers

    # (?![A-Za-z]) rejects letters that merely begin a word.
    pattern = re.compile(r'(\d+)\s*[.):\-]\s*([A-Da-d])(?![A-Za-z])')

    for line in student_answers_text.splitlines():
        match = pattern.match(line.strip())
        if match:
            answers[match.group(1)] = match.group(2).upper()

    return answers

In [8]:
def main(
    question_paper_path=r"mcqs_generation_pdfs/multiple choice questions.pdf",
    student_answers_path=r"mcqs_answer_pdfs/answer_1.pdf",
):
    """Grade every answer found in a student's answer PDF and print the results.

    Args:
        question_paper_path: PDF containing the multiple-choice questions.
        student_answers_path: PDF containing the student's answers.

    Returns:
        None. Problems are logged; the function returns early when there are
        no answers to grade or the QA system cannot be built.
    """
    # Read the answer sheet first: it is cheap, and without answers there is
    # no point in building the QA system.
    student_answers_text = extract_text_from_pdf(student_answers_path)
    student_answers = parse_student_answers(student_answers_text)
    if not student_answers:
        logging.error(f"No student answers found in {student_answers_path}; nothing to evaluate.")
        return

    try:
        qa_system = create_qa_system(question_paper_path)
    except Exception as e:
        logging.error(f"Error creating QA system: {e}")
        return

    for question_number, student_answer in student_answers.items():
        evaluation = evaluate_mcq_answer(qa_system, question_number, student_answer)

        print(f"Evaluation for Question {question_number}:")
        print(evaluation)
        print("\n" + "-"*50 + "\n")

        # Clear some memory after each iteration
        gc.collect()


if __name__ == "__main__":
    main()

  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")
  from tqdm.autonotebook import tqdm, trange
2024-10-13 21:56:15,481 - INFO - Use pytorch device_name: cpu
2024-10-13 21:56:15,487 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2
2024-10-13 21:56:21,767 - INFO - Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
  evaluation = qa_system.run(prompt)
2024-10-13 21:56:26,538 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 1:
Score: 0
Explanation: The answer is not provided in the given context, so it's impossible to determine the correctness of the student's answer. The context only provides information for questions 4-9, but not question 1.

--------------------------------------------------



2024-10-13 21:56:28,181 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 2:
Score: 1
Explanation: The answer is correct and valid. The student chose option B, but the question is not provided. However, based on the context, if the question is related to photosynthesis, transpiration, or evaporation, option B might be correct, but without the question, it's hard to confirm.

--------------------------------------------------



2024-10-13 21:56:30,119 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 3:
Score: 0
Explanation: The answer is incorrect because the question is not provided, and I don't know what the question is asking. However, the response "C" is a valid option, but without the question, I cannot determine its correctness.

--------------------------------------------------



2024-10-13 21:56:31,861 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 4:
Score: 0
Explanation: The answer is incorrect because Thyroxine is produced by the thyroid gland, not the pancreas. The correct answer is A) Insulin, which is produced by the pancreas and plays a key role in regulating blood sugar levels.

--------------------------------------------------



2024-10-13 21:56:33,417 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 5:
Score: 1
Explanation: The student's answer, A) Differentiation, is correct and valid. Differentiation is the process by which a cell becomes specialized to perform a specific function, making it the correct choice among the options provided.

--------------------------------------------------



2024-10-13 21:56:34,833 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 6:
Score: 0
Explanation: The answer is incorrect because T cells do not produce antibodies in response to infection. B cells are responsible for producing antibodies. The response is a valid option, but the answer is incorrect.

--------------------------------------------------



2024-10-13 21:56:36,366 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 7:
Score: 0
Explanation: The answer is incorrect. Natural selection (option C) is the process by which organisms with favorable traits are more likely to survive and reproduce, whereas heredity (option D) is the correct term for the process by which genetic information is passed from one generation to the next.

--------------------------------------------------



2024-10-13 21:56:37,809 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 8:
Score: 0
Explanation: The answer is incorrect because the lungs are responsible for exchanging oxygen and carbon dioxide, not filtering waste and excess fluids from the blood. The correct answer is B) Kidneys, which are the primary organs for filtering waste and excess fluids.

--------------------------------------------------



2024-10-13 21:56:39,437 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 9:
Score: 0
Explanation: The student's answer, A, is not the correct answer for the question about the term for the process by which an organism's genetic information is altered by external factors. The correct answer is not provided in the given context, but A is not a valid option based on the information given.

--------------------------------------------------



2024-10-13 21:56:40,873 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 10:
I don't know the answer. There is no Multiple Choice Question 10 provided in the given context.

--------------------------------------------------

