In [15]:
import gc
import getpass
import logging
import os
import re
import uuid
import warnings

import PyPDF2
from langchain.chains import RetrievalQA
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_groq import ChatGroq

warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)

# Set up logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

In [16]:
# Keep the API key out of the notebook: reuse GROQ_API_KEY when the environment
# already provides a non-empty value, otherwise prompt for it without echoing.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your Groq API key: ")

In [17]:
def extract_text_from_pdf(pdf_path):
    """Return the text of every page of a PDF, one page per newline-separated block.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        The page texts joined with "\n". An empty string is returned when the
        file cannot be opened or parsed; the error is logged rather than raised.
    """
    try:
        with open(pdf_path, 'rb') as file:
            reader = PyPDF2.PdfReader(file)
            # Extraction must happen while the file is still open. A page that
            # yields no text (None or "") contributes an empty entry instead of
            # aborting the whole document.
            page_texts = [page.extract_text() or "" for page in reader.pages]
    except Exception as e:
        logging.error(f"Error extracting text from PDF: {e}")
        return ""

    # Separate pages with a newline so the last line of one page is never fused
    # with the first line of the next (callers split this text line by line).
    return "\n".join(page_texts)

In [18]:
def create_qa_system(
    question_paper_path,
    chunk_size=500,
    chunk_overlap=50,
    retriever_k=2,
    model="llama-3.1-70b-versatile",
    embedding_model="sentence-transformers/all-mpnet-base-v2",
):
    """Build a retrieval QA chain over the text of a question-paper PDF.

    The paper is extracted, split into overlapping chunks, embedded into a
    Chroma vector store and wrapped in a "stuff" RetrievalQA chain backed by a
    Groq chat model.

    Args:
        question_paper_path: Path of the question-paper PDF.
        chunk_size: Maximum chunk length passed to the text splitter.
        chunk_overlap: Overlap between consecutive chunks.
        retriever_k: Number of chunks retrieved per query.
        model: Groq chat model id.
        embedding_model: HuggingFace sentence-embedding model name.

    Returns:
        The configured RetrievalQA chain.

    Raises:
        ValueError: If no text could be extracted from the question paper.
    """
    question_paper_text = extract_text_from_pdf(question_paper_path)
    if not question_paper_text.strip():
        # extract_text_from_pdf signals failure with "", so fail here with a
        # clear message instead of an obscure error from the vector store.
        raise ValueError(
            f"No text could be extracted from question paper: {question_paper_path}"
        )

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap
    )
    texts = text_splitter.split_text(question_paper_text)

    embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
    # Use a fresh collection per call so chunks from an earlier call in the same
    # kernel (e.g. after re-running the cell) are not mixed into this index.
    # NOTE(review): relies on Chroma's default in-memory collection being shared
    # within a process — confirm for the installed chromadb version.
    db = Chroma.from_texts(
        texts,
        embeddings,
        collection_name=f"question-paper-{uuid.uuid4().hex}",
    )

    # NOTE(review): Groq retires model ids over time — confirm the default
    # `model` is still served before relying on it.
    llm = ChatGroq(
        model=model,
        temperature=0.2,
        max_tokens=1000,  # Limit token generation
    )

    retriever = db.as_retriever(search_kwargs={"k": retriever_k})
    return RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)

In [19]:
def evaluate_mcq_answer(qa_system, question_number, student_answer):
    """Ask the QA chain to score one multiple-choice answer.

    Args:
        qa_system: Chain exposing `invoke({"query": ...})` and returning a
            mapping with a "result" entry (as built by `create_qa_system`).
        question_number: Identifier of the question being graded.
        student_answer: The option the student chose.

    Returns:
        The model's "Score / Explanation" text, or the fixed string
        "Error occurred during evaluation." if the chain call fails (the error
        is logged, not raised).
    """
    # NOTE(review): the whole prompt is sent as the chain's query, so presumably
    # it also drives retrieval — verify that the retrieved context actually
    # corresponds to `question_number`.
    prompt = f"""
    Referring to Multiple Choice Question {question_number}:
    
    Student's Answer: {student_answer}

    Criteria:
    1. Correctness: Is the answer correct? (1 point if correct, 0 if incorrect)
    2. Validity: Is the response a valid option (A, B, C, or D)?

    Instructions:
    - Assign 1 point if the answer is correct and valid, 0 points otherwise.
    - Provide a brief explanation (1-2 sentences) for the score.
    - If the response is invalid, explain why and assign 0 points.

    Format your response as follows:
    Score: [0 or 1]
    Explanation: [Your brief explanation]

    Limit your entire response to 50 words.
    """

    try:
        # `Chain.run` is deprecated; `invoke` takes the input mapping and
        # returns the output mapping, whose "result" entry is the answer text.
        response = qa_system.invoke({"query": prompt})
        return response["result"]
    except Exception as e:
        logging.error(f"Error during evaluation: {e}")
        return "Error occurred during evaluation."

In [20]:
def parse_student_answers(student_answers_text):
    """Parse "<number>. <letter>" lines into a question-number -> answer mapping.

    A line counts as an answer when, after stripping surrounding whitespace, it
    starts with digits, a period, optional whitespace and a single letter A-D
    (either case) that is not the first letter of a longer word. Anything after
    the letter (e.g. ") Mutualism") is ignored. Other lines are skipped.

    Args:
        student_answers_text: Raw text of the answer sheet; may be empty or None.

    Returns:
        Dict mapping the question number (as a string) to the upper-case answer
        letter. If a question number appears more than once, the last one wins.
    """
    if not student_answers_text:
        return {}

    # The trailing \b stops lines such as "1. Apple" from being read as answer A.
    answer_line = re.compile(r'(\d+)\.\s*([A-D])\b', re.IGNORECASE)

    answers = {}
    for line in student_answers_text.splitlines():
        match = answer_line.match(line.strip())
        if match:
            # Normalise to upper case so "4. d" and "4. D" are treated alike.
            answers[match.group(1)] = match.group(2).upper()

    return answers

In [21]:
def main(
    question_paper_path=r"mcqs_generation_pdfs/multiple choice questions.pdf",
    student_answers_path=r"mcqs_answer_pdfs/answer_1.pdf",
):
    """Grade one student's MCQ answer sheet against a question paper.

    Builds the QA system from the question paper, parses the student's answers
    and prints one evaluation per parsed answer.

    Args:
        question_paper_path: Path of the question-paper PDF.
        student_answers_path: Path of the student's answer-sheet PDF.

    Returns:
        None. Failures to build the QA system are logged and end the run early.
    """
    try:
        qa_system = create_qa_system(question_paper_path)
    except Exception as e:
        logging.error(f"Error creating QA system: {e}")
        return

    student_answers_text = extract_text_from_pdf(student_answers_path)
    student_answers = parse_student_answers(student_answers_text)

    if not student_answers:
        # Without this the run would end silently with no output at all.
        logging.warning(f"No answers could be parsed from: {student_answers_path}")
        return

    for question_number, student_answer in student_answers.items():
        evaluation = evaluate_mcq_answer(qa_system, question_number, student_answer)

        print(f"Evaluation for Question {question_number}:")
        print(evaluation)
        print("\n" + "-"*50 + "\n")

        # Clear some memory after each iteration
        gc.collect()

# Run the grading pipeline only when this code is executed directly (as a
# script or a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

2024-10-15 08:36:21,023 - INFO - Use pytorch device_name: cpu
2024-10-15 08:36:21,024 - INFO - Load pretrained SentenceTransformer: sentence-transformers/all-mpnet-base-v2


2024-10-15 08:36:28,975 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 1:
Score: 0
Explanation: The answer A) Mutualism is incorrect because in mutualism, both organisms benefit. The question asks for a relationship where one organism is harmed. A more suitable answer would be C) Parasitism, where one organism benefits and the other is harmed.

--------------------------------------------------



2024-10-15 08:36:30,403 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 2:
Score: 0
Explanation: The student's answer, B) Commensalism, is incorrect. In commensalism, one organism benefits and the other is not affected. The correct answer is C) Parasitism, where one organism benefits and the other is harmed.

--------------------------------------------------



2024-10-15 08:36:32,246 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 3:
Score: 1
Explanation: The answer C) Parasitism is correct and valid. In parasitism, one organism benefits while the other is harmed, making it the correct choice among the given options.

--------------------------------------------------



2024-10-15 08:36:33,371 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 4:
Score: 0
Explanation: The answer is incorrect. The correct answer is c) Parasitism, as it is the relationship where one organism benefits and the other is harmed. Competition is a relationship where both organisms are competing for the same resource.

--------------------------------------------------



2024-10-15 08:36:34,907 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 5:
Score: 0
Explanation: The answer is incorrect because mutation refers to a change in an organism's genetic information, not the passing of genetic information from one generation to the next. The correct answer is heredity (option d).

--------------------------------------------------



2024-10-15 08:36:36,294 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 6:
Score: 0
Explanation: The answer is incorrect. Sulfonamide is a competitive inhibitor, not a non-competitive inhibitor. A non-competitive inhibitor binds to a site other than the active site of the enzyme, whereas sulfonamide competes with the substrate for the active site.

--------------------------------------------------



2024-10-15 08:36:37,673 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 7:
Score: 0
Explanation: The answer is not provided in the given context, but based on general knowledge, the primary function of the Golgi apparatus is to modify, sort, and package proteins and lipids for transport out of the cell. Without the correct options, I couldn't verify the correctness of option C.

--------------------------------------------------



2024-10-15 08:36:39,418 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 8:
I don't know.

--------------------------------------------------



2024-10-15 08:36:40,642 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 9:
I don't know. There is no Multiple Choice Question 9 provided in the given context.

--------------------------------------------------



2024-10-15 08:36:41,772 - INFO - HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Evaluation for Question 10:
I don't know. There is no Multiple Choice Question 10 provided in the given context.

--------------------------------------------------

