In [1]:
local = False
log = True
log_detail = False

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split

import os
import sys
from tqdm import tqdm
sys.path.append(os.path.abspath('..'))
import pickle

In [3]:
import torch
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer

In [4]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

In [5]:
if local:
    misconceptions = pd.read_csv('../kaggle/input/eedi-mining-misconceptions-in-mathematics/misconception_mapping.csv', index_col='MisconceptionId')
    train = pd.read_csv('../kaggle/input/eedi-mining-misconceptions-in-mathematics/train.csv')
    test = pd.read_csv('../kaggle/input/eedi-mining-misconceptions-in-mathematics/test.csv')
else:
    misconceptions = pd.read_csv('/kaggle/input/eedi-mining-misconceptions-in-mathematics/misconception_mapping.csv', index_col='MisconceptionId')
    train = pd.read_csv('/kaggle/input/eedi-mining-misconceptions-in-mathematics/train.csv')
    test = pd.read_csv('/kaggle/input/eedi-mining-misconceptions-in-mathematics/test.csv')
if log: print("(1) Imported data")

(1) Imported data


In [6]:
# Define the identifier columns
id_cols = [
    'QuestionId', 'ConstructId', 'ConstructName', 
    'SubjectId', 'SubjectName', 'CorrectAnswer', 'QuestionText'
]

# Define the corresponding Answer options
answer_cols = ['AnswerAText', 'AnswerBText', 'AnswerCText', 'AnswerDText']
misconception_cols = ['MisconceptionAId', 'MisconceptionBId', 'MisconceptionCId', 'MisconceptionDId']

# Melt Answer Text
text_melted = train.melt(
    id_vars=id_cols,
    value_vars=answer_cols,
    var_name='Attribute',
    value_name='AnswerText'
)

# Melt Misconception IDs
misconception_melted = train.melt(
    id_vars=id_cols,
    value_vars=misconception_cols,
    var_name='Attribute',
    value_name='MisconceptionId'
)

# Extract the option letter (A, B, C, D) and the attribute type
text_melted['AnswerOption'] = text_melted['Attribute'].str.extract(r'Answer([ABCD])Text')[0]
misconception_melted['AnswerOption'] = misconception_melted['Attribute'].str.extract(r'Misconception([ABCD])Id')[0]

# Drop the original 'Attribute' columns as they are no longer needed
text_melted.drop('Attribute', axis=1, inplace=True)
misconception_melted.drop('Attribute', axis=1, inplace=True)

# Merge the two melted DataFrames on id_vars and AnswerOption
train_melted = pd.merge(
    text_melted,
    misconception_melted,
    on=id_cols + ['AnswerOption'],
    how='left'
)

train_melted = train_melted.merge(misconceptions, left_on='MisconceptionId', right_index=True, how='left')
if log: print("(2) Created train_melted")

(2) Created train_melted


In [7]:
misconception_list = list(misconceptions['MisconceptionName'])

sentence_model = SentenceTransformer('all-mpnet-base-v2')
misconception_embeddings = sentence_model.encode(misconception_list, convert_to_tensor=True).cpu().numpy()

Batches:   0%|          | 0/81 [00:00<?, ?it/s]

In [8]:
def match_misconception(response: str, subject: str, top_k: int = 3) -> list:
    """
    Matches response to the top k misconceptions from the misconception list using word embeddings.
    Returns a list of the top k matched misconceptions with their similarity scores.

    Args:
        response: The text to match against misconceptions
        subject: The subject area for context
        top_k: The number of top misconceptions to return

    Returns:
        list: A list of tuples containing the matched misconception and its similarity score
    """
    # Combine response with subject for context
    contextual_response = f"{response} (Subject: {subject})"
    
    # Compute embedding for the response and move to CPU, then convert to NumPy
    response_embedding = model.encode([contextual_response], convert_to_tensor=True).cpu().numpy()
    
    # Calculate cosine similarities between the response and all misconceptions
    similarities = cosine_similarity(response_embedding, misconception_embeddings)[0]
    
    # Get indices of top k similarities
    top_indices = similarities.argsort()[-top_k:][::-1]
    top_scores = similarities[top_indices]
    
    # Collect top k matches with their similarity scores
    top_matches = []
    for idx, score in zip(top_indices, top_scores):
        top_matches.append((misconception_list[idx], score))
    
    return top_matches

In [9]:
from transformers import AutoTokenizer, AutoModelForCausalLM

class QwenMathModel:
    def __init__(self, model_path: str = "/kaggle/input/qwen2.5-math/transformers/1.5b-instruct/1"):
        """
        Initializes the Qwen Math 2.5 model and tokenizer.
        Args:
            model_name (str): The name of the model on Hugging Face Hub.
        """
        self.tokenizer = AutoTokenizer.from_pretrained(model_path, local_files_only=True)
        self.model = AutoModelForCausalLM.from_pretrained(model_path, local_files_only=True)
        self.model.eval()
        if torch.cuda.is_available():
            self.model.to('cuda')

    def get_completion(self, prompt: str, max_tokens: int = 100) -> str:
        """
        Generates a prediction for the given prompt.
        Args:
            prompt (str): The input prompt.
            max_length (int): The maximum length of the generated sequence.
        Returns:
            str: The generated prediction.
        """
        inputs = self.tokenizer.encode(prompt, return_tensors="pt")
        if torch.cuda.is_available():
            inputs = inputs.to('cuda')
        with torch.no_grad():
            outputs = self.model.generate(
                inputs,
                max_new_tokens=max_tokens,
                num_return_sequences=1,
                temperature=0.2,
                top_p=0.95,
                do_sample=True
            )
        prediction = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
        return prediction

In [10]:
def print_response(response, n=25):
    for prediction in response[:n]:
        print(f"{prediction[1]:.4f} | {prediction[0]}")

In [11]:
def calc_map25(predictions: list[tuple[str, float]], label: str, top_k: int = 25):
    batch_size = len(predictions)
    if batch_size == 0:
        return 0.0
    
    # Get top k predictions by sorting
    sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)[:top_k]
    pred_labels = [label for label, score in sorted_predictions]
    
    # Calculate AP for the single sample
    ap = 0.0
    hits = 0
    for j, pred_label in enumerate(pred_labels):
        if pred_label == label:
            hits += 1
            ap += hits / (j + 1)
            break  # Since there's only one correct label per observation
    return ap

In [12]:
model = QwenMathModel()


for index, row in train_melted.tail(1).iterrows():
    subject = row['SubjectName']
    question_text = row['QuestionText']
    answer_text = row['AnswerText']
    prompt = f"""Given the following question and incorrect answer option, identify the underlying misconception that would cause someone to arrive at the answer and output it as your response in one sentence.
Subject: {subject}
Question: {question_text}
Answer: {answer_text}"""
    
    raw_response = model.get_completion(prompt, max_tokens=100)
    response = match_misconception(raw_response, subject, top_k=25)

    print("-"*100)
    print(f"QuestionID={row['QuestionId']}, AnswerOption={row['AnswerOption']}")
    print("PROMPT")
    print(prompt)
    print()
    print("RESPONSE")
    print(f"Raw: {raw_response}")
    print("Top 5 predictions:")
    print_response(response, n=5)
    print()
    print(f"ACTUAL, MAP@25: {calc_map25(response, row['MisconceptionName'])}")
    print(row['MisconceptionName'])



The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


AttributeError: 'QwenMathModel' object has no attribute 'encode'

In [None]:
#########################
# Create test_melted 
#########################

# Include 'QuestionText' in id_vars to preserve it in the melted DataFrame
test_melted = test.melt(
    id_vars=['QuestionId', 'QuestionText', 'CorrectAnswer'],
    value_vars=['AnswerAText', 'AnswerBText', 'AnswerCText', 'AnswerDText'],
    var_name='AnswerOption',
    value_name='AnswerText'
)

# Clean the 'AnswerOption' column to obtain A, B, C, D
test_melted['AnswerOption'] = test_melted['AnswerOption'].str.replace('Answer', '').str.replace('Text', '')

test_melted['QA_Id'] = test_melted['QuestionId'].astype(str) + '_' + test_melted['AnswerOption']

# Drop correct answers
test_melted = test_melted[test_melted['CorrectAnswer'] != test_melted['AnswerOption']]

test_melted.head()
