In [7]:
import pandas as pd
# Reference table the generated responses are matched against in later cells
# (its 'Embedding' and 'MisconceptionId' columns are read there).
# NOTE(review): the file name is spelled 'groud_truth...' — presumably this matches the file on disk; confirm.
ground_truth = pd.read_json('groud_truth_embedding.json')
# Model outputs under evaluation; later cells read the 'Generated Response',
# 'Expected Misconception' and 'Prompt' columns of this frame.
targetStr = 'output/few_shot_model_responses_qwen.csv'
targetDf = pd.read_csv(targetStr)

In [8]:
import torch
from transformers import DistilBertTokenizer, DistilBertModel
import numpy as np
from tqdm import tqdm  # For progress bar
# Load the pretrained DistilBERT tokenizer and encoder used by get_embedding.
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')
# .eval() returns the module itself, so the load and the switch to evaluation mode can be chained.
model = DistilBertModel.from_pretrained('distilbert-base-uncased').eval()
def get_embedding(text, mask_padding=False):
    """Embed ``text`` as a single vector by mean-pooling DistilBERT's last hidden state.

    Parameters
    ----------
    text : str
        Text to embed. ``None`` and float NaN (what pandas yields for an empty
        CSV cell) are embedded as the empty string instead of making the
        tokenizer raise.
    mask_padding : bool, default False
        The input is always padded/truncated to 512 tokens. With the default
        ``False`` the mean is taken over all 512 positions, [PAD] positions
        included (the original behaviour). With ``True`` the mean is weighted
        by the attention mask so only real tokens contribute.
        NOTE(review): averaging over [PAD] positions likely dilutes short
        texts; enable this only together with the code that produced the
        reference embeddings — TODO confirm how those were pooled.

    Returns
    -------
    numpy.ndarray
        The pooled hidden state with size-1 dimensions squeezed out.
    """
    # Missing values read from CSV arrive as None/NaN rather than str.
    if text is None or (isinstance(text, float) and text != text):
        text = ""

    # Tokenize and process the input text
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding="max_length", max_length=512)

    with torch.no_grad():  # Disable gradient computation for efficiency
        outputs = model(**inputs)

    hidden = outputs.last_hidden_state
    if mask_padding:
        # Zero out padded positions and divide by the number of real tokens.
        mask = inputs["attention_mask"].unsqueeze(-1).to(hidden.dtype)
        pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)
    else:
        # Plain mean over every position of the padded sequence.
        pooled = hidden.mean(dim=1)

    return pooled.squeeze().numpy()
# Embed every generated response (tqdm shows progress) and keep the vectors as a new column.
embeddings = [
    get_embedding(text)
    for text in tqdm(targetDf['Generated Response'], desc="Generating embeddings")
]
targetDf['Embeddings_Generated'] = embeddings

Generating embeddings: 100%|██████████| 437/437 [00:40<00:00, 10.86it/s]


In [9]:
# Lookup table merged onto the predictions on 'MisconceptionId' in a later cell.
misconception_df = pd.read_csv('../embedding_generator/misconception_mapping.csv')
from sklearn.metrics.pairwise import cosine_similarity
def find_most_similar_id(target_embedding, ground_truth_df):
    """Return the MisconceptionId of the reference row closest to ``target_embedding``.

    Closeness is cosine similarity between ``target_embedding`` and every
    entry of ``ground_truth_df['Embedding']``; the row with the highest score
    wins.
    """
    reference_vectors = list(ground_truth_df['Embedding'])

    # One row of scores: the target against each reference vector.
    scores = cosine_similarity([target_embedding], reference_vectors)

    # argmax over the flattened scores gives the position of the best-matching row.
    best_row = np.argmax(scores)
    return ground_truth_df.iloc[best_row]['MisconceptionId']

# Predict a misconception id for every row: Series.apply forwards the keyword
# argument to find_most_similar_id alongside each embedding.
targetDf['prediction_result'] = targetDf['Embeddings_Generated'].apply(
    find_most_similar_id,
    ground_truth_df=ground_truth,
)

In [10]:
# Rename the prediction column to the merge key, then left-join the
# misconception lookup so each predicted id gets its mapped columns.
targetDf = (
    targetDf
    .rename(columns={'prediction_result': 'MisconceptionId'})
    .merge(misconception_df, on='MisconceptionId', how='left')
)

In [11]:
# Fraction of rows whose predicted misconception name equals the expected one.
matches = targetDf['Expected Misconception'] == targetDf['MisconceptionName']
print(sum(matches) / len(matches))

0.006864988558352402


In [12]:
# Inspect the generated response stored at index label 100.
print(targetDf.loc[100, 'Generated Response'])

Both Tom and Katie


In [13]:
# Inspect the prompt that produced the response at index label 100.
print(targetDf.loc[100, 'Prompt'])

Instruction: Why is the given answer wrong under such circumstances? Some of the examples are given below
Example from before: 
Example4204, Question :Calculate
\(
2 \div \frac{1}{5}
\)
Answer: \( \frac{5}{2} \)
Example1514, Question :When factorised, \( 9 x^{2}-16 \) will become...
Answer: \( 9 x(x-16) \)
Example1968, Question :The median of these four numbers is \( 5 \).
\(
\begin{array}{cccc}
3&12&1&?
\end{array}
\)
Find the value of the missing number.
Answer: \( 4 \)
Example3686, Question :Jo and Paul are arguing about powers.

Jo says there is no value of \( m \) that means \( 3^{m} \) is even.

Paul says there is no value of \( n \) that means \( 4^{n} \) ends in \( 0 \).
Who is correct?
Answer: Neither is correct
Example2714, Question :If you know the first term of a linear sequence and you multiply it by \( 10 \), you get the \( 10^{\text {th }} \) term of that sequence.
Is this statement...
Answer: never true

ConstructName: Factorise a quadratic expression in the form x² - b

In [14]:
# Raw (repr) view of the first generated response.
targetDf.loc[0, 'Generated Response']

'The given answer is incorrect because the function machine does not accurately represent the equation \\( y = x^2 + 4 \\). The correct function machine should have an input "x" that goes into the "square" box, then add 4 to the result, producing the output "y". Instead, the function machine depicted adds 4 after squaring "x", which changes the equation to \\( y = (x+4)^2 \\).'

In [15]:
# Raw (repr) view of the first prompt.
targetDf.loc[0, 'Prompt']

'Instruction: Why is the given answer wrong under such circumstances? Some of the examples are given below\nExample from before: \nExample2137, Question :This five-sided spinner is fair.\r\nWhat is the probability of spinning a \\( \\mathbf{3} \\) ?\r\nGive your answer as a percentage ![A five sided spinner labelled with 1, 1, 2, 3 and 4]()\nAnswer: \\( 25 \\% \\)\nExample2513, Question :When \\( h=5 \\)\r\nWhich of the following pairs of statements is true?\nAnswer: \\( \\begin{array}{l}3 h^{2}=225 \\\\ (3 h)^{2}=225\\end{array} \\)\nExample346, Question :Tom and Katie are discussing the order of operations\r\nTom says: \\( 3+2-1 \\equiv 3-1+2 \\)\r\nKatie says: \\( 3+2 \\times 1 \\equiv 2 \\times 1+3 \\)\nAnswer: Only\r\nTom\nExample3206, Question :When Sarah completes the square, what should replace the triangle?\r\n\\[\r\np^{2}-10 p-1 \\equiv(p-5)^{2}  \\Delta\r\n\\]\nAnswer: \\( \\Delta =-11 \\)\nExample1342, Question :Calculate the displacement \\( (s) \\), of a particle when ini