In [None]:
import pandas as pd
from collections import Counter

dataset_path = "path/to/original/data"
df = pd.read_excel(dataset_path, sheet_name='contraposition')

# Rubric column headers exactly as they appear in the original dataset
# (the first two headers end with a trailing space).
rubrics = [
    'Statement of what should be proven: A proof by contraposition of an implication consists in showing that if x rational, then x^2 is rational. ',
    'Correct assumption: x is rational [Assumption] ',
    'Correct proof reasoning',
    'Proof conclusion: By contraposition, if x^2 is irrational, then x is irrational.'
]

# Knowledge profile column: the four rubric values of a row concatenated
# into a single string (compared against values such as '1000' below).
df['Knowledge Profile'] = df[rubrics].astype(str).agg(''.join, axis=1)

# Map each knowledge profile to a Counter of the mistake labels annotated for it.
knowledge_profile_mistakes = {}
for _, row in df.iterrows():
    knowledge_profile = row['Knowledge Profile']
    comments = row['Comments - Eylül']

    # Rows without an annotation contribute nothing.
    if pd.isna(comments):
        continue

    # The annotation cell holds a comma-separated list of mistake labels.
    # Deliberately NOT named `mistakes`: that name is the module-level dict of
    # mistake descriptions read by construct_prompt, and a top-level loop
    # variable with the same name would overwrite it whenever this cell is
    # re-run after that dict has been defined.
    row_mistakes = [label.strip() for label in comments.split(',')]
    knowledge_profile_mistakes.setdefault(knowledge_profile, Counter()).update(row_mistakes)

# Flatten the nested counts into one row per (profile, mistake) pair.
mistakes_df = pd.DataFrame([
    {'Knowledge Profile': kp, 'Mistake': mistake, 'Count': count}
    for kp, mistake_counts in knowledge_profile_mistakes.items()
    for mistake, count in mistake_counts.items()
])

# Display only the mistakes observed for profile '1000'.
mistakes_df[mistakes_df['Knowledge Profile'] == '1000']

In [None]:
import pandas as pd
from collections import Counter

dataset_path = "path/to/kp/dataset/original"
df = pd.read_excel(dataset_path)

# Rubric column headers exactly as they appear in the original dataset
# (the first two headers end with a trailing space).
rubrics = [
    'Statement of what should be proven: A proof by contraposition of an implication consists in showing that if x rational, then x^2 is rational. ',
    'Correct assumption: x is rational [Assumption] ',
    'Correct proof reasoning',
    'Proof conclusion: By contraposition, if x^2 is irrational, then x is irrational.'
]

# Knowledge profile column: the four rubric values of a row concatenated
# into a single string.
df['Knowledge Profile'] = df[rubrics].astype(str).agg(''.join, axis=1)

# Map each knowledge profile to a Counter of the mistake labels annotated for it.
knowledge_profile_mistakes = {}

for _, row in df.iterrows():
    knowledge_profile = row['Knowledge Profile']
    comments = row['Comments - Eylül']

    # Rows without an annotation contribute nothing.
    if pd.isna(comments):
        continue

    # The annotation cell holds a comma-separated list of mistake labels.
    # Deliberately NOT named `mistakes`: that name is the module-level dict of
    # mistake descriptions read by construct_prompt, and a top-level loop
    # variable with the same name would overwrite it whenever this cell is
    # re-run after that dict has been defined.
    row_mistakes = [label.strip() for label in comments.split(',')]
    knowledge_profile_mistakes.setdefault(knowledge_profile, Counter()).update(row_mistakes)

# Flatten the nested counts into one row per (profile, mistake) pair.
mistakes_df = pd.DataFrame([
    {'Knowledge Profile': kp, 'Mistake': mistake, 'Count': count}
    for kp, mistake_counts in knowledge_profile_mistakes.items()
    for mistake, count in mistake_counts.items()
])
mistakes_df

## Chosen Mistakes

- missing assumption
- assume x^2 is rational
- assume p->q and not q
- assume x irrational
- assuming p -> q

## Utils

In [None]:
# Natural-language description of each knowledge profile, keyed by the
# four-character profile string (one character per rubric, in rubric order).
# construct_prompt interpolates the selected description into the prompt.
knowledge_profiles = {
    '1111': "This knowledge profile represents student answers where the student correctly states what should be proven (rubric 1), makes the correct assumption (rubric 2), follows correct proof reasoning (rubric 3), and correctly concludes the proof (rubric 4). However, minor inconsistencies such as variations in notation or redundant steps may still be present.",
    '0000': "This knowledge profile represents student answers where the student fails to correctly state what should be proven (rubric 1), does not make the correct assumption (rubric 2), lacks correct proof reasoning (rubric 3), and does not conclude the proof correctly (rubric 4).",
    '1110': "This knowledge profile represents student answers where the student correctly states what should be proven (rubric 1), makes the correct assumption (rubric 2), and follows correct proof reasoning (rubric 3), but fails to correctly conclude the proof (rubric 4).",
    '0111': "This knowledge profile represents student answers where the student fails to correctly state what should be proven (rubric 1) but makes the correct assumption (rubric 2), follows correct proof reasoning (rubric 3), and correctly concludes the proof (rubric 4).",
    '0110': "This knowledge profile represents student answers where the student fails to correctly state what should be proven (rubric 1) but makes the correct assumption (rubric 2) and follows correct proof reasoning (rubric 3), but fails to correctly conclude the proof (rubric 4).",
    '1100': "This knowledge profile represents student answers where the student correctly states what should be proven (rubric 1) and makes the correct assumption (rubric 2) but does not follow correct proof reasoning (rubric 3) and fails to conclude the proof correctly (rubric 4).",
    '1000': "This knowledge profile represents student answers where the student correctly states what should be proven (rubric 1) but does not make the correct assumption (rubric 2), lacks correct proof reasoning (rubric 3), and does not conclude the proof correctly (rubric 4).",
    '0100': "This knowledge profile represents student answers where the student fails to correctly state what should be proven (rubric 1) but makes the correct assumption (rubric 2), while lacking correct proof reasoning (rubric 3) and not concluding the proof correctly (rubric 4)."
    }

In [None]:
# Natural-language description of each mistake label, keyed by the label that
# is also used as the search pattern in get_answers_for_mistake_and_profile.
# Keys containing `\^` are written as raw strings: the backslash is part of
# the key (it escapes `^` for the pattern match), and `\^` inside a normal
# string literal is an invalid escape sequence that newer Python versions
# warn about. The runtime value of every key is unchanged.
mistakes = {
    'missing assumption': "The student does not state the assumption required for the proof by contraposition (i.e., 'assume x is rational'). Without this assumption, it is typically impossible to proceed with the proof, resulting in an incomplete or invalid argument.",
    'assuming p -> q': "The student incorrectly assumes the statement 'if x^2 is irrational, then x is irrational' (P -> Q) as given, rather than proving it using contraposition.",
    'no statement to be proven': "The student does not clearly state what needs to be proven, omitting the initial setup necessary for the proof by contraposition.",
    r'assume x\^2 is rational': "The student incorrectly assumes x^2 is rational instead of assuming x is rational, which leads them to misapply the proof technique from the beginning.",
    'contrapositive statement: not Q -> P': "The student misstates the contrapositive as 'not Q implies P' (i.e., 'if x is rational, then x^2 is irrational') instead of the correct contrapositive form 'not Q implies not P' (i.e., 'if x is rational, then x^2 is rational').",
    'contraposition statement: not P -> not Q': "The student incorrectly states the contrapositive as 'not P implies not Q' (e.g., 'if x^2 is rational, then x is rational') instead of the correct contrapositive form 'not Q implies not P' (e.g., 'if x is rational, then x^2 is rational').",
    'assume x irrational': "The student incorrectly assumes that x is irrational instead of assuming x is rational, which is required for a proof by contraposition. This mistake leads to a flawed setup that prevents the correct logical progression of the proof.",
    'contrapositive statement: Q -> P': "The student incorrectly states the contrapositive as 'Q implies P' (e.g., 'if x is irrational, then x^2 is irrational') instead of the correct contrapositive form 'not Q implies not P' (e.g., 'if x is rational, then x^2 is rational').",
    r'assume x\^2 irrational': "The student incorrectly assumes that x^2 is irrational instead of assuming x is rational, which is required for a proof by contraposition. This mistake leads to a flawed setup that prevents the correct logical progression of the proof.",
    'assume x rational and x^2 irrational (assume not Q and P)': "The student incorrectly assumes both x^2 is irrational (P) and x is rational (not Q). This conflicts with the logical framework of contraposition, which requires assuming not Q (x is rational) to prove not P (x^2 is rational).",
    'assume p->q and not q': "The student incorrectly assumes both the statement P -> Q ('if x^2 is irrational, then x is irrational') and not Q ('x is rational') as starting points for the proof by contraposition. This shows a misunderstanding of the purpose of contraposition, which is to prove P -> Q by assuming not Q and deriving not P, rather than taking P -> Q as already true."

}

In [None]:
import random
random.seed(42)
def get_answers_for_mistake_and_profile(df, knowledge_profile, mistake_type, profile_column='Knowledge Profile', mistake_column='Comments - Eylül', answer_column='CONTRAPOSITION task'):
    """Return the shuffled answers of rows matching a profile and a mistake.

    Args:
        df: DataFrame holding the profile, mistake-annotation and answer columns.
        knowledge_profile: Profile string compared against the string form of
            `profile_column`.
        mistake_type: Pattern searched for (case-insensitively) in
            `mistake_column`; rows with a missing annotation never match.
        profile_column: Name of the knowledge-profile column.
        mistake_column: Name of the mistake-annotation column.
        answer_column: Name of the column holding the student answers.

    Returns:
        A list of the matching answers, shuffled in place with the module-level
        `random` generator.
    """
    # First narrow down to the requested profile ...
    has_profile = df[profile_column].astype(str) == knowledge_profile
    profile_rows = df[has_profile]

    # ... then keep only the rows whose annotation mentions the mistake.
    has_mistake = profile_rows[mistake_column].str.contains(mistake_type, case=False, na=False)
    shuffled_answers = profile_rows.loc[has_mistake, answer_column].tolist()

    random.shuffle(shuffled_answers)
    return shuffled_answers

In [None]:
def construct_prompt(target_knowledge_profile, target_mistake, num_answers, example_answers):
    """Build the list of chat messages used to request synthetic student answers.

    Args:
        target_knowledge_profile: Key into `knowledge_profiles`.
        target_mistake: Key into `mistakes`.
        num_answers: Number of answers the prompt asks for.
        example_answers: Text block of real student answers inserted verbatim.

    Returns:
        A list of `{"role": ..., "content": ...}` dicts: one system message
        followed by the user messages in a fixed order.

    Raises:
        KeyError: If the profile or the mistake is not a known key.
    """
    # Look up the descriptions first (profile, then mistake).
    profile_description = knowledge_profiles[target_knowledge_profile]
    mistake_description = mistakes[target_mistake]

    # Prompt section describing what to generate.
    target_knowledge_profile_and_mistake = f"""
        TARGET KNOWLEDGE PROFILE AND MISTAKE:
        I want you to generate answers for {num_answers} students with the knowledge profile {target_knowledge_profile}. {profile_description} A common mistake students with this profile make is: {mistake_description}
        You will generate student answers with this profile and this mistake.
        Please ensure that each answer demonstrates the specified knowledge profile characteristics in addition to the specific mistake. Do not just generate an answer with the mistake and a correct proof otherwise; the answer should reflect both the knowledge profile and the mistake.
        The generated answers have to be different and diverse, do not just paraphrase the given examples.
        """

    # Prompt section carrying the real student examples.
    examples = f"""
        EXAMPLES:
        Here are a few real answers from students who made this mistake and have this knowledge profile:
        {example_answers}
        """

    # One system message, then every remaining section as its own user message.
    messages = [{"role": "system", "content": SYSTEM_ROLE}]
    user_sections = (
        TASK_DESCRIPTION,
        SOLUTION,
        GRADING_RUBRICS,
        KNOWLEDGE_PROFILES_EXPLANATION,
        target_knowledge_profile_and_mistake,
        examples,
        OUTPUT_FORMAT,
    )
    for section in user_sections:
        messages.append({"role": "user", "content": section})

    return messages

In [None]:
import json
from datetime import datetime
import os
def save_completion_with_metadata(completion, prompt_messages, knowledge_profile, mistake_type, n, num_answers, temperature, save_dir):
    """Attach generation metadata to a completion dict and save it as JSON.

    The file is written to `<save_dir>/<knowledge_profile>/` and named
    `<knowledge_profile>_<YYYYmmdd_HHMMSS>.json`. Because the name has only
    second resolution and does not include the mistake type, a numeric suffix
    (`_1`, `_2`, ...) is appended when that name is already taken, so an
    earlier response is never silently overwritten.

    Args:
        completion: JSON-serialisable dict of the API response. It is modified
            in place: a "metadata" entry is added.
        prompt_messages: The messages that were sent to the model.
        knowledge_profile: Target profile; also used as sub-directory and
            filename prefix.
        mistake_type: Target mistake label.
        n: Value stored under "n_instances".
        num_answers: Number of answers requested in the prompt.
        temperature: Sampling temperature used for the request.
        save_dir: Root directory for the raw responses.

    Returns:
        The path of the written JSON file.
    """
    # Add extra metadata to the completion object
    completion["metadata"] = {
        "prompt": prompt_messages,
        "knowledge_profile": knowledge_profile,
        "mistake_type": mistake_type,
        "n_instances": n,
        "num_answers": num_answers,
        "temperature": temperature
    }

    # Generate filename with knowledge profile, date, and time
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    base_name = f"{knowledge_profile}_{timestamp}"
    save_path = os.path.join(save_dir, knowledge_profile, f"{base_name}.json")
    target_dir = os.path.dirname(save_path)
    os.makedirs(target_dir, exist_ok=True)

    # Two saves for the same profile within one second would share a name;
    # pick the first free suffixed name instead of overwriting.
    suffix = 1
    while os.path.exists(save_path):
        save_path = os.path.join(target_dir, f"{os.path.basename(base_name)}_{suffix}.json")
        suffix += 1

    with open(save_path, 'w', encoding='utf-8') as json_file:
        json.dump(completion, json_file, indent=4)
    return save_path

In [None]:
import json
import os
import re

def _sanitize_mistake_name(mistake_type):
    """Turn a mistake label into a short, filesystem-friendly name (max 30 chars)."""
    # The order of the replacements matters and is kept exactly as it was.
    return mistake_type.replace(' ', '_').replace('/', '_').replace('\\', '_').replace(':', '_').replace('>','').replace('\'', '').replace('__', '_').replace('-', '_').replace('_^', '')[:30]

def process_json_to_txt(input_json_path, output_dir):
    """Split a saved completion JSON into one text file per generated answer.

    Every choice's message content is split on '$'. Each non-blank piece has
    its `&...&` grade marker removed and is written to
    `<output_dir>/<knowledge_profile>/<mistake_name>/` as
    `<json basename>_<mistake_name>_<choice number>_<piece number>.txt`.

    Args:
        input_json_path: Path of a JSON file with "choices" and "metadata".
        output_dir: Root directory for the text files.

    Returns:
        None.
    """
    with open(input_json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # Extract metadata from the filename
    json_filename = os.path.basename(input_json_path)
    base_filename = os.path.splitext(json_filename)[0]

    # These values are the same for every answer in the file, so resolve them once.
    metadata = data.get("metadata", {})
    # Sanitize the mistake name to avoid issues with special characters
    mistake_name = _sanitize_mistake_name(metadata.get("mistake_type", "unknown_mistake"))
    knowledge_profile = metadata.get("knowledge_profile", "unknown_profile")
    instance_dir = os.path.join(output_dir, knowledge_profile, mistake_name)

    for choice_index, choice in enumerate(data.get("choices", [])):
        # A choice may carry `"content": null` (or no message at all); treat
        # that as empty text instead of failing on None.split.
        content = (choice.get("message") or {}).get("content") or ""

        # Split content by '$' to separate each generated instance
        for idx, instance in enumerate(content.split('$')):
            if not instance.strip():
                continue

            # Remove the grade formatted between '&' characters
            instance_cleaned = re.sub(r'&.*?&', '', instance).strip()

            os.makedirs(instance_dir, exist_ok=True)
            output_filename = f"{base_filename}_{mistake_name}_{choice_index + 1}_{idx + 1}.txt"
            output_path = os.path.join(instance_dir, output_filename)

            with open(output_path, 'w', encoding='utf-8') as txt_file:
                txt_file.write(instance_cleaned)



## Prompt

In [None]:
# Content of the single "system" message that construct_prompt places first.
SYSTEM_ROLE = """
You are a text generator that generates realistic student responses for a proof by contraposition question. Your role is to simulate student answers based on provided knowledge profiles and specific mistakes, which will be explained in more detail along with examples. Avoid including any evaluative language or comments that could bias the responses, as the generated data is used to train classifiers. Maintain a neutral, student-like tone and focus on coherence and clarity in your responses.
"""

In [None]:
# Task statement shown to the model; sent by construct_prompt as the first
# "user" message.
TASK_DESCRIPTION = """
TASK DESCRIPTION:
Students are expected to answer this task:
"Let x be a real number. Prove that if x^2 is irrational, then x is irrational using a proof by contraposition.

Reminders:
- A real number x is irrational if it cannot be expressed as the quotient of two integers - that is, p/q where p and q (both integers) are not zero.
- A real number x is rational if it can be expressed as the quotient of two integers - that is, p/q where p and q (both integers) are not zero."
"""

In [None]:
# Reference solution for the task; sent by construct_prompt as a "user"
# message right after the task description.
SOLUTION = """
SOLUTION:
As a reference, here is the correct solution for this problem:
A proof by contraposition of an implication consists in showing that if x is rational, then x^2 is rational.
1. x is rational [Assumption]
2. x = a/b, where a and b ≠ 0 are integers with no common factors. [Definition of a rational number]
3. Therefore, x^2 = a^2/b^2. [Squaring]
4. Hence, x^2 is rational. [Definition of a rational number]
5. By contraposition, if x^2 is irrational, then x is irrational.
"""

In [None]:
# Description of the four binary grading rubrics; sent by construct_prompt as
# a "user" message after the reference solution.
GRADING_RUBRICS = """
GRADING RUBRICS:
Students are graded using 4 binary rubrics:
1. Statement of what should be proven: A proof by contraposition of an implication consists in showing that if x is rational, then x^2 is rational.
2. Correct assumption: x is rational
3. Correct proof reasoning
4. Proof conclusion: By contraposition, if x^2 is irrational, then x is irrational.

It is acceptable if students do not write the exact same answer as the solution. If the response satisfies the rubrics more or less, the student receives the point.
"""

In [None]:
# Explanation of how rubric scores form a knowledge profile; sent by
# construct_prompt as a "user" message after the grading rubrics.
# Note: the examples here are written with dashes (0-0-0-0), whereas the
# profile keys and the requested grade markers elsewhere in this notebook use
# the compact form (e.g. 1000, &1111&).
KNOWLEDGE_PROFILES_EXPLANATION = """
KNOWLEDGE PROFILES:
We identify a student's knowledge profile based on their score in these 4 rubrics. For example:
- A student with all rubrics incorrect will have the profile 0-0-0-0.
- A student who has rubric 1 and 2 correct but 3 and 4 incorrect will have the profile 1-1-0-0.
"""

In [None]:
# Output-format instructions; sent by construct_prompt as the last "user"
# message. process_json_to_txt relies on two conventions requested here:
# answers are separated by "$" and each grade is wrapped in "&" characters.
OUTPUT_FORMAT = """
DESIRED OUTPUT FORMAT:
- Separate each generated student answer with the character "$".
- Do not include any comments or evaluative language since the data is used to train a classifier.
- Words like "correct", "incorrect", "right", "wrong", "correctly", and "incorrectly" are not allowed.
- When you generate an answer of a student that has mistakes, you have to act as a student who does not know the correct answer, and does not know that he is answering wrong.
- You have to generate an answer that is coherent with the knowledge profile and the specific mistake of the student.
- At the end of each answer, include a grade formatted between two "&" characters, such as &1111& for a student scoring 1 in all rubrics or &0000& for a student scoring 0 in all. Please evaluate each generated answer based on the rubrics provided, grading it independently of the target knowledge profile specified in this prompt. Do not assume the answer matches the target profile; instead, assess it as if you were grading any student's answer without prior knowledge of the intended profile.
- Use the following syntax for formulas:
  - To put a to the power of b: (a POWER b) or a^b. Vary the syntax between them.
  - To divide a by b, use a/b.
- Structure your proof line by line, as shown in the example:
"Show that from p ∧ (p -> q) we can conclude q:
1) p ∧ (p -> q)
2) p  
3) (p -> q) 
4) q"
"""

## OpenAI Setup

In [None]:
import os

# Take the API key from the OPENAI_API_KEY environment variable so that a real
# key never has to be pasted into (and saved with) the notebook. The original
# placeholder remains the fallback, so behaviour is unchanged when the
# variable is not set.
key = os.environ.get('OPENAI_API_KEY', 'API key')

In [None]:
from openai import OpenAI

# NOTE(review): this sets an attribute on the OpenAI class itself; the key is
# also passed to the constructor on the next line, so this assignment looks
# redundant -- confirm before removing.
OpenAI.api_key = key
client = OpenAI(api_key=key)

# Model name and sampling temperature used by every generation cell below;
# the temperature is also embedded in the output directory names.
model="gpt-4o"
temperature=1

## Generating Responses for 1000

In [None]:
# Locations of the two Excel datasets loaded in the next cell.
data1_path = 'path/to/original/data'
data2_path = 'path/to/kp/dataset/original'

# Output roots, one pair per temperature: raw JSON responses and the text
# files extracted from them.
save_dir = f'raw_responses/temp_{temperature}'
output_dir = f'processed_responses/temp_{temperature}' 

In [None]:
import pandas as pd

# Load the dataset
# df1 comes from the 'contraposition' sheet; df2 is read with its
# 'Knowledge Profile' column forced to str.
df1 = pd.read_excel(data1_path, sheet_name='contraposition')
df2 = pd.read_excel(data2_path, dtype={'Knowledge Profile': str})

# rubrics of the original dataset
# (column headers; the first two end with a trailing space)
rubrics = [
    'Statement of what should be proven: A proof by contraposition of an implication consists in showing that if x rational, then x^2 is rational. ',
    'Correct assumption: x is rational [Assumption] ',
    'Correct proof reasoning',
    'Proof conclusion: By contraposition, if x^2 is irrational, then x is irrational.'
]
# df1 has no profile column of its own: build it by concatenating the four
# rubric values of each row into one string.
df1['Knowledge Profile'] = df1[rubrics].astype(str).agg(''.join, axis=1)

### missing assumption

In [None]:
# Generation settings for this profile/mistake combination.
target_knowledge_profile = '1000'
target_mistake = 'missing assumption'
num_answers = 2   # number of answers requested in the prompt text
num_examples = 5  # upper bound on real student answers shown as examples
dataset = 1       # 1 -> df1, 2 -> df2

if dataset == 1:
    df = df1
elif dataset == 2:
    df = df2
    
# Collect matching real answers (shuffled) and keep at most num_examples of them.
examples_list = get_answers_for_mistake_and_profile(df, target_knowledge_profile, target_mistake)
examples_count = min(num_examples, len(examples_list))
# Format the examples as a dash-prefixed list, one answer per entry.
example_answers = "\n".join([f"- {answer}" for answer in examples_list[:examples_count]])
print("Example answers: ", example_answers, ' \n')
# construct the prompt 
prompt = construct_prompt(target_knowledge_profile, target_mistake, num_answers, example_answers)
print("Prompt: ", prompt, '\n')

# Passed as the `n` argument of the API call in the next cell.
n=20

In [None]:
# Send the prompt built in the previous cell; model, temperature and n come
# from earlier cells.
completion = client.chat.completions.create(
    model=model,
    messages=prompt,
    temperature=temperature,
    n=n
)
# Save the raw response together with the generation settings, then split it
# into one text file per generated answer.
path = save_completion_with_metadata(completion.to_dict(), prompt, target_knowledge_profile, target_mistake, n, num_answers, temperature, save_dir=save_dir) 
process_json_to_txt(path, output_dir)

### assume x^2 is rational

In [None]:
# Generation settings for this profile/mistake combination.
target_knowledge_profile = '1000'
# Raw string: the backslash is part of the label (it escapes `^` when the
# label is used as a search pattern, and it is part of the `mistakes` key).
# `\^` in a normal string literal is an invalid escape sequence; the runtime
# value is identical.
target_mistake = r'assume x\^2 is rational'
num_answers = 2   # number of answers requested in the prompt text
num_examples = 5  # upper bound on real student answers shown as examples
dataset = 2       # 1 -> df1, 2 -> df2

if dataset == 1:
    df = df1
elif dataset == 2:
    df = df2

# Collect matching real answers (shuffled) and keep at most num_examples of them.
examples_list = get_answers_for_mistake_and_profile(df, target_knowledge_profile, target_mistake)
examples_count = min(num_examples, len(examples_list))
# Format the examples as a dash-prefixed list, one answer per entry.
example_answers = "\n".join([f"- {answer}" for answer in examples_list[:examples_count]])
print("Example answers: ", example_answers, ' \n')
# construct the prompt
prompt = construct_prompt(target_knowledge_profile, target_mistake, num_answers, example_answers)
print("Prompt: ", prompt, '\n')

# Passed as the `n` argument of the API call in the next cell.
n=5

In [None]:
# Send the prompt built in the previous cell; model, temperature and n come
# from earlier cells.
completion = client.chat.completions.create(
    model=model,
    messages=prompt,
    temperature=temperature,
    n=n
)
# Save the raw response together with the generation settings, then split it
# into one text file per generated answer.
path = save_completion_with_metadata(completion.to_dict(), prompt, target_knowledge_profile, target_mistake, n, num_answers, temperature, save_dir=save_dir) 
process_json_to_txt(path, output_dir)

### assume p->q and not q

In [None]:
# Generation settings for this profile/mistake combination.
target_knowledge_profile = '1000'
target_mistake = 'assume p->q and not q'
num_answers = 2   # number of answers requested in the prompt text
num_examples = 5  # upper bound on real student answers shown as examples
dataset = 2       # 1 -> df1, 2 -> df2

if dataset == 1:
    df = df1
elif dataset == 2:
    df = df2
    
# Collect matching real answers (shuffled) and keep at most num_examples of them.
examples_list = get_answers_for_mistake_and_profile(df, target_knowledge_profile, target_mistake)
examples_count = min(num_examples, len(examples_list))
# Format the examples as a dash-prefixed list, one answer per entry.
example_answers = "\n".join([f"- {answer}" for answer in examples_list[:examples_count]])
print("Example answers: ", example_answers, ' \n')
# construct the prompt 
prompt = construct_prompt(target_knowledge_profile, target_mistake, num_answers, example_answers)
print("Prompt: ", prompt, '\n')

# Passed as the `n` argument of the API call in the next cell.
n=20

In [None]:
# Send the prompt built in the previous cell; model, temperature and n come
# from earlier cells.
completion = client.chat.completions.create(
    model=model,
    messages=prompt,
    temperature=temperature,
    n=n
)
# Save the raw response together with the generation settings, then split it
# into one text file per generated answer.
path = save_completion_with_metadata(completion.to_dict(), prompt, target_knowledge_profile, target_mistake, n, num_answers, temperature, save_dir=save_dir) 
process_json_to_txt(path, output_dir)

### assume x irrational

In [None]:
# Generation settings for this profile/mistake combination.
target_knowledge_profile = '1000'
target_mistake = 'assume x irrational'
num_answers = 2   # number of answers requested in the prompt text
num_examples = 5  # upper bound on real student answers shown as examples
dataset = 2       # 1 -> df1, 2 -> df2

if dataset == 1:
    df = df1
elif dataset == 2:
    df = df2
    
# Collect matching real answers (shuffled) and keep at most num_examples of them.
examples_list = get_answers_for_mistake_and_profile(df, target_knowledge_profile, target_mistake)
examples_count = min(num_examples, len(examples_list))
# Format the examples as a dash-prefixed list, one answer per entry.
example_answers = "\n".join([f"- {answer}" for answer in examples_list[:examples_count]])
print("Example answers: ", example_answers, ' \n')
# construct the prompt 
prompt = construct_prompt(target_knowledge_profile, target_mistake, num_answers, example_answers)
print("Prompt: ", prompt, '\n')

# Passed as the `n` argument of the API call in the next cell.
n=20

In [None]:
# Send the prompt built in the previous cell; model, temperature and n come
# from earlier cells.
completion = client.chat.completions.create(
    model=model,
    messages=prompt,
    temperature=temperature,
    n=n
)
# Save the raw response together with the generation settings, then split it
# into one text file per generated answer.
path = save_completion_with_metadata(completion.to_dict(), prompt, target_knowledge_profile, target_mistake, n, num_answers, temperature, save_dir=save_dir) 
process_json_to_txt(path, output_dir)

### assuming p -> q

In [None]:
# Generation settings for this profile/mistake combination.
target_knowledge_profile = '1000'
target_mistake = 'assuming p -> q'
num_answers = 2   # number of answers requested in the prompt text
num_examples = 5  # upper bound on real student answers shown as examples
dataset = 2       # 1 -> df1, 2 -> df2

if dataset == 1:
    df = df1
elif dataset == 2:
    df = df2
    
# Collect matching real answers (shuffled) and keep at most num_examples of them.
examples_list = get_answers_for_mistake_and_profile(df, target_knowledge_profile, target_mistake)
examples_count = min(num_examples, len(examples_list))
# Format the examples as a dash-prefixed list, one answer per entry.
example_answers = "\n".join([f"- {answer}" for answer in examples_list[:examples_count]])
print("Example answers: ", example_answers, ' \n')
# construct the prompt 
prompt = construct_prompt(target_knowledge_profile, target_mistake, num_answers, example_answers)
print("Prompt: ", prompt, '\n')

# Passed as the `n` argument of the API call in the next cell.
n=20

In [None]:
# Send the prompt built in the previous cell; model, temperature and n come
# from earlier cells.
completion = client.chat.completions.create(
    model=model,
    messages=prompt,
    temperature=temperature,
    n=n
)
# Save the raw response together with the generation settings, then split it
# into one text file per generated answer.
path = save_completion_with_metadata(completion.to_dict(), prompt, target_knowledge_profile, target_mistake, n, num_answers, temperature, save_dir=save_dir) 
process_json_to_txt(path, output_dir)