In [4]:
import random
from pathlib import Path

import pandas as pd

# Define constants
student_ids = list(range(1, 37))
assignment_id = 1
question_ids = list(range(1, 11))
correctness_behaviors = [
    'Always correct',
    'Always incorrect',
    'Occasionally correct',
    'Occasionally incorrect'
]
confidence_behaviors = [
    'Always confident',
    'Always partially confident',
    'Always not confident',
    'Always confident but occasionally partially confident',
    'Always confident but occasionally not confident',
    'Always partially confident but occasionally confident',
    'Always partially confident but occasionally not confident',
    'Always not confident but occasionally confident',
    'Always not confident but occasionally partially confident'
]
grades_thresholds = [0.7, 0.9]  # thresholds for INC, PC, C

# Helper function to determine grade class
def get_grade_class(grade):
    if grade >= 0.9:
        return 1
    elif grade >= 0.7:
        return 2
    else:
        return 0

# Generate acquired grade based on correctness behavior
def generate_acquired_grade(correctness_behavior):
    if correctness_behavior == 'Always correct':
        return random.uniform(0.9, 1.0)
    elif correctness_behavior == 'Always incorrect':
        return random.uniform(0.0, 0.69)
    elif correctness_behavior == 'Occasionally correct':
        return random.choices([random.uniform(0.9, 1.0), random.uniform(0.0, 0.69)], [0.3, 0.7])[0]
    elif correctness_behavior == 'Occasionally incorrect':
        return random.choices([random.uniform(0.0, 0.6), random.uniform(0.9, 1.0)], [0.3, 0.7])[0]

# Generate given confidence based on confidence behavior
def generate_given_confidence(confidence_behavior):
    if confidence_behavior == 'Always confident':
        return 2
    elif confidence_behavior == 'Always partially confident':
        return 1
    elif confidence_behavior == 'Always not confident':
        return 0
    elif confidence_behavior == 'Always confident but occasionally partially confident':
        return random.choices([2, 1], [0.7, 0.3])[0]
    elif confidence_behavior == 'Always confident but occasionally not confident':
        return random.choices([2, 0], [0.7, 0.3])[0]
    elif confidence_behavior == 'Always partially confident but occasionally confident':
        return random.choices([1, 2], [0.7, 0.3])[0]
    elif confidence_behavior == 'Always partially confident but occasionally not confident':
        return random.choices([1, 0], [0.7, 0.3])[0]
    elif confidence_behavior == 'Always not confident but occasionally confident':
        return random.choices([0, 2], [0.7, 0.3])[0]
    elif confidence_behavior == 'Always not confident but occasionally partially confident':
        return random.choices([0, 1], [0.7, 0.3])[0]

# Create dataset
data = []

for student_id in student_ids:
    correctness_behavior = correctness_behaviors[(student_id - 1) // 9]
    confidence_behavior = confidence_behaviors[(student_id - 1) % 9]
    
    for question_id in question_ids:
        acquired_grade = generate_acquired_grade(correctness_behavior)
        grade_class = get_grade_class(acquired_grade)
        given_confidence = generate_given_confidence(confidence_behavior)
        
        data.append({
            'student_id': student_id,
            'assignment_id': assignment_id,
            'question_id': question_id,
            'correctness_trait': correctness_behavior,
            'confidence_trait': confidence_behavior,
            'given_confidence': given_confidence,
            'acquired_grade': round(acquired_grade * 100, 2),  # Convert to percentage
            'grade_class': grade_class
        })

# Convert to DataFrame
df = pd.DataFrame(data)

# Save to CSV
df.to_csv('./Inference_Data_Sample/student_behaviors_simulated.csv', index=False)

print("Inference dataset generated successfully!")


Inference dataset generated successfully!
