In [2]:
import numpy as np
import pandas as pd

# Define constants
num_students = 25
num_attempts = 15  # Each student makes 15 attempts per sign
max_time = 5000  # Exclusive upper bound on a simulated response time (ms)
random_seed = 42  # Fixed seed so Restart & Run All regenerates the same data

# Define signs (skills): maps skill ID -> letter.
# IDs 1-12 are the Tamil letters, IDs 13-24 the Sinhala letters.
tamil_letters = {
    1: "அ", 2: "ஆ", 3: "இ", 4: "ஈ", 5: "உ",
    6: "ஊ", 7: "எ", 8: "ஏ", 9: "ஐ", 10: "ஒ",
    11: "ஓ", 12: "ஔ"
}

sinhala_letters = {
    13: "අ", 14: "ආ", 15: "ඇ", 16: "ඉ", 17: "උ",
    18: "එ", 19: "ග", 20: "ව", 21: "ඩ", 22: "ද",
    23: "ය", 24: "හ"
}

signs = list(tamil_letters.keys()) + list(sinhala_letters.keys())

# Generate data
# A dedicated, seeded generator keeps the simulation reproducible and does not
# depend on (or disturb) NumPy's global random state.
rng = np.random.default_rng(random_seed)

# One row per (student, sign, opportunity). from_product orders the rows like
# three nested loops with the opportunity counter varying fastest.
attempt_index = pd.MultiIndex.from_product(
    [range(1, num_students + 1), signs, range(1, num_attempts + 1)],
    names=["Student ID", "Skill ID", "Opportunity Count"],
)
df = attempt_index.to_frame(index=False)

# Simulate correctness with a logistic curve centred on the 7th attempt
# (more 0s at the start, more 1s later). The probability depends only on the
# opportunity number, so it is evaluated once for the whole column.
p_correct = 1 / (1 + np.exp(-0.5 * (df["Opportunity Count"].to_numpy() - 7)))
df["Correctness"] = (rng.random(len(df)) < p_correct).astype("int64")

# Simulate time taken: correct answers are faster (500-2499 ms), incorrect
# answers slower (2500 ms up to, but not including, max_time). Both candidate
# times are drawn for every row and the matching one is kept.
fast_times = rng.integers(500, 2500, size=len(df))
slow_times = rng.integers(2500, max_time, size=len(df))
df["TimesTaken"] = np.where(df["Correctness"] == 1, fast_times, slow_times)

# Save first, then end the cell on the preview: only the last expression of a
# cell is displayed, so a df.head() placed before to_csv would show nothing.
df.to_csv("data.csv", index=False)
df.head()


In [3]:
# Set the time taken for every incorrect attempt to exactly max_time (5000 ms
# as configured above) instead of repeating the number as a literal here.
# The assignment is idempotent, so re-running this cell leaves df unchanged.
df.loc[df["Correctness"] == 0, "TimesTaken"] = max_time

# Verify the change explicitly rather than relying on eyeballing a preview.
assert (df.loc[df["Correctness"] == 0, "TimesTaken"] == max_time).all()

# Overwrite data.csv with the adjusted frame, then end the cell on the preview:
# only the last expression of a cell is displayed.
df.to_csv("data.csv", index=False)
df.head()
