In [1]:
import numpy as np
import pandas as pd

# Generate a synthetic IRT dataset: simulate binary (correct / incorrect)
# responses under the 1PL model and write them to "irt_data.csv",
# one row per student and one column per question.
np.random.seed(42)  # Fixed seed for the global NumPy RNG, for reproducibility
num_students = 100
num_questions = 10

# Student abilities: normal draws (mean 0, SD 1), then two groups are shifted
# by one unit. The groups are positional (first / last 30 students by index),
# not the 30 highest / lowest draws.
student_abilities = np.random.normal(0, 1, num_students)
student_abilities[:30] += 1  # First 30 students: ability raised by 1
student_abilities[-30:] -= 1  # Last 30 students: ability lowered by 1

# Item difficulties: evenly spaced from -2 to 2 (each consecutive question is harder)
item_difficulties = np.linspace(-2, 2, num_questions)

# 1PL model: P(correct) = 1 / (1 + exp(-(ability - difficulty))).
# Broadcasting a column of abilities against a row of difficulties yields the
# full (num_students, num_questions) probability matrix without Python loops.
prob_correct = 1 / (1 + np.exp(-(student_abilities[:, None] - item_difficulties[None, :])))

# One uniform draw per (student, question) cell. The matrix is filled in
# row-major order, so under the same seed the draws match a student-by-student,
# question-by-question loop. Cast to float so responses are stored as 1.0 / 0.0.
responses = (np.random.rand(num_students, num_questions) < prob_correct).astype(float)

# Create DataFrame: question columns V1..V{num_questions}, preceded by a 1-based student id
df = pd.DataFrame(responses, columns=[f"V{j+1}" for j in range(num_questions)])
df.insert(0, "Student", range(1, num_students + 1))

# Save to CSV (no index column; "Student" already identifies each row)
df.to_csv("irt_data.csv", index=False)

print(df.head())


   Student   V1   V2   V3   V4   V5   V6   V7   V8   V9  V10
0        1  1.0  1.0  1.0  1.0  0.0  1.0  1.0  0.0  1.0  0.0
1        2  0.0  1.0  1.0  1.0  1.0  1.0  0.0  0.0  1.0  0.0
2        3  1.0  1.0  1.0  1.0  0.0  1.0  1.0  0.0  1.0  0.0
3        4  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0
4        5  1.0  1.0  1.0  1.0  1.0  1.0  0.0  1.0  0.0  1.0


In [2]:
import numpy as np
import pandas as pd

# Generate a second synthetic IRT dataset: simulate binary (correct / incorrect)
# responses under the 1PL model and write them to "irt_data_vaja.csv",
# one row per student and one column per question.
np.random.seed(69)  # Fixed seed for the global NumPy RNG, for reproducibility
num_students = 120
num_questions = 5

# Student abilities: normal draws (mean 0, SD 1), then two groups are shifted
# by one unit. The groups are positional (first / last 15 students by index),
# not the 15 highest / lowest draws.
student_abilities = np.random.normal(0, 1, num_students)
student_abilities[:15] += 1  # First 15 students: ability raised by 1
student_abilities[-15:] -= 1  # Last 15 students: ability lowered by 1

# Item difficulties: evenly spaced from -2 to 2 (each consecutive question is harder)
item_difficulties = np.linspace(-2, 2, num_questions)

# 1PL model: P(correct) = 1 / (1 + exp(-(ability - difficulty))).
# Broadcasting a column of abilities against a row of difficulties yields the
# full (num_students, num_questions) probability matrix without Python loops.
prob_correct = 1 / (1 + np.exp(-(student_abilities[:, None] - item_difficulties[None, :])))

# One uniform draw per (student, question) cell. The matrix is filled in
# row-major order, so under the same seed the draws match a student-by-student,
# question-by-question loop. Cast to float so responses are stored as 1.0 / 0.0.
responses = (np.random.rand(num_students, num_questions) < prob_correct).astype(float)

# Create DataFrame: question columns V1..V{num_questions}, preceded by a 1-based student id
df = pd.DataFrame(responses, columns=[f"V{j+1}" for j in range(num_questions)])
df.insert(0, "Student", range(1, num_students + 1))

# Save to CSV (no index column; "Student" already identifies each row)
df.to_csv("irt_data_vaja.csv", index=False)

print(df.head())


   Student   V1   V2   V3   V4   V5
0        1  1.0  1.0  1.0  0.0  1.0
1        2  1.0  1.0  1.0  0.0  0.0
2        3  1.0  1.0  1.0  1.0  0.0
3        4  1.0  1.0  1.0  1.0  0.0
4        5  1.0  0.0  1.0  0.0  0.0
