In [1]:
import numpy as np
import pandas as pd

# Simulate dichotomous (0/1) item responses under a one-parameter logistic
# (1PL) IRT model and store them in irt_data.csv.
np.random.seed(42)  # fixed seed so the simulated data set is reproducible
num_students = 100
num_questions = 10

# Abilities start as standard-normal draws (mean 0, SD 1). The first 30
# students are then shifted up by one unit and the last 30 shifted down by
# one unit, so the sample contains a stronger and a weaker group.
student_abilities = np.random.normal(0, 1, num_students)
student_abilities[:30] += 1
student_abilities[-30:] -= 1

# Evenly spaced difficulties from -2 to 2: each consecutive item is harder.
item_difficulties = np.linspace(-2, 2, num_questions)

# 1PL response probability, P(correct) = 1 / (1 + exp(-(ability - difficulty))),
# evaluated for every student x item pair at once via broadcasting.
ability_minus_difficulty = student_abilities[:, np.newaxis] - item_difficulties[np.newaxis, :]
success_probability = 1 / (1 + np.exp(-ability_minus_difficulty))

# One uniform draw per (student, item) cell, filled row by row so the random
# stream is consumed in student-major order. A response is 1.0 when the draw
# falls below the success probability, otherwise 0.0 (stored as floats).
uniform_draws = np.random.rand(num_students, num_questions)
responses = (uniform_draws < success_probability).astype(float)

# Wide table: a 1-based "Student" id followed by one column per item (V1..Vn).
item_labels = [f"V{number}" for number in range(1, num_questions + 1)]
df = pd.DataFrame(responses, columns=item_labels)
df.insert(0, "Student", range(1, num_students + 1))

# Persist without the pandas index; the "Student" column is the identifier.
df.to_csv("irt_data.csv", index=False)

print(df.head())


   Student   V1   V2   V3   V4   V5   V6   V7   V8   V9  V10
0        1  1.0  1.0  1.0  1.0  0.0  1.0  1.0  0.0  1.0  0.0
1        2  0.0  1.0  1.0  1.0  1.0  1.0  0.0  0.0  1.0  0.0
2        3  1.0  1.0  1.0  1.0  0.0  1.0  1.0  0.0  1.0  0.0
3        4  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0  1.0
4        5  1.0  1.0  1.0  1.0  1.0  1.0  0.0  1.0  0.0  1.0


In [2]:
import numpy as np
import pandas as pd

# Simulate a second, smaller dichotomous (0/1) data set under a one-parameter
# logistic (1PL) IRT model and store it in irt_data_vaja.csv.
np.random.seed(69)  # fixed seed so the simulated data set is reproducible
num_students = 120
num_questions = 5

# Abilities start as standard-normal draws (mean 0, SD 1). The first 15
# students are then shifted up by one unit and the last 15 shifted down by
# one unit, so the sample contains a stronger and a weaker group.
student_abilities = np.random.normal(0, 1, num_students)
student_abilities[:15] += 1
student_abilities[-15:] -= 1

# Evenly spaced difficulties from -2 to 2: each consecutive item is harder.
item_difficulties = np.linspace(-2, 2, num_questions)

# 1PL response probability, P(correct) = 1 / (1 + exp(-(ability - difficulty))),
# computed for the whole student x item grid in one broadcast expression.
ability_gap = student_abilities.reshape(-1, 1) - item_difficulties.reshape(1, -1)
p_correct_grid = 1 / (1 + np.exp(-ability_gap))

# Draw one uniform number per (student, item) cell in row-major order, i.e.
# student by student, and score the response 1.0 when the draw is below the
# success probability and 0.0 otherwise (stored as floats).
random_grid = np.random.rand(num_students, num_questions)
responses = np.where(random_grid < p_correct_grid, 1.0, 0.0)

# Wide table: a 1-based "Student" id followed by one column per item (V1..Vn).
column_names = [f"V{position + 1}" for position in range(num_questions)]
df = pd.DataFrame(responses, columns=column_names)
df.insert(0, "Student", range(1, num_students + 1))

# Persist without the pandas index; the "Student" column is the identifier.
df.to_csv("irt_data_vaja.csv", index=False)

print(df.head())


   Student   V1   V2   V3   V4   V5
0        1  1.0  1.0  1.0  0.0  1.0
1        2  1.0  1.0  1.0  0.0  0.0
2        3  1.0  1.0  1.0  1.0  0.0
3        4  1.0  1.0  1.0  1.0  0.0
4        5  1.0  0.0  1.0  0.0  0.0


polytomous

In [1]:
import numpy as np
import pandas as pd

# Simulate polytomous (Likert-type) item responses and store them in
# irt_polytomous_data.csv.

# Seed the global NumPy random generator so the results are reproducible.
np.random.seed(42)

# Number of respondents (students) and number of questions (items).
num_students = 100
num_questions = 10
# Number of response categories on the Likert scale (coded 0, 1, 2, 3, 4).
num_categories = 5

# Respondent abilities are drawn from a normal distribution (mean 0, SD 1).
# Ability is the latent trait the test measures (e.g. knowledge, attitude).
# Values are clipped to [-2, 2] so that extreme abilities do not produce
# overly extreme response patterns.
student_abilities = np.clip(np.random.normal(0, 1, num_students), -2.0, 2.0)

# "Central difficulty" of each item: its location on the latent scale.
# Items are evenly spaced from easier (-1.5) to harder (1.5).
item_central_difficulties = np.linspace(-1.5, 1.5, num_questions)

# Category thresholds shared by every item (the shared-threshold structure is
# what the original author refers to as the Rating Scale model). Each value
# is the boundary on the latent scale between two adjacent categories:
# -2.0 separates 0 from 1, -0.5 separates 1 from 2, 0.5 separates 2 from 3 and
# 2.0 separates 3 from 4. Because the noise added below is symmetric around
# zero, a respondent whose (ability - difficulty) equals a threshold has a 50%
# chance of landing above it.
category_thresholds = np.array([-2.0, -0.5, 0.5, 2.0])

# Fail early if the threshold vector and the declared number of categories
# disagree, or if the thresholds are not in ascending order; either mistake
# would otherwise yield wrong categories or an index error.
if category_thresholds.shape != (num_categories - 1,):
    raise ValueError(
        f"expected {num_categories - 1} category thresholds for "
        f"{num_categories} categories, got shape {category_thresholds.shape}"
    )
if np.any(np.diff(category_thresholds) < 0):
    raise ValueError("category_thresholds must be sorted in ascending order")

# Latent score for every student x item pair: ability minus item difficulty
# plus Gaussian noise (SD 0.7) that plays the role of measurement error.
# The noise matrix is filled row by row, so the random stream is consumed in
# student-major order (student 1 items 1..n, then student 2, ...).
# NOTE: this is a cumulative-threshold mechanism with normal noise, not the
# adjacent-category logit formulation of the Rating Scale model.
measurement_noise = np.random.normal(0, 0.7, size=(num_students, num_questions))
latent_scores = (
    student_abilities[:, np.newaxis]
    - item_central_difficulties[np.newaxis, :]
    + measurement_noise
)

# The chosen category is the number of thresholds lying strictly below the
# latent score; side="left" means a score exactly equal to a threshold stays
# in the lower category.
polytomous_responses = np.searchsorted(
    category_thresholds, latent_scores, side="left"
).astype(int)

# Build a DataFrame with one column per item (P1..Pn).
df_polytomous = pd.DataFrame(polytomous_responses, columns=[f"P{j+1}" for j in range(num_questions)])
# Add a 1-based "Student" column for easier identification.
df_polytomous.insert(0, "Student", range(1, num_students + 1))

# Save the DataFrame to a CSV file (without the pandas index).
df_polytomous.to_csv("irt_polytomous_data.csv", index=False)

# Print the first few rows of the generated data as a sanity check.
print("Generirani politomski podatki (prvih 5 vrstic):")
print(df_polytomous.head())

Generirani politomski podatki (prvih 5 vrstic):
   Student  P1  P2  P3  P4  P5  P6  P7  P8  P9  P10
0        1   3   3   3   2   3   3   3   2   2    1
1        2   2   3   3   4   2   2   1   1   1    1
2        3   4   3   4   2   3   4   1   1   2    1
3        4   3   4   3   4   3   4   2   2   3    1
4        5   3   3   1   2   2   2   1   1   1    1
