# Expert Values

This notebook creates the expert values that are used by some of the approaches in the IRT and BKT experiments. The expert values are created as described in Section 5.1 of the report and are stored in the following files:
- `expert_data_irt.csv`: expert values for IRT experiments
- `expert_data_bkt_skills.csv`: skill-specific expert values for BKT experiments
- `expert_data_bkt_probs.csv`: problem-specific expert values for BKT experiments

The general procedure is as follows:
- The parameters are estimated according to a specified strategy.
- The estimations are corrected by some error term.
- Some parameter values are restricted to a specified interval.
- Parameters are sampled for cases in which the data base is too small.

In [1]:
%load_ext autoreload
%autoreload 2

import numpy as np
import pandas as pd

import sys
import os
sys.path.append(os.path.abspath('../../sources'))

import config
import training_general
import utils

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Set to True to write the generated expert values to their CSV files;
# while False, the save cells at the end of the IRT and BKT sections do nothing.
SAVE_PRIMARY_FILES = False

## IRT - Difficulty

Estimation strategy: For each UT problem, we compute the proportion of students that answered the question incorrectly.

In [3]:
# load the interaction data; df_orig stays untouched while df is the working frame
df_orig = utils.read_data_file("final_data_main_approach.csv")
df = df_orig.copy()
print(df.shape)

  return pd.read_csv(config.DATA_FOLDER / filename)


(2664573, 22)


In [4]:
# restrict to unit-test (UT) rows; the copy keeps later changes away from df
is_unit_test = df["unit_test"] == 1
df_ut = df[is_unit_test].copy()
df_ut

Unnamed: 0,assignment_log_id,problem_id,timestamp,available_core_tutoring,actions,num_actions,first_answer,num_tries,final_answer,help_requested,...,hint_requested,live_tutor_requested,hint_id,explanation_id,sequence_id,student_id,class_id,teacher_id,assignment_finished,unit_test
158,4XW78VIO5,ZFAEQ6OJY,,,,,1,,,,...,,,,,1MOJEXDKSA,125OTT0E74,100VH25818,WOOQUZY13,,1
159,4XW78VIO5,1IGF6IHIXV,,,,,1,,,,...,,,,,1MOJEXDKSA,125OTT0E74,100VH25818,WOOQUZY13,,1
160,4XW78VIO5,D7MVVLH6Z,,,,,1,,,,...,,,,,1MOJEXDKSA,125OTT0E74,100VH25818,WOOQUZY13,,1
161,4XW78VIO5,SN5KALBY2,,,,,1,,,,...,,,,,1MOJEXDKSA,125OTT0E74,100VH25818,WOOQUZY13,,1
162,4XW78VIO5,14YKGV0BZP,,,,,1,,,,...,,,,,1MOJEXDKSA,125OTT0E74,100VH25818,WOOQUZY13,,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2664568,1HYD23RZDK,243CUENAK6,,,,,0,,,,...,,,,,2DRIUYX4MA,VVV6OVLGL,ZY24QW3NN,17D0I6MDKC,,1
2664569,1HYD23RZDK,1PDYC0EONZ,,,,,1,,,,...,,,,,2DRIUYX4MA,VVV6OVLGL,ZY24QW3NN,17D0I6MDKC,,1
2664570,1HYD23RZDK,1QQ09WIHQN,,,,,1,,,,...,,,,,2DRIUYX4MA,VVV6OVLGL,ZY24QW3NN,17D0I6MDKC,,1
2664571,1HYD23RZDK,1DSNBBHLYE,,,,,1,,,,...,,,,,2DRIUYX4MA,VVV6OVLGL,ZY24QW3NN,17D0I6MDKC,,1


In [5]:
# number of distinct problems in the unit-test subset
df_ut["problem_id"].nunique()

1367

In [6]:
# number of rows per unit-test problem, sorted ascending
# (interpreted below as the number of students who completed the problem)
num_stud_per_prob = df_ut.groupby("problem_id").size().sort_values()
num_stud_per_prob

problem_id
1A1E1BJ5ND       1
1FC53EZKNG       1
27BD8Z6PPP       1
U20YSTLUF        1
2NSQN4O2D9       1
              ... 
NOM3EFG6J     2229
1UHANE1F4H    2240
2I05VLMJLL    2252
1ZABSUK3ER    2252
2JUP62579A    2252
Length: 1367, dtype: int64

In [7]:
# Number of problems with fewer than 16 responses, i.e. completed by at most 15 students.
# The comparison uses the same threshold (`< 16`) as the sampling step in the next cell;
# a label slice such as `.loc[:16]` is inclusive and would also count problems
# with exactly 16 students.
(num_stud_per_prob < 16).sum()

49

There are 49 problems which are completed by at most 15 students. For those, we decide to randomly sample the difficulties.

In [8]:
# fixed seed so that the sampled values are reproducible
np.random.seed(42)

# difficulty estimate: share of incorrect first answers per UT problem
share_correct = df_ut.groupby("problem_id")["first_answer"].mean()
diff = (1 - share_correct).rename("irt_difficulty")

# one uniform error term from [-0.1, 0.1] per problem
error_terms = np.random.uniform(-0.1, 0.1, size=len(diff))

# perturb the estimates, round them and keep them inside [0, 1]
noisy_diff = diff + error_terms
expert_df = noisy_diff.round(config.ROUND_DECIMALS).clip(0, 1).to_frame()

# problems with fewer than 16 rows get a difficulty drawn uniformly from [0.2, 0.8]
too_few_students = num_stud_per_prob < 16
probs_sample = num_stud_per_prob[too_few_students].index
sampled_diff = np.random.uniform(0.2, 0.8, size=len(probs_sample))
expert_df.loc[probs_sample, "irt_difficulty"] = sampled_diff.round(config.ROUND_DECIMALS)

In [9]:
# show the resulting IRT expert difficulties
expert_df

Unnamed: 0_level_0,irt_difficulty
problem_id,Unnamed: 1_level_1
104GN803C7,0.4631
1052VSI90Q,0.2837
1056MOBB1X,0.3507
107HWF0F0D,0.5575
10B335WIH,0.1579
...,...
ZPY48D4CJ,0.1996
ZQMHFZJ53,0.4650
ZSC5XCDO9,0.3348
ZTUHRKY2,0.3763


In [10]:
# summary of the expert difficulties: mean, min, max and how many values
# ended up exactly at the clip bounds 1 and 0
irt_diff = expert_df["irt_difficulty"]
(
    irt_diff.mean(),
    irt_diff.min(),
    irt_diff.max(),
    irt_diff.eq(1).sum(),
    irt_diff.eq(0).sum(),
)

(0.4206056327724946, 0.0, 1.0, 7, 12)

In [11]:
# write the IRT expert values only when saving is enabled
# (save_idx=True presumably keeps the problem_id index in the file — verify in utils.save_as_csv)
if SAVE_PRIMARY_FILES:
    utils.save_as_csv(expert_df, "expert_data_irt.csv", save_idx=True)

## BKT

In [12]:
# number of decimals the BKT expert values are rounded to
ROUND_DEC = 2

In [13]:
# load the interaction data again so the BKT section starts from a fresh frame;
# df_orig stays untouched while df is the working frame
df_orig = utils.read_data_file("final_data_main_approach.csv")
df = df_orig.copy()
print(df.shape)

  return pd.read_csv(config.DATA_FOLDER / filename)


(2664573, 22)


In [14]:
# prepare df with the KT method setting; this step is necessary because it
# maps the skill code and replaces the NaNs in the skill codes
df = training_general.prepare_df({"method": config.RecMethod.KT}, df)

In [15]:
# preview of the prepared frame
df.head()

Unnamed: 0,assignment_log_id,problem_id,timestamp,first_answer,sequence_id,student_id,class_id,teacher_id,unit_test,problem_skill_code_domain,problem_skill_code_1,problem_skill_code_2
0,12LNLV9T5P,2MZER0YXHT,1587658000.0,1,27GEL3MV6E,125OTT0E74,100VH25818,WOOQUZY13,0,1.G,1,G
1,XJMWRTN1F,SXFPGQ0Z7,1587658000.0,1,1DHG1UNJ8Q,125OTT0E74,100VH25818,WOOQUZY13,0,1.G,1,G
2,XJMWRTN1F,C4U09P1SM,1587658000.0,1,1DHG1UNJ8Q,125OTT0E74,100VH25818,WOOQUZY13,0,1.G,1,G
3,XJMWRTN1F,1Q6NCCELIN,1587658000.0,1,1DHG1UNJ8Q,125OTT0E74,100VH25818,WOOQUZY13,0,1.G,1,G
4,XJMWRTN1F,36DSFWGZG,1587659000.0,1,1DHG1UNJ8Q,125OTT0E74,100VH25818,WOOQUZY13,0,1.G,1,G


In [16]:
# empty frame with one row per skill and one column per BKT parameter
skill_index = pd.Index(df["problem_skill_code_2"].unique(), name="problem_skill_code_2")
skill_param_cols = ["bkt_init_known", "bkt_learn_prob", "bkt_forget", "bkt_slip", "bkt_guess"]
expert_df_skills = pd.DataFrame(index=skill_index, columns=skill_param_cols)
expert_df_skills.head()

Unnamed: 0_level_0,bkt_init_known,bkt_learn_prob,bkt_forget,bkt_slip,bkt_guess
problem_skill_code_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G,,,,,
MD,,,,,
NBT,,,,,
OA,,,,,
REI,,,,,


In [17]:
# empty frame with one row per problem for the problem-specific parameters
problem_index = pd.Index(df["problem_id"].unique(), name="problem_id")
problem_param_cols = ["bkt_slip", "bkt_guess"]
expert_df_probs = pd.DataFrame(index=problem_index, columns=problem_param_cols)
expert_df_probs.head()

Unnamed: 0_level_0,bkt_slip,bkt_guess
problem_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2MZER0YXHT,,
SXFPGQ0Z7,,
C4U09P1SM,,
1Q6NCCELIN,,
36DSFWGZG,,


### Initial Probability (skill-specific)

Estimation Strategy: For each skill, consider the first completed IU problem of every student. Among those compute the proportion of correct responses.

In [18]:
# rows that do not belong to a unit test (IU), ordered by time within each skill and student
group_keys = ["problem_skill_code_2", "student_id"]
iu = (
    df.loc[df["unit_test"] == 0]
    .copy()
    .sort_values(group_keys + ["timestamp"])
)

# the first row of every (skill, student) pair is that student's first problem for the skill
iu_first = iu.drop_duplicates(subset=group_keys, keep="first")

# number of students per skill
num_stud_per_skill = iu_first.groupby("problem_skill_code_2").size().sort_values()
num_stud_per_skill

problem_skill_code_2
MG        1
GPE       8
Q         9
CN      138
RN      140
GMD     143
APR     238
SRT     266
SSE     333
CO      546
CED     571
LE      639
ID      680
BF      803
F       840
REI     885
SP      987
IF     1079
NS     3128
EE     3408
RP     3956
G      4537
NF     5331
OA     6669
MD     7764
NBT    8734
dtype: int64

In [19]:
np.random.seed(1)

# share of correct first responses per skill
prop_corr = iu_first.groupby("problem_skill_code_2")["first_answer"].mean()
print(prop_corr.mean())

# one downward correction from [0, 0.3] per skill
error_terms = np.random.uniform(0, 0.3, size=len(prop_corr))

# corrected estimate, rounded and restricted to [0.1, 0.7]
# (this interval lies inside [0, 1], so a single clip yields the same values)
init_known = (prop_corr - error_terms).round(ROUND_DEC)
expert_df_skills["bkt_init_known"] = init_known.clip(0.1, 0.7)

# skills with fewer than 10 students get a value drawn uniformly from [0.1, 0.7]
too_few_students = num_stud_per_skill < 10
skills_sample = num_stud_per_skill[too_few_students].index
sampled_init = np.random.uniform(0.1, 0.7, size=len(skills_sample))
expert_df_skills.loc[skills_sample, "bkt_init_known"] = sampled_init.round(ROUND_DEC)
print(expert_df_skills["bkt_init_known"].mean())
expert_df_skills

0.5930505655537573
0.4157692307692307


Unnamed: 0_level_0,bkt_init_known,bkt_learn_prob,bkt_forget,bkt_slip,bkt_guess
problem_skill_code_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G,0.51,,,,
MD,0.34,,,,
NBT,0.52,,,,
OA,0.66,,,,
REI,0.35,,,,
CED,0.66,,,,
ID,0.52,,,,
SP,0.43,,,,
IF,0.36,,,,
BF,0.42,,,,


### Learning (Transition) Probability (skill-specific)

Estimation Strategy: For each skill, compute the proportion of wrong responses that are followed by a correct response (for the same student) among all wrong responses.

In [20]:
# for every row, store the following answer of the same student within the same skill
# (iu was sorted by skill, student and timestamp when it was created)
per_skill_student = iu.groupby(["problem_skill_code_2", "student_id"])
iu["next_answer"] = per_skill_student["first_answer"].shift(-1)

# keep wrong answers that have a successor
answered_wrong = iu["first_answer"] == 0
has_successor = iu["next_answer"].notna()
iu_rest = iu[answered_wrong & has_successor]

# number of considered rows per skill
num_prob_per_skill = iu_rest.groupby("problem_skill_code_2").size().sort_values()
num_prob_per_skill

problem_skill_code_2
Q           9
GPE        19
RN        534
CN        602
LE       1268
APR      1566
GMD      1702
SSE      1848
CED      3938
F        4036
SP       4919
SRT      5885
ID       7154
REI      7744
IF       8558
BF      10137
CO      11415
EE      36427
G       37795
NS      41430
RP      62247
OA      64080
MD      73224
NF     121278
NBT    176581
dtype: int64

In [21]:
np.random.seed(10)

# share of wrong answers that are followed by a correct one, per skill
p_learn = iu_rest.groupby("problem_skill_code_2")["next_answer"].mean()
print(p_learn.mean())

# one downward correction from [0, 0.3] per skill
error_terms = np.random.uniform(0, 0.3, size=len(p_learn))

# corrected estimate, rounded and restricted to [0.1, 0.5]
# (this interval lies inside [0, 1], so a single clip yields the same values)
learn_prob = (p_learn - error_terms).round(ROUND_DEC)
expert_df_skills["bkt_learn_prob"] = learn_prob.clip(0.1, 0.5)

# sample uniformly from [0.1, 0.5] for skills with fewer than 20 considered rows
# and for skills that received no estimate at all
few_rows = num_prob_per_skill[num_prob_per_skill < 20].index
no_estimate = expert_df_skills[expert_df_skills["bkt_learn_prob"].isna()].index
skills_sample = few_rows.append(no_estimate)
sampled_learn = np.random.uniform(0.1, 0.5, size=len(skills_sample))
expert_df_skills.loc[skills_sample, "bkt_learn_prob"] = sampled_learn.round(ROUND_DEC)
print(expert_df_skills["bkt_learn_prob"].mean())
expert_df_skills

0.45697392096165323
0.3142307692307692


Unnamed: 0_level_0,bkt_init_known,bkt_learn_prob,bkt_forget,bkt_slip,bkt_guess
problem_skill_code_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G,0.51,0.23,,,
MD,0.34,0.28,,,
NBT,0.52,0.25,,,
OA,0.66,0.4,,,
REI,0.35,0.24,,,
CED,0.66,0.29,,,
ID,0.52,0.33,,,
SP,0.43,0.35,,,
IF,0.36,0.19,,,
BF,0.42,0.42,,,


### Forget Probability (skill-specific)

Estimation Strategy: For each skill, compute the proportion of correct responses that are followed by a wrong response (for the same student) among all correct responses.

In [22]:
# for every row, store the following answer of the same student within the same skill
# (iu was sorted by skill, student and timestamp when it was created)
per_skill_student = iu.groupby(["problem_skill_code_2", "student_id"])
iu["next_answer"] = per_skill_student["first_answer"].shift(-1)

# keep correct answers that have a successor
answered_correct = iu["first_answer"] == 1
has_successor = iu["next_answer"].notna()
iu_rest = iu[answered_correct & has_successor]

# number of considered rows per skill
num_prob_per_skill = iu_rest.groupby("problem_skill_code_2").size().sort_values()
num_prob_per_skill

problem_skill_code_2
MG         11
GPE        72
RN        998
GMD      1238
APR      1482
CN       1769
LE       2963
SSE      3240
CED      6802
F        8277
SP       9583
SRT     10574
REI     12786
ID      14186
BF      15587
IF      17591
CO      20297
G       73467
EE      77445
NS      91406
RP     104475
MD     148684
OA     189145
NF     271847
NBT    493592
dtype: int64

In [23]:
# NOTE(review): this is the same seed value (10) as in the learning-probability cell,
# so both cells draw from the same pseudo-random stream — confirm that this is intended
np.random.seed(10)

# share of correct answers that are followed by a wrong one, per skill
share_still_correct = iu_rest.groupby("problem_skill_code_2")["next_answer"].mean()
p_forget = 1 - share_still_correct
print(p_forget.mean())

# one downward correction from [0, 0.2] per skill
error_terms = np.random.uniform(0, 0.2, size=len(p_forget))

# corrected estimate, rounded and restricted to [0, 0.3]
forget_prob = (p_forget - error_terms).round(ROUND_DEC)
expert_df_skills["bkt_forget"] = forget_prob.clip(0, 0.3)

# sample uniformly from [0, 0.3] for skills with fewer than 20 considered rows
# and for skills that received no estimate at all
few_rows = num_prob_per_skill[num_prob_per_skill < 20].index
no_estimate = expert_df_skills[expert_df_skills["bkt_forget"].isna()].index
skills_sample = few_rows.append(no_estimate)
sampled_forget = np.random.uniform(0, 0.3, size=len(skills_sample))
expert_df_skills.loc[skills_sample, "bkt_forget"] = sampled_forget.round(ROUND_DEC)
print(expert_df_skills["bkt_forget"].mean())
expert_df_skills

0.23592725969839656
0.14884615384615385


Unnamed: 0_level_0,bkt_init_known,bkt_learn_prob,bkt_forget,bkt_slip,bkt_guess
problem_skill_code_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G,0.51,0.23,0.08,,
MD,0.34,0.28,0.12,,
NBT,0.52,0.25,0.05,,
OA,0.66,0.4,0.0,,
REI,0.35,0.24,0.14,,
CED,0.66,0.29,0.15,,
ID,0.52,0.33,0.13,,
SP,0.43,0.35,0.17,,
IF,0.36,0.19,0.04,,
BF,0.42,0.42,0.28,,


### Guessing Probability (skill-specific and problem-specific)

Estimation Strategy: Determine the guessing probability based on the problem type stored in `problem_details.csv`:
- Multiple Choice: 0.25 (we do not know how many answer options there are)
- Check all that apply: 0.1
- Ordering: 0.1
- Everything else: 0

For the skill-specific values, group the values by the skill (weighted according to the appearance in df).

In [24]:
# problem meta data, restricted to the problems that occur in df
problem_details = utils.read_problem_details()
problem_details = problem_details.loc[df["problem_id"].unique()].copy()

# guessing probability by problem type; every type not listed here gets 0
guess_by_type = {
    "Multiple Choice": 0.25,
    "Check All That Apply": 0.1,
    "Ordering": 0.1,
}
problem_details["p_guess"] = problem_details["problem_type"].map(guess_by_type).fillna(0.0)

# write to expert df
expert_df_probs["bkt_guess"] = problem_details["p_guess"]
expert_df_probs

Unnamed: 0_level_0,bkt_slip,bkt_guess
problem_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2MZER0YXHT,,0.00
SXFPGQ0Z7,,0.10
C4U09P1SM,,0.10
1Q6NCCELIN,,0.10
36DSFWGZG,,0.25
...,...,...
2OZWR6ZVE5,,0.00
BD0J5WCFQ,,0.10
12RWLJT53H,,0.25
783GJ85W7,,0.00


In [25]:
# for skill-specific: aggregate over skills
# attach the guess probability of each problem to its rows in df; `assign` + `map`
# overwrites the column, so re-running this cell does not create `p_guess_x` / `p_guess_y`
# duplicates (and a KeyError below) the way a repeated merge would
df = df.assign(p_guess=df["problem_id"].map(problem_details["p_guess"]))

# average over all rows of a skill, i.e. problems are weighted by how often they appear in df
expert_df_skills["bkt_guess"] = df.groupby("problem_skill_code_2")["p_guess"].mean().round(ROUND_DEC)
expert_df_skills

Unnamed: 0_level_0,bkt_init_known,bkt_learn_prob,bkt_forget,bkt_slip,bkt_guess
problem_skill_code_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G,0.51,0.23,0.08,,0.07
MD,0.34,0.28,0.12,,0.04
NBT,0.52,0.25,0.05,,0.02
OA,0.66,0.4,0.0,,0.03
REI,0.35,0.24,0.14,,0.12
CED,0.66,0.29,0.15,,0.14
ID,0.52,0.33,0.13,,0.12
SP,0.43,0.35,0.17,,0.13
IF,0.36,0.19,0.04,,0.11
BF,0.42,0.42,0.28,,0.08


### Slipping Probability (skill-specific and problem-specific)

Estimation Strategy: For each problem the slipping probability is determined by the problem's difficulty (diff):
- 0.6 < diff: slip=0.2
- 0.4 < diff <= 0.6: slip=0.15
- 0.2 < diff <= 0.4: slip=0.1
- diff <= 0.2: slip=0.05

The difficulty of a problem is estimated based on the proportion of wrong answers for the problem.

For the skill-specific values, group the values by the skill (weighted according to the appearance in df).

In [26]:
# number of rows per problem over the whole prepared frame, sorted ascending
num_stud_per_prob = df.groupby("problem_id").size().sort_values()

# how many problems have a given number of rows — first ten entries
rows_per_problem_hist = num_stud_per_prob.value_counts().sort_index()
rows_per_problem_hist.head(10)

1     788
2     474
3     467
4     391
5     675
6     496
7     464
8     423
9     442
10    439
Name: count, dtype: int64

In [27]:
# difficulty estimate: share of incorrect first answers per problem
share_correct = df.groupby("problem_id")["first_answer"].mean()
diff = (1 - share_correct).rename("bkt_diff").to_frame()
print(diff["bkt_diff"].mean())

np.random.seed(20)
# problems with fewer than 10 rows get a difficulty drawn uniformly from [0.2, 0.8]
too_few_students = num_stud_per_prob < 10
probs_sample = num_stud_per_prob[too_few_students].index
sampled_diff = np.random.uniform(0.2, 0.8, size=len(probs_sample))
diff.loc[probs_sample, "bkt_diff"] = sampled_diff
print(diff["bkt_diff"].mean())
diff

0.31549906765512303
0.34310718745197616


Unnamed: 0_level_0,bkt_diff
problem_id,Unnamed: 1_level_1
1008LAM6IQ,0.177778
100IN2GQ3G,0.125000
100LV3OS5B,0.233333
100OBANI0T,0.298246
100SHH969K,0.232143
...,...
ZZI36O3O1,0.243056
ZZN00M396,0.678571
ZZQ7VFS0B,0.085366
ZZX9A654F,0.086957


In [28]:
# map the difficulty to a slip probability; the conditions are checked from the
# highest threshold downwards and the first match wins, everything else gets 0.05
difficulty = diff["bkt_diff"]
diff["bkt_slip"] = np.select(
    [difficulty > 0.6, difficulty > 0.4, difficulty > 0.2],
    [0.2, 0.15, 0.1],
    default=0.05,
)
diff["bkt_slip"].value_counts().sort_index()

bkt_slip
0.05     8956
0.10    10003
0.15     6486
0.20     4368
Name: count, dtype: int64

In [29]:
# store the problem-specific slip probabilities in the expert df
# (both frames are indexed by problem_id, so the assignment aligns on that index)
expert_df_probs["bkt_slip"] = diff["bkt_slip"]
expert_df_probs

Unnamed: 0_level_0,bkt_slip,bkt_guess
problem_id,Unnamed: 1_level_1,Unnamed: 2_level_1
2MZER0YXHT,0.05,0.00
SXFPGQ0Z7,0.10,0.10
C4U09P1SM,0.10,0.10
1Q6NCCELIN,0.15,0.10
36DSFWGZG,0.05,0.25
...,...,...
2OZWR6ZVE5,0.20,0.00
BD0J5WCFQ,0.15,0.10
12RWLJT53H,0.15,0.25
783GJ85W7,0.10,0.00


In [30]:
# for skill-specific: aggregate over skills
# attach the slip probability of each problem to its rows in df; `assign` + `map`
# overwrites the column, so re-running this cell does not create `bkt_slip_x` / `bkt_slip_y`
# duplicates (and a KeyError below) the way a repeated merge would
df = df.assign(bkt_slip=df["problem_id"].map(diff["bkt_slip"]))

# average over all rows of a skill, i.e. problems are weighted by how often they appear in df
expert_df_skills["bkt_slip"] = df.groupby("problem_skill_code_2")["bkt_slip"].mean().round(ROUND_DEC)
expert_df_skills

Unnamed: 0_level_0,bkt_init_known,bkt_learn_prob,bkt_forget,bkt_slip,bkt_guess
problem_skill_code_2,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
G,0.51,0.23,0.08,0.11,0.07
MD,0.34,0.28,0.12,0.11,0.04
NBT,0.52,0.25,0.05,0.09,0.02
OA,0.66,0.4,0.0,0.09,0.03
REI,0.35,0.24,0.14,0.12,0.12
CED,0.66,0.29,0.15,0.12,0.14
ID,0.52,0.33,0.13,0.12,0.12
SP,0.43,0.35,0.17,0.11,0.13
IF,0.36,0.19,0.04,0.11,0.11
BF,0.42,0.42,0.28,0.12,0.08


In [31]:
# write the skill-specific and problem-specific BKT expert values only when saving is enabled
# (save_idx=True presumably keeps the index column in the files — verify in utils.save_as_csv)
if SAVE_PRIMARY_FILES:
    utils.save_as_csv(expert_df_skills, "expert_data_bkt_skills.csv", save_idx=True)
    utils.save_as_csv(expert_df_probs, "expert_data_bkt_probs.csv", save_idx=True)