In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

# Load the combined train/test interaction log that every later cell works on.
source_csv = '../data/traintest_v3.1.csv'
data = pd.read_csv(source_csv)

In [None]:
# Working copy restricted to the columns the parameter updates read, keeping only
# rows whose answerCode is not -1 (presumably -1 marks unlabeled rows -- TODO confirm).
user_item = data.loc[
    data['answerCode'] != -1,
    ['userID', 'assessmentItemID', 'KnowledgeTag', 'testId', 'test_cat', 'question_number', 'answerCode'],
].copy()

In [None]:
def get_phi(theta, beta, c=0.2):
    """Return a logistic curve in (theta - beta) with lower asymptote ``c``.

    The value is ``c + (1 - c) / (1 + exp(-(theta - beta)))``: it tends to ``c``
    when ``theta`` is far below ``beta`` and to 1 when it is far above.
    Presumably this is the predicted probability of a correct answer, with ``c``
    acting as a guessing floor -- confirm against the intended model.

    Args:
        theta: ability-side parameter.
        beta: difficulty-side parameter.
        c: lower asymptote of the curve (default 0.2).

    Returns:
        The curve value as a float.
    """
    gap = theta - beta
    denominator = 1 + np.exp(-gap)
    return c + (1 - c) / denominator

def learning_rate_theta(nb_answers):
    """Return the step size used when updating theta.

    The rate starts at 0.3 for ``nb_answers == 0``, shrinks as
    ``0.3 / (1 + 0.01 * nb_answers)``, and is never allowed below 0.04.

    Args:
        nb_answers: number of answers already counted for this entry.

    Returns:
        The step size as a float.
    """
    floor = 0.04
    decayed = 0.3 / (1 + 0.01 * nb_answers)
    # Same selection rule as max(decayed, floor): the floor wins only when strictly larger.
    return floor if floor > decayed else decayed

def learning_rate_beta(nb_answers):
    """Return the step size used when updating beta.

    The rate is 1 for ``nb_answers == 0`` and decays as
    ``1 / (1 + 0.05 * nb_answers)``, with no lower floor.

    Args:
        nb_answers: number of answers already counted for this entry.

    Returns:
        The step size as a float.
    """
    damping = 1 + 0.05 * nb_answers
    return 1 / damping

def get_new_theta(is_correct, theta, beta, nb_answers, c=0.2):
    """Return theta after one observed answer.

    Theta moves by ``learning_rate_theta(nb_answers)`` times the gap between the
    observed outcome and ``get_phi(theta, beta, c)``: up when the outcome exceeds
    the curve value, down otherwise.

    Args:
        is_correct: observed outcome (the caller passes the row's answerCode).
        theta: current theta.
        beta: beta the answer is scored against.
        nb_answers: answers counted so far for this theta; sets the step size.
        c: lower asymptote forwarded to ``get_phi``.

    Returns:
        The updated theta.
    """
    step = learning_rate_theta(nb_answers)
    residual = is_correct - get_phi(theta, beta, c)
    return theta + step * residual

def get_new_beta(is_correct, theta, beta, nb_answers, c=0.2):
    """Return beta after one observed answer.

    Beta moves opposite to theta: it is lowered by
    ``learning_rate_beta(nb_answers)`` times the gap between the observed outcome
    and ``get_phi(theta, beta, c)``.

    Args:
        is_correct: observed outcome (the caller passes the row's answerCode).
        theta: theta the answer is scored against.
        beta: current beta.
        nb_answers: answers counted so far for this beta; sets the step size.
        c: lower asymptote forwarded to ``get_phi``.

    Returns:
        The updated beta.
    """
    step = learning_rate_beta(nb_answers)
    residual = is_correct - get_phi(theta, beta, c)
    return beta - step * residual

def _blank_table(column, param_name):
    """Give every distinct value of ``user_item[column]`` its own zeroed record."""
    return {
        key: {param_name: 0, 'nb_answers': 0}
        for key in user_item[column].unique()
    }


# One table per entity type; each record holds the running parameter and the
# number of answers folded into it so far.
user_params = _blank_table('userID', 'theta')
item_params = _blank_table('assessmentItemID', 'beta')
tag_params = _blank_table('KnowledgeTag', 'beta')
testid_params = _blank_table('testId', 'beta')
testcat_params = _blank_table('test_cat', 'beta')
question_number_params = _blank_table('question_number', 'beta')


# Single sequential pass over the answered interactions, in row order.
#
# For every row:
#   * the user's theta is updated against the *item* beta only;
#   * each of the five beta tables (item, tag, test id, test category, question
#     number) is updated independently against the user's pre-update theta;
#   * every touched record's answer counter is incremented.
#
# `itertuples` is used instead of `iterrows` because it avoids building a pandas
# Series per row, and `total=` lets tqdm show a percentage and an ETA.
elo_columns = ['userID', 'assessmentItemID', 'KnowledgeTag', 'testId', 'test_cat', 'question_number', 'answerCode']

# Beta tables, listed in the same order as their key columns in `elo_columns`.
difficulty_tables = (item_params, tag_params, testid_params, testcat_params, question_number_params)

interactions = user_item[elo_columns].itertuples(index=False, name=None)
for user, item, tag, testid, testcat, question_number, answer in tqdm(interactions, total=len(user_item)):
    learner = user_params[user]

    # Snapshot the values from before this row so that every update below is
    # computed from the same pre-update state.
    theta = learner['theta']
    item_beta = item_params[item]['beta']

    learner['theta'] = get_new_theta(answer, theta, item_beta, learner['nb_answers'])
    learner['nb_answers'] += 1

    difficulty_keys = (item, tag, testid, testcat, question_number)
    for table, key in zip(difficulty_tables, difficulty_keys):
        entry = table[key]
        entry['beta'] = get_new_beta(answer, theta, entry['beta'], entry['nb_answers'])
        entry['nb_answers'] += 1

# Attach each learned parameter to `data` as a plain numeric column named
# "<key column>_<parameter>" (e.g. userID_theta, assessmentItemID_beta).
#
# The lookups are flattened to {key: scalar} before mapping, so no temporary
# dict-valued columns are created. Keys that occur in `data` but never received
# an update (for instance an entity that appears only in rows filtered out
# before the update pass) map to NaN; those are filled with 0, the value every
# parameter is initialised to, instead of raising on a non-dict value.
param_sources = (
    ('userID', user_params, 'theta'),
    ('assessmentItemID', item_params, 'beta'),
    ('KnowledgeTag', tag_params, 'beta'),
    ('testId', testid_params, 'beta'),
    ('test_cat', testcat_params, 'beta'),
    ('question_number', question_number_params, 'beta'),
)
for key_column, params, field in param_sources:
    value_by_key = {key: entry[field] for key, entry in params.items()}
    data[key_column + '_' + field] = data[key_column].map(value_by_key).fillna(0.0)

In [None]:
# Persist the frame, now including the theta/beta feature columns, without the index.
output_csv = '../data/traintest_v3.2.csv'
data.to_csv(output_csv, index=False)