In [1]:
import pandas as pd
import numpy as np

# Processing data

In [3]:
# Read the raw trial-level choice data into a DataFrame
df = pd.read_csv('choice_data.csv')

# Counts are derived as "largest id + 1"; this presumes the ids are
# zero-based and contiguous (they are iterated with range(...) further down).
num_participants = df['participant'].max() + 1
num_tasks = df['task'].max() + 1

def _kalman_features(choices, rewards, prior_mean=50.0, prior_variance=400.0,
                     reward_variance=64.0):
    """Return the pre-choice features for every trial of one two-armed task.

    Both arms start with posterior mean ``prior_mean`` and posterior variance
    ``prior_variance``. On each trial the features are recorded *before* the
    outcome is used, then the chosen arm is updated with a Kalman-filter step
    (gain = variance / (variance + reward_variance)).

    Parameters
    ----------
    choices : sequence of int
        Index of the chosen arm (0 or 1) on each trial, in trial order.
    rewards : sequence of float
        Reward observed on each trial, in the same order as ``choices``.
    prior_mean, prior_variance : float
        Initial mean / variance shared by both arms.
    reward_variance : float
        Observation-noise variance used in the gain.

    Returns
    -------
    numpy.ndarray, shape (n_trials, 3)
        Column 0: m[0] - m[1]
        Column 1: sqrt(s[0]) - sqrt(s[1])
        Column 2: (m[0] - m[1]) / sqrt(s[0] + s[1])
    """
    m = np.full(2, prior_mean, dtype=float)
    s = np.full(2, prior_variance, dtype=float)
    out = np.empty((len(choices), 3))

    for t, (c, r) in enumerate(zip(choices, rewards)):
        # store data (beliefs held before this trial's outcome)
        mean_diff = m[0] - m[1]
        out[t, 0] = mean_diff
        out[t, 1] = np.sqrt(s[0]) - np.sqrt(s[1])
        out[t, 2] = mean_diff / np.sqrt(s[0] + s[1])

        # update parameters of the chosen arm only
        k = s[c] / (s[c] + reward_variance)
        m[c] += k * (r - m[c])
        s[c] -= k * s[c]

    return out


# Work on plain arrays and positional row numbers so that each feature row is
# written back to the exact DataFrame row it was computed from, regardless of
# how the CSV happens to be ordered.
participant_ids = df['participant'].to_numpy()
task_ids = df['task'].to_numpy()
trial_ids = df['trial'].to_numpy()
choices = df['choice'].to_numpy()
rewards = df['reward'].to_numpy()

features = np.full((len(df), 3), np.nan)
for participant in np.unique(participant_ids):
    print(participant)

    participant_rows = np.flatnonzero(participant_ids == participant)

    for task in np.unique(task_ids[participant_rows]):
        rows = participant_rows[task_ids[participant_rows] == task]
        # Beliefs evolve sequentially, so trials must be replayed in order.
        rows = rows[np.argsort(trial_ids[rows], kind='stable')]

        # Each task is expected to contain trials 0..n-1 exactly once; fail
        # loudly with context instead of producing misaligned features.
        if not np.array_equal(trial_ids[rows], np.arange(len(rows))):
            raise ValueError(
                f'participant {participant}, task {task}: '
                'trials must be numbered 0..n-1 exactly once'
            )

        features[rows] = _kalman_features(choices[rows], rewards[rows])

df['V'] = features[:, 0]
df['RU'] = features[:, 1]
df['TU'] = features[:, 2]

# Rows flagged as forced_choice are dropped only now: they still drove the
# belief updates above but are excluded from the data used afterwards.
# NOTE(review): `~` assumes forced_choice is a boolean column - confirm.
df = df[~df.forced_choice]

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59


# Predicting choice behavior with extracted features

In [4]:
from sklearn.linear_model import LogisticRegressionCV
from sklearn.model_selection import train_test_split

In [5]:
# Select the three extracted feature columns as the predictor matrix
feature_columns = ['V', 'RU', 'TU']
features = df[feature_columns]
features

Unnamed: 0,V,RU,TU
4,-40.571803,2.927462,-4.671593
9,-9.580969,2.927462,-1.103189
10,-13.683779,0.942959,-1.937451
11,-12.134972,1.520988,-1.808688
12,-13.124462,1.921507,-2.024360
...,...,...,...
143991,-9.553063,1.520988,-1.423861
143992,-10.486649,1.921507,-1.617495
143993,-9.006734,2.220019,-1.423747
143994,-9.476516,2.453570,-1.525928


In [7]:
# Cross-validated logistic regression, library defaults throughout
regr = LogisticRegressionCV()

# Hold out 10% of the rows as a test set; the fixed random_state makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    df['choice'],
    test_size=.1,
    random_state=42,
)
f'Train size: {len(X_train)}, test size: {len(X_test)}'

'Train size: 60480, test size: 6720'

In [8]:
# Fitting the model and evaluating performance on the held-out test set.
regr.fit(X_train, y_train)

# LogisticRegressionCV.score reports the metric selected by its `scoring`
# option; with the default settings used here that is classification
# accuracy (fraction of correctly predicted choices), not R^2.
test_accuracy = regr.score(X_test, y_test)

# Format in the f-string rather than calling .round(), which only exists on
# NumPy scalars and fails when score() returns a plain Python float.
f'Test accuracy = {test_accuracy:.2f}'

'Test R2 = 0.83'