In [1]:
import os
os.chdir("/Users/cornederuijt/github/GCM/") # Adjust after construction of the package

import numpy as np
import pandas as pd
from scripts.clickmodel_fitters.clickdefinitionreader import ClickDefinition
from scripts.clickmodel_fitters.GCM import GCM
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import RMSprop
from copy import deepcopy

np.random.seed(1992)

In [2]:
# Define the model

list_size = 10
no_states = 7
click_states = np.zeros((no_states, list_size + 1))
click_states[3, :] = 1
abs_state = [(i, 6) for i in range(7)]
init_state = 3  # Equals the click state
batch_size = 10000
no_items = 100

var_dic = {
    'gamma': {
        'var_type': 'session',
        't0_fixed': 1,  # Always continues to evaluate the first item
        'pos_mat': np.vstack((np.zeros((2, no_states)),
                              np.array([0, 0, 1, 1, 0, 0, 0]),
                              np.array([0, 0, 1, 1, 0, 0, 0]),
                              np.zeros((3, no_states)))),
        'neg_mat': np.vstack((np.zeros((2, no_states)),
                              np.array([1, 1, 0, 0, 0, 0, 0]),
                              np.array([1, 1, 0, 0, 0, 0, 0]),
                              np.zeros((3, no_states)))),
        'fixed_mat': np.vstack((np.zeros((3, no_states)),
                                np.array([0, 0, 0, 0, 1, 1, 0]),
                                np.zeros((3, no_states))))
    },
    'phi_S': {
        'var_type': 'item',
        't0_fixed': 0,  # Never satisfied in the first state
        'pos_mat': np.vstack((np.zeros((3, no_states)),
                              np.array([0, 0, 0, 0, 1, 1, 0]),
                              np.zeros((3, no_states)))),
        'neg_mat': np.vstack((np.zeros((3, no_states)),
                              np.array([1, 1, 1, 1, 0, 0, 0]),
                              np.zeros((3, no_states)))),
        'fixed_mat': np.vstack((np.zeros((2, no_states)),
                                np.array([1, 1, 1, 1, 0, 0, 0]),
                                np.zeros((4, no_states))))
    },
    'phi_A': {
        'var_type': 'item',
        'pos_mat': np.vstack((np.zeros((2, no_states)),
                              np.array([0, 1, 0, 1, 0, 0, 0]),
                              np.array([0, 1, 0, 1, 0, 1, 0]),
                              np.zeros((3, no_states)))),
        'neg_mat': np.vstack((np.zeros((2, no_states)),
                              np.array([1, 0, 1, 0, 0, 0, 0]),
                              np.array([1, 0, 1, 0, 1, 0, 0]),
                              np.zeros((3, no_states))))
    }
}

model_def = ClickDefinition(click_states, init_state, list_size, no_states, batch_size, no_items, abs_state,
                            var_dic)

In [3]:
# Load data:
click_data = pd.read_csv("./data/small_example/simulation_res_train.csv", index_col=False)
prod_position = pd.read_csv("./data/small_example/simulation_item_props.csv", index_col=False)

    
# Ensure the order is correct:
click_data = click_data.sort_values(['user_id', 'session_count', 'item_order'])
    
# Add session index:
session_index = (click_data
                 .loc[:, ['user_id', 'session_count']] 
                 .drop_duplicates() 
                 .reset_index()
                )

session_index['session'] = session_index.index.to_numpy()

click_data = (click_data 
    .set_index(['user_id', 'session_count']) 
    .join(session_index
          .set_index(['user_id', 'session_count']),
          on=['user_id', 'session_count']) 
    .reset_index()
    .set_index('item')
    .join(prod_position
         .set_index('item'),
         on='item')
    .reset_index()
)

click_data

    

Unnamed: 0,item,user_id,session_count,item_order,click,attr,satis,eval,orig_list_id,index,session,X0,X1
0,42,0,0,1,0.0,0,0,1.0,0,0,0,-0.617880,-0.424150
1,66,0,0,2,0.0,0,0,1.0,0,0,0,-0.463804,-0.667938
2,89,0,0,3,0.0,0,0,1.0,0,0,0,-0.849402,1.813430
3,64,0,0,4,0.0,0,0,1.0,0,0,0,-1.875517,-1.026840
4,25,0,0,5,0.0,0,0,1.0,0,0,0,-0.908807,0.050236
...,...,...,...,...,...,...,...,...,...,...,...,...,...
250565,85,17999,2,6,0.0,0,0,0.0,35807,250560,25056,-0.362179,-0.737943
250566,3,17999,2,7,0.0,1,0,0.0,35807,250560,25056,0.136692,0.039374
250567,76,17999,2,8,0.0,1,0,0.0,35807,250560,25056,0.257214,-0.653226
250568,64,17999,2,9,0.0,0,0,0.0,35807,250560,25056,-1.875517,-1.026840


In [4]:
# Create the click matrix and item position matrix
click_mat = click_data.loc[:, ['session', 'item_order', 'click']] \
    .pivot(index='session', columns='item_order', values='click') \
    .to_numpy()

item_pos_mat = click_data.loc[:, ['session', 'item_order', 'item']] \
    .pivot(index='session', columns='item_order', values='item') \
    .to_numpy()

In [5]:
# Create feature matrix for phi_A, phi_S and gamma
item_feature_mat_A = (click_data.loc[:, ['item', 'X0', 'X1']] 
                                .drop_duplicates() 
                                .sort_values('item')
                                .to_numpy()
                     )
item_feature_mat_S = deepcopy(item_feature_mat_A)

n_sessions = click_data['session'].nunique()
gamma_feature_mat = np.ones((n_sessions, 1))

In [6]:
# Define the Keras models
model_phi_A = Sequential()
model_phi_A.add(Dense(1, input_dim=item_feature_mat_A.shape[1], activation='sigmoid', use_bias=False))
model_phi_A.compile(loss=GCM.pos_log_loss, optimizer=RMSprop())

model_phi_S = Sequential()
model_phi_S.add(Dense(1, input_dim=item_feature_mat_S.shape[1], activation='sigmoid', use_bias=False))
model_phi_S.compile(loss=GCM.pos_log_loss, optimizer=RMSprop())

model_gamma = Sequential()
model_gamma.add(Dense(1, input_dim=gamma_feature_mat.shape[1], activation='sigmoid', use_bias=False))
model_gamma.compile(loss=GCM.pos_log_loss, optimizer=RMSprop())


var_dic = {'phi_A': item_feature_mat_A, 'phi_S': item_feature_mat_S, 'gamma': gamma_feature_mat}
var_models = {'phi_A': model_phi_A, 'phi_S': model_phi_S, 'gamma': model_gamma}

In [7]:
# Run model
res = GCM.runEM(click_mat, var_dic, var_models, item_pos_mat, model_def, verbose=True)

Iteration: 0
Running E-step ...
Current conditional entropy:0.71579
Running M-step ...
Iteration: 1
Current norm: 9.58283
Running E-step ...
Current conditional entropy:0.67749
Running M-step ...
Iteration: 2
Current norm: 3.52685
Running E-step ...
Current conditional entropy:0.69458
Running M-step ...
Iteration: 3
Current norm: 3.36435
Running E-step ...
Current conditional entropy:0.67745
Running M-step ...
Iteration: 4
Current norm: 3.49011
Running E-step ...
Current conditional entropy:0.6422
Running M-step ...
Iteration: 5
Current norm: 4.10625
Running E-step ...
Current conditional entropy:0.57979
Running M-step ...
Iteration: 6
Current norm: 7.96356
Running E-step ...
Current conditional entropy:0.36115
Running M-step ...
Iteration: 7
Current norm: 8.53812
Running E-step ...
Current conditional entropy:0.74362
Running M-step ...
Iteration: 8
Current norm: 4.29083
Running E-step ...
Current conditional entropy:0.90908
Running M-step ...
Iteration: 9
Current norm: 3.86049
Running

In [8]:
# Entropy plot
(ggplot(res[2])
  
)

({'phi_A': <tensorflow.python.keras.engine.sequential.Sequential at 0x7f80dd76bc70>,
  'phi_S': <tensorflow.python.keras.engine.sequential.Sequential at 0x7f80c8fb9d00>,
  'gamma': <tensorflow.python.keras.engine.sequential.Sequential at 0x7f80c8783f70>},
 [{'phi_A': array([0.12052774, 0.8505674 , 0.80491   , 0.8917428 , 0.9927211 ,
          0.97811854, 0.9971267 , 0.99128675, 0.9930174 , 0.9969612 ,
          0.999     , 0.999     , 0.9985438 , 0.999     , 0.99854696,
          0.999     , 0.999     , 0.999     , 0.999     , 0.999     ,
          0.999     , 0.999     , 0.999     , 0.999     , 0.999     ,
          0.999     , 0.999     , 0.999     , 0.999     , 0.999     ,
          0.999     , 0.999     , 0.999     , 0.999     , 0.999     ,
          0.999     , 0.999     , 0.999     , 0.999     , 0.999     ,
          0.999     , 0.999     , 0.999     , 0.999     , 0.999     ,
          0.999     , 0.999     , 0.999     , 0.999     , 0.999     ,
          0.999     , 0.999     , 0