In [1]:
import tensorflow as tf
import numpy as np
import pandas as pd

class SessionDataset:

      def __init__(self, df):

          self.df = df.sort_values(by = ['session', 'timestamp']).reset_index(drop = True) # session (int) | timestamp (int) | item (string)
          self.offsets    = np.concatenate((np.zeros(1, dtype = np.int32), self.df.groupby('session').size().cumsum().values))
          self.n_sessions = len(self.offsets) - 1

          self.item_to_id = {item : i for i, item in enumerate(self.df.item.unique())}
          self.id_to_item = {i : item for i, item in self.item_to_id.items()}

          self.n_items = len(self.item_to_id)
          self.item_to_one_hot = {item : tf.one_hot(self.item_to_id[item], depth = self.n_items) for item in self.item_to_id.keys()}

      def extract_session(self, i, one_hot_encoded = True):

          session = self.df[self.offsets[i]:self.offsets[i+1]].copy()
          if one_hot_encoded:
              session.loc[:, 'item'] = session.item.apply(lambda x : self.item_to_one_hot[x])
          return session.item.values.tolist()


In [297]:
# y_true = (BATCH_SIZE,)                integer indexes of target items (ground truths)
# y_pred = (BATCH_SIZE, 1, n_classes)   next item scores for each item in the batch

def BPR(y_true, y_pred):
    to_lookup = tf.argmax(y_true, axis = 1)
    scores = tf.transpose(tf.nn.embedding_lookup(tf.transpose(y_pred), to_lookup))
    res = -tf.reduce_sum(tf.math.log(tf.nn.sigmoid(tf.linalg.diag_part(scores) - scores) + 1E-10), axis = 1)
    return tf.reduce_sum(res)

def TOP1(y_true, y_pred):
    to_lookup = tf.argmax(y_true, axis = 1)
    scores = tf.transpose(tf.nn.embedding_lookup(tf.transpose(y_pred), to_lookup))
    #scores = tf.transpose(tf.matmul(y_true, tf.transpose(y_pred)))
    diag_scores = tf.linalg.diag_part(scores)
    res = tf.reduce_sum(tf.nn.sigmoid(scores - diag_scores) + tf.nn.sigmoid(tf.square(scores)) - tf.nn.sigmoid(tf.square(diag_scores)), axis = 1)
    return tf.reduce_sum(res)

In [2]:
def TOP1(y_true, y_pred):
    _y_pred = tf.expand_dims(y_pred, axis = -1)  # (BATCH_SIZE, n_classes) ---> (BATCH_SIZE, n_classes, 1) 
    mat = tf.matmul(tf.expand_dims(tf.ones_like(y_true), -1), tf.expand_dims(y_true, axis = 1)) # (BATCH_SIZE, n_classes, 1) x (BATCH_SIZE, 1, n_classes) --> (BATCH_SIZE, n_classes, n_classes)
    score_diffs = tf.matmul(mat, _y_pred) # (BATCH_SIZE, n_classes, n_classes) x (BATCH_SIZE, n_classes, 1) --> (BATCH_SIZE, n_classes, 1)
    score_diffs = tf.squeeze(score_diffs - _y_pred, -1) # (BATCH_SIZE, n_classes)
    loss_by_sample = tf.reduce_sum(tf.nn.sigmoid(tf.square(y_pred)), axis = -1) + \
                      tf.reduce_sum(tf.sigmoid(-score_diffs), axis = -1) + \
                    -tf.squeeze(tf.squeeze(tf.nn.sigmoid(tf.square(tf.matmul(tf.expand_dims(y_true, 1), _y_pred))), -1), -1)
    return tf.reduce_sum(loss_by_sample)

def BPR(y_true, y_pred):  # both inputs have shape (BATCH_SIZE, n_classes)
    _y_pred = tf.expand_dims(y_pred, axis = -1)  # (BATCH_SIZE, n_classes, 1) 
    mat = tf.matmul(tf.expand_dims(tf.ones_like(y_true), -1), tf.expand_dims(y_true, axis = 1)) # (BATCH_SIZE, n_classes, 1) x (BATCH_SIZE, 1, n_classes) = (BATCH_SIZE, n_classes, n_classes)
    score_diffs = tf.matmul(mat, _y_pred) # (BATCH_SIZE, n_classes, n_classes) x (BATCH_SIZE, n_classes, 1) = (BATCH_SIZE, n_classes, 1)
    score_diffs = tf.squeeze(score_diffs - _y_pred, -1) # (BATCH_SIZE, n_classes)
    return -tf.reduce_sum(tf.math.log(tf.nn.sigmoid(score_diffs)))

In [18]:
class Gru4Rec:

    def __init__(self, n_classes, n_layers = 1, n_hidden = 64, loss = TOP1, batch_size = 16):

        self.n_classes  = n_classes   # = number of items

        self.n_layers = n_layers  # number of stacked GRU layers
        self.n_hidden = n_hidden  # dimension of GRU cell's hidden state
        self.loss     = loss
        self.batch_size = batch_size

        self.model = self.build_model()

    def build_model(self):

        model = tf.keras.models.Sequential()
        for i in range(self.n_layers):
            model.add(tf.keras.layers.GRU(name = 'GRU_{}'.format(i+1),
                                          units      = self.n_hidden, 
                                          activation = 'relu', 
                                          stateful   = True,
                                          return_sequences = (i < self.n_layers - 1)))
        model.add(tf.keras.layers.Dense(units = self.n_classes, activation = 'linear'))

        top3accuracy = lambda y_true, y_pred: tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k = 3)
        top3accuracy.__name__ = 'top3accuracy'
        model.compile(loss = self.loss, optimizer = 'adam', metrics = ['accuracy', top3accuracy])

        model.build(input_shape = (self.batch_size, 1, self.n_classes))
        print(model.summary())

        return model

    def _reset_hidden(self, i):

        for nl, layer in enumerate(self.model.layers):   # session has change: reset related hidden state
            if layer.name.startswith('GRU_') and layer.states[0] is not None:
                hidden_updated = layer.states[0].numpy()
                hidden_updated[i, :] = 0.
                self.model.layers[nl].reset_states(hidden_updated)

    def train_batch_generator(self, dataset):  # session | item | timestamp

      assert dataset.n_sessions > self.batch_size, "Training set is too small"
      ixs = np.arange(dataset.n_sessions)

      stacks = [[]] * self.batch_size
      next_session_id = 0

      X, y = np.empty(shape = (self.batch_size, 1, self.n_classes)), np.empty(shape = (self.batch_size, self.n_classes))
      X[:], y[:] = None, None
      while True:
          for i in range(self.batch_size):
              # 1. If stack i has only one element: change session
              if len(stacks[i]) <= 1:
                  if next_session_id >= dataset.n_sessions: # no more sessions available: shuffle sessions and restart
                      np.random.shuffle(ixs)
                      next_session_id = 0
                  while not len(stacks[i]) >= 2:   # ignore sessions with only one element
                      stacks[i] = dataset.extract_session(ixs[next_session_id])[::-1]
                      next_session_id += 1
                  self._reset_hidden(i)
              # 2. Stack i is now valid: set input + target variables
              X[i, 0] = stacks[i].pop()
              y[i]    = stacks[i][-1]

          yield tf.constant(X, dtype = tf.float32), tf.constant(y, dtype = tf.float32)

    def fit(self, dataset, steps_per_epoch = 10000, epochs = 5):

        checkpoint = tf.keras.callbacks.ModelCheckpoint(filepath = "gru-chkpt-{epoch:02d}.hdf5")
        self.model.fit_generator(generator       = self.train_batch_generator(dataset), 
                                 steps_per_epoch = steps_per_epoch, 
                                 epochs          = epochs, 
                                 callbacks       = [checkpoint], 
                                 shuffle         = False)

    def get_final_hidden_states(self, dataset):

        final_states = np.empty(shape = (dataset.n_sessions, self.n_layers, self.n_hidden))
        final_states[:] = None
        done = [False] * dataset.n_sessions

        stacks = [dataset.extract_session(i)[::-1] for i in range(self.batch_size)]   # events are in reverse time order
        next_session_id = self.batch_size
        batch_idx_to_session = np.arange(self.batch_size)
        X = np.empty(shape = (self.batch_size, 1, self.n_classes))

        self.model.reset_states()

        n_done = 0
        while n_done < dataset.n_sessions:
            for i in range(self.batch_size):
                while len(stacks[i]) == 1:
                    if not done[batch_idx_to_session[i]]:
                        final_states[batch_idx_to_session[i], :] = np.array([layer.states[0][i, :] for layer in self.model.layers if layer.name.startswith('GRU_')])
                        done[batch_idx_to_session[i]] = True
                        n_done += 1
                        if n_done % 100 == 0:
                            print("{} / {}".format(n_done, dataset.n_sessions))
                    if next_session_id >= dataset.n_sessions: # restart from the beginning
                        next_session_id = 0
                    stacks[i] = dataset.extract_session(next_session_id)[::-1]
                    batch_idx_to_session[i] = next_session_id
                    next_session_id += 1
                    self._reset_hidden(i)   # session has changes --> reset corresponding hidden state
                X[i, 0] = stacks[i].pop()

            _ = self.model.predict(X)   # hidden states get updated
            
        return final_states
        

In [17]:
import pandas as pd
df = pd.read_csv("workouts_clean_2.csv").sort_values(by = ['session', 'timestamp']).reset_index(drop = True)
offsets = np.concatenate((np.zeros(1, dtype = np.int32), df.groupby('session').size().cumsum().values))

dataset_train = SessionDataset(df.iloc[~df.index.isin(offsets[1:] - 1)])    # training set: remove last element from each session

X_test = df.iloc[offsets[1:] - 2][['session', 'item']].sort_values(by = ['session']).reset_index(drop = True)
y_test = df.iloc[offsets[1:] - 1][['session', 'item']].sort_values(by = ['session']).reset_index(drop = True)

In [19]:
g4r = Gru4Rec(n_classes = dataset_train.n_items)
g4r.fit(dataset_train)

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
GRU_1 (GRU)                  multiple                  21888     
_________________________________________________________________
dense_2 (Dense)              multiple                  3120      
Total params: 25,008
Trainable params: 25,008
Non-trainable params: 0
_________________________________________________________________
None
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [20]:
final_states = g4r.get_final_hidden_states(dataset_train)
np.save('final_states.npy', final_states, allow_pickle = False)

100 / 1082
200 / 1082
300 / 1082
400 / 1082
500 / 1082
600 / 1082
700 / 1082
800 / 1082
900 / 1082
1000 / 1082


In [21]:

final_states = np.load('final_states.npy')

g4r.model.reset_states()

rem = dataset_train.n_sessions % g4r.batch_size
if rem > 0:
  X_test = pd.concat((X_test, X_test[:(g4r.batch_size - rem)]), axis = 0)

y_pred = np.empty(shape = (dataset_train.n_sessions, g4r.n_classes))
y_pred[:] = None
X = np.empty(shape = (g4r.batch_size, 1, g4r.n_classes))
for batch_id in range(dataset_train.n_sessions // g4r.batch_size):
    X[:] = None
    for i in range(g4r.batch_size):
        X[i, :] = dataset_train.item_to_one_hot[X_test.iloc[batch_id * g4r.batch_size + i]['item']]
    nlg = 0
    for nl, layer in enumerate(g4r.model.layers):
        if layer.name.startswith('GRU_'):
            g4r.model.layers[nl].reset_states(final_states[batch_id * g4r.batch_size : (batch_id + 1) * g4r.batch_size, nlg, :])
            nlg += 1
    y_pred[batch_id * g4r.batch_size : (batch_id + 1) * g4r.batch_size, :] = g4r.model.predict(X)[:g4r.batch_size]

y_pred = tf.constant(y_pred[:dataset_train.n_sessions], dtype = tf.float32)

In [22]:
y_true = np.empty(shape = (dataset_train.n_sessions, dataset_train.n_items))
for i in range(y_true.shape[0]):
    y_true[i, :] = dataset_train.item_to_one_hot[y_test.item.values[i]]
y_true = tf.constant(y_true, dtype = tf.float32)

In [23]:
tf.reduce_sum(tf.keras.metrics.top_k_categorical_accuracy(y_true, y_pred, k = 3)) / y_true.shape[0]

<tf.Tensor: shape=(), dtype=float32, numpy=0.9537893>