# NARM+




In [1]:
#Importing functions.
import pandas
import csv
from collections import Counter, OrderedDict, defaultdict, namedtuple
import gc
import gensim
from gensim.models import Word2Vec
from gensim.models.doc2vec import Doc2Vec, TaggedDocument
import itertools
import math
import numpy as np
import pandas
import random
from sklearn.model_selection import train_test_split
import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

# PyTorch can run on CPU or on an Nvidia GPU (video card) using CUDA.
# This cell selects the GPU if one is available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Compare the device *type*: `device` is a torch.device object, and comparing
# it directly against the plain string "cuda" does not match, so the original
# check never took this branch.
if device.type == "cuda":
    torch.cuda.synchronize()
    
# requirements:
# gensim version 3.6.0
# torch version 1.1.0
# numpy version newest

In [2]:
# Query the cuDNN version through PyTorch; as the last expression of the cell
# its value is what the notebook displays below.
torch.backends.cudnn.version()

7501

In [3]:
# Create sessions
# Load the preprocessed interaction log.
# NOTE(review): read_pickle unpickles the file, which can execute arbitrary
# code -- only load data files that come from a trusted source.
sample = pandas.read_pickle("./data/processedData.pkl")
# TIMESTAMP is parsed as seconds since the epoch and reduced to its calendar
# date; that date is stored in the SESSION column.
sample['SESSION'] = pandas.to_datetime(sample['TIMESTAMP'],unit='s').dt.date

# Number of distinct users, then number of distinct products.
print(len(sample["USERID"].unique()))
print(len(sample["PRODUCTID"].unique()))


11832
67172


In [4]:
# Mean number of distinct SESSION values per user.
print("Average number of sessions per user")
print(sample.groupby('USERID')['SESSION'].nunique().mean())

# Mean number of ACTION rows per (user, session) pair. This value is not
# printed explicitly; it is the cell's last expression.
print("Average number of clicks per session")
sample.groupby(['USERID', 'SESSION'])['ACTION'].count().mean()

Average number of sessions per user
5.765128465179175
Average number of clicks per session


11.017386715142274

In [5]:
# Distinct user ids and product ids, in order of first appearance in `sample`.
userList = sample["USERID"].unique()
productList = sample["PRODUCTID"].unique()

In [6]:
# Here we first define a class that can map a product to an ID (p2i)
# and back (i2p).

class OrderedCounter(Counter, OrderedDict):
    """Counter variant whose keys keep the order in which they were first seen."""

    def __repr__(self):
        # Show the counts as an OrderedDict so the ordering is visible.
        ordered_counts = OrderedDict(self)
        return '%s(%r)' % (type(self).__name__, ordered_counts)

    def __reduce__(self):
        # Pickle as "call this class with an OrderedDict of the counts".
        ordered_counts = OrderedDict(self)
        return type(self), (ordered_counts,)


class Vocabulary:
    """Assigns consecutive ids to products and users.

    Ids are stored as *strings* ('0', '1', ...). `p2i`/`u2i` map a token to
    its id and `i2p`/`i2u` map the integer position back to the token.
    `p2e`/`u2e` start empty and are meant to be filled by the caller with
    token -> embedding lookups.

    NOTE(review): no id is reserved for unknown/padding tokens (the original
    reservation of "<unk>" was commented out), so id '0' belongs to a real
    product and a real user.
    """

    def __init__(self):
        self.freqs = OrderedCounter()  # product -> number of times counted
        self.users = []                # users in the order they were counted
        self.u2i = {}
        self.i2u = []
        self.p2i = {}
        self.i2p = []
        self.p2e = {}
        self.u2e = {}

    def count_product(self, t):
        """Record one occurrence of product `t`."""
        self.freqs[t] += 1

    def count_user(self, t):
        """Remember user `t` so that `build` assigns it an id."""
        self.users.append(t)

    def add_product(self, t):
        """Assign the next free id to product `t` (no-op if already known)."""
        # Skipping known tokens keeps p2i and i2p aligned; previously a repeat
        # overwrote the id while still appending to i2p.
        if t in self.p2i:
            return
        self.p2i[t] = str(len(self.p2i))
        self.i2p.append(t)

    def add_user(self, t):
        """Assign the next free id to user `t` (no-op if already known)."""
        if t in self.u2i:
            return
        self.u2i[t] = str(len(self.u2i))
        self.i2u.append(t)

    def build(self, min_freq=0):
        """Assign ids to every counted product and user.

        Products are numbered from most to least frequent; ties keep their
        first-seen order because the sort is stable. Products counted fewer
        than `min_freq` times get no id. Calling `build` again does not
        change ids that were already assigned.
        """
        by_frequency = sorted(self.freqs.items(), key=lambda x: x[1], reverse=True)
        for tok, freq in by_frequency:
            if freq >= min_freq:
                self.add_product(tok)
        for user in self.users:
            self.add_user(user)

In [7]:
# This process should be deterministic and should have the same result 
# if run multiple times on the same data set.

def build_voc(userList, productList):
    """Create a Vocabulary holding every given product and user."""
    vocabulary = Vocabulary()
    # Tally the products first, then register the users, then hand out ids.
    for product_id in productList:
        vocabulary.count_product(product_id)
    for user_id in userList:
        vocabulary.count_user(user_id)
    vocabulary.build()
    return vocabulary


v = build_voc(userList, productList)
print("Vocabulary size:", len(v.p2i))



Vocabulary size: 67172


In [8]:
# Create nested list of sessions and items per user:
# first collect the PRODUCTID values of every (USERID, SESSION) group into a
# list, then collect those per-session lists into one list per USERID.
userBase = sample.groupby(['USERID', 'SESSION'])['PRODUCTID'].apply(list).groupby('USERID').apply(list)
# Show the nested session list stored under index label 1.
print(userBase[1])

[[2268318, 2333346], [4365585, 230380], [2951368, 3108797], [2734026, 4152983, 266784, 266784, 1305059], [2087357, 3157558], [2087357, 1340922, 4954999], [3219016, 2028434, 3219016], [4954999, 818610, 271696]]


In [9]:
# More efficient create examples function
# A simple way to define a class is using namedtuple.
# One example: the user, the flattened items of the earlier sessions, a prefix
# of the current session, and the item that follows that prefix.
Example = namedtuple("Example", ["userID", "history", "inputs", "target"])

# Filled as a side effect of f(..., train=False): every kept session, and one
# TaggedDocument per user (all of the user's items, tagged with the user id).
allSessions = []
allUsers = []


def _examples_for_session(userid, past_sessions, session):
    """Build one Example per (prefix, next item) pair of `session`."""
    # The flattened history is identical for all examples of this session, so
    # it is built once and shared between them.
    history = [item for past in past_sessions for item in past]
    prefixes, labels = process_seqs(session)
    return [
        Example(userID=str(userid), history=history, inputs=prefix, target=[label])
        for prefix, label in zip(prefixes, labels)
    ]


def f(userid, sessions, train):
    """Turn one user's sessions into training or test examples.

    Sessions with fewer than two items are dropped and products are mapped to
    vocabulary ids via the module-level vocabulary `v`.

    * train=True: examples come from the second-to-last kept session, with
      all sessions before it as history.
    * train=False: examples come from the last kept session, with all
      sessions before it as history. This branch also appends the user's
      sessions to `allSessions` and a TaggedDocument to `allUsers`.

    Returns:
        A list of Example tuples. The list is empty when the user does not
        have enough kept sessions (previously this raised IndexError).
    """
    # NOTE(review): ids in v.p2i are strings while the fallback for an unknown
    # product is the integer 0 -- confirm which is intended.
    sessions = [[v.p2i.get(t, 0) for t in ses] for ses in sessions if len(ses) > 1]

    if train:
        if len(sessions) < 2:
            return []
        return _examples_for_session(userid, sessions[:-2], sessions[-2])

    if not sessions:
        return []

    # store info for the pretrained embeddings
    allSessions.extend(sessions)
    user_doc = [t for ses in sessions for t in ses]
    allUsers.append(TaggedDocument(user_doc, [str(userid)]))

    return _examples_for_session(userid, sessions[:-1], sessions[-1])
    
def process_seqs(seq):
    """Split a session into (prefix, next item) pairs, longest prefix first.

    Returns two parallel lists: the prefixes and the item following each one.
    Sequences with fewer than two items produce two empty lists.
    """
    # k is the position of the target item; everything before it is the prefix.
    cut_points = range(len(seq) - 1, 0, -1)
    prefixes = [seq[:k] for k in cut_points]
    targets = [seq[k] for k in cut_points]
    return prefixes, targets

def createExamples(userBase):
    """Build the training set and the test set from the per-user session lists."""
    frame = pandas.DataFrame(userBase)
    frame.reset_index(level=0, inplace=True)

    def collect(train):
        # One list of examples per user row, flattened into a single list.
        per_user = frame.apply(
            lambda row: f(row['USERID'], row['PRODUCTID'], train), axis=1
        ).tolist()
        flattened = []
        for user_examples in per_user:
            for example in user_examples:
                if example is not None:
                    flattened.append(example)
        return flattened

    # The training pass runs completely before the test pass.
    train_examples = collect(True)
    test_examples = collect(False)
    return train_examples, test_examples


trainData, testData = createExamples(userBase)
print(trainData[0])
print('')
print(testData[0])
    
    

Example(userID='1', history=['0', '1', '2', '3', '4', '5', '6', '7', '8', '8', '9', '10', '11', '10', '12', '13'], inputs=['14', '15'], target=['14'])

Example(userID='1', history=['0', '1', '2', '3', '4', '5', '6', '7', '8', '8', '9', '10', '11', '10', '12', '13', '14', '15', '14'], inputs=['13', '16'], target=['17'])


In [10]:
# HELPER FUNCTIONS
        
def get_minibatch(data, batch_size=25, shuffle=True):
    """Yield `data` in lists of `batch_size` examples.

    Args:
        data: the examples to batch.
        batch_size: number of examples per batch; the final batch may be
            smaller.
        shuffle: when true, the examples are visited in a fresh random order.
            The order is drawn on a copy, so the caller's list is left
            untouched (this matters because the evaluation helpers call this
            function with the default shuffle=True).

    Yields:
        Lists of examples; nothing is yielded for empty `data`.
    """
    if shuffle:
        data = list(data)
        random.shuffle(data)  # new order on every call, i.e. every epoch

    batch = []
    for example in data:
        batch.append(example)
        if len(batch) == batch_size:
            yield batch
            batch = []

    # in case there is something left
    if batch:
        yield batch

def pad(tokens, length, pad_value=-1):
    """Return `tokens` extended with `pad_value` up to `length` entries.

    The padding value defaults to -1. A list that is already `length` or
    longer is returned as an unchanged copy.
    """
    missing = length - len(tokens)
    filler = [pad_value] * missing
    return tokens + filler

def prepare_minibatch(mb, vocab):
    """Convert a minibatch of Examples into index tensors.

    Args:
        mb: list of Example tuples whose history/inputs/target hold product
            ids that `int()` can parse.
        vocab: object with a `u2i` mapping from user id to index.

    Returns:
        ((users, history, (inputs, input_lengths)), targets) where
        * users is a LongTensor with one index per example,
        * history is a LongTensor of shape (batch, 1, max history length),
        * inputs is a LongTensor of shape (batch, max session length),
        * input_lengths is a LongTensor (left on the CPU) with the unpadded
          session lengths,
        * targets is a LongTensor of shape (batch, number of targets).
        Shorter sequences are filled with pad()'s default value, -1.
    """
    # u2i stores its indices as strings, so they must be converted before
    # building the tensor (LongTensor cannot be built from strings).
    # NOTE(review): unknown users fall back to index 0, which is also a real
    # user's index -- confirm that the key type of u2i matches Example.userID.
    user_indices = [int(vocab.u2i.get(example.userID, 0)) for example in mb]
    v = torch.LongTensor(user_indices).to(device)

    # NOTE(review): every padded history is wrapped in an extra list, giving
    # shape (batch, 1, max history length) -- confirm the consumer expects it.
    max_history = max(len(ex.history) for ex in mb)
    w = [[pad([int(t) for t in ex.history], max_history)] for ex in mb]
    w = torch.LongTensor(w).to(device)

    max_inputs = max(len(ex.inputs) for ex in mb)
    x = [pad([int(t) for t in ex.inputs], max_inputs) for ex in mb]
    x = torch.LongTensor(x).to(device)
    xlengths = torch.LongTensor([len(ex.inputs) for ex in mb])

    # NOTE(review): targets are shifted down by one; the model that consumes
    # this variant is not visible here, so the offset is left as it was.
    y = [[int(t)-1 for t in ex.target] for ex in mb]
    y = torch.LongTensor(y).to(device)

    gc.collect()

    return (v,w,(x,xlengths)), y

def prepare_minibatch_pre_trained(mb, vocab):
    """Convert a minibatch of Examples into tensors of pre-trained embeddings.

    Args:
        mb: list of Example tuples (ids are strings).
        vocab: object whose `u2e` / `p2e` map a user id / product id to its
            embedding vector.

    Returns:
        ((users, history, (inputs, input_lengths)), targets) where
        * users has shape (batch, 1, user embedding size),
        * history has shape (batch, max history length, item embedding size),
        * inputs has shape (batch, max session length, item embedding size),
        * input_lengths is a LongTensor (left on the CPU) with the unpadded
          session lengths,
        * targets is a LongTensor of shape (batch, number of targets) holding
          the integer product id of each target.
    """
    u = [[vocab.u2e[example.userID]] for example in mb]
    v = torch.FloatTensor(u).to(device)

    # NOTE(review): padding uses the key '0', which is also the id of a real
    # product; padded history positions are not masked downstream.
    max_history = max(len(ex.history) for ex in mb)
    w = [pad(ex.history, max_history, pad_value='0') for ex in mb]
    w = [[vocab.p2e[t] for t in ex] for ex in w]
    w = torch.FloatTensor(w).to(device)

    max_inputs = max(len(ex.inputs) for ex in mb)
    x = [pad(ex.inputs, max_inputs, pad_value='0') for ex in mb]
    x = [[vocab.p2e[t] for t in ex] for ex in x]
    x = torch.FloatTensor(x).to(device)
    xlengths = torch.LongTensor([len(ex.inputs) for ex in mb])

    # NarmPlus.forward scores product id i in output column i (it stacks
    # p2e[str(i)] for i = 0..N-1), so the target column is the id itself.
    # The previous "id - 1" pointed at the neighbouring product and turned
    # id '0' into the invalid index -1.
    y = [[int(t) for t in ex.target] for ex in mb]
    y = torch.LongTensor(y).to(device)

    gc.collect()

    return (v,w,(x,xlengths)), y

def recall(model, data, prep_fn=prepare_minibatch, batch_fn=get_minibatch, at=20, batch_size=25, **kwargs):
    """Compute recall@`at`: the share of targets found among the top predictions.

    Args:
        model: module with a `vocab` attribute; calling it on a prepared batch
            returns (scores, extra) with one score per product.
        data: list of examples to evaluate on.
        prep_fn: turns (batch, vocab) into (model input, targets).
        batch_fn: yields batches of `data`.
        at: number of highest-scoring products counted as predictions.
        batch_size: number of examples per evaluation batch.
        **kwargs: accepted and ignored.

    Returns:
        (recall, None); recall is 0.0 when `data` yields no targets.
    """
    model.eval() # disable dropout
    targets = []
    predictions = []

    vocab = model.vocab
    for batch in batch_fn(data, batch_size=batch_size):
        # convert the example input and targets to PyTorch tensors
        x, target = prep_fn(batch, vocab)
        # forward pass without tracking gradients
        with torch.no_grad():
            output, alphas = model(x)
        # output is indexed as (batch, product); keep the `at` best columns
        prediction = torch.argsort(output, dim=1, descending=True)[:,:at].tolist()

        targets.extend(i for row in target.tolist() for i in row)
        predictions.extend(prediction)
        gc.collect()

    print(predictions[:10],targets[:10])

    gc.collect()

    if not targets:
        # Nothing to score; avoid dividing by zero.
        return 0.0, None

    hits = sum(1 for t, p in zip(targets, predictions) if t in p)
    return hits / len(targets), None

def mrr(model, data, prep_fn=prepare_minibatch, batch_fn=get_minibatch, at=5, batch_size=25, **kwargs):
    """Compute the mean reciprocal rank of the targets within the top `at`.

    A target ranked r-th (1-based) among the `at` best-scoring products
    contributes 1/r; a target outside the top `at` contributes 0.

    Args:
        model: module with a `vocab` attribute; calling it on a prepared batch
            returns (scores, extra) with one score per product.
        data: list of examples to evaluate on.
        prep_fn: turns (batch, vocab) into (model input, targets).
        batch_fn: yields batches of `data`.
        at: number of highest-scoring products considered.
        batch_size: number of examples per evaluation batch. This used to be
            read from an undefined name; it is now a real parameter.
        **kwargs: accepted and ignored.

    Returns:
        (mrr, None); mrr is 0.0 when `data` yields no targets.
    """
    model.eval() # disable dropout
    targets = []
    predictions = []

    vocab = model.vocab
    for batch in batch_fn(data, batch_size=batch_size):
        # convert the example input and targets to PyTorch tensors
        x, target = prep_fn(batch, vocab)
        # forward pass without tracking gradients
        with torch.no_grad():
            output, alphas = model(x)
        # output is indexed as (batch, product); keep the `at` best columns
        prediction = torch.argsort(output, dim=1, descending=True)[:,:at].tolist()

        targets.extend(i for row in target.tolist() for i in row)
        # Append this batch's predictions (the list was previously extended
        # with itself and therefore stayed empty).
        predictions.extend(prediction)
        gc.collect()

    print(predictions[:10],targets[:10])

    gc.collect()

    if not targets:
        # Nothing to score; avoid dividing by zero.
        return 0.0, None

    reciprocal_ranks = sum(
        1 / (p.index(t) + 1) for t, p in zip(targets, predictions) if t in p
    )
    return reciprocal_ranks / len(targets), None

In [15]:
# Custom NN

#Item embedding & User Embedding equal size
#

class NarmPlus(nn.Module):
    """Session recommender: a GRU with NARM-style attention, seeded by a user profile.

    The forward pass works on pre-embedded inputs:
      1. History items are attended with the user vector to form a profile.
      2. The profile (projected if needed) is the initial GRU hidden state.
      3. The GRU runs over the current session. `c_global` is the hidden
         state at the last *valid* step and `c_local` an attention-weighted
         sum of all valid hidden states.
      4. A bilinear decoder scores every product embedding against
         [c_global; c_local]; a softmax over products gives the output.

    Args:
        item_embedding_dim: size of the product embeddings.
        user_embedding_dim: size of the user embeddings.
        hidden_size: GRU hidden size.
        output_dim: output features of the bilinear decoder (only feature 0
            is used for scoring).
        num_layers: number of GRU layers.
        vocab: object providing `u2i`, `p2i` and, by the first forward pass,
            `p2e` (product id string -> embedding vector).
        pre_trained: stored on the instance; not used by this class.
        batch_size: stored on the instance; not used by the forward pass.
        activation_fn: activation module; a new nn.RReLU() per instance when
            omitted (a shared default module would share its train/eval flag
            between models).
        dropout: dropout probability applied to [c_global; c_local].
    """

    def __init__(self,
                 item_embedding_dim, user_embedding_dim, hidden_size, output_dim, num_layers,
                 vocab, pre_trained=True, batch_size=10,
                 activation_fn=None, dropout=0.2):
        super(NarmPlus, self).__init__()
        # Store parameters
        self.item_embedding_dim = item_embedding_dim
        self.user_embedding_dim = user_embedding_dim
        self.hidden_size = hidden_size
        self.output_dim = output_dim
        self.num_layers = num_layers
        self.pre_trained = pre_trained
        self.batch_size = batch_size
        # Shape of hidden_state: (num_layers * num_directions, batch, hidden_size)
        self.hidden_state_dim = (num_layers, batch_size, hidden_size)
        self.hidden_state_size = num_layers * hidden_size
        self.vocab = vocab

        # General part
        self.ActivationFn = nn.RReLU() if activation_fn is None else activation_fn
        self.Softmax = nn.Softmax(dim=1)
        self.loss = self.top1loss
        self.dropout = nn.Dropout(p=dropout)

        # History part (the embeddings themselves are looked up outside the model)
        self.LatentItemHistory = nn.Linear(item_embedding_dim, user_embedding_dim)
        self.ProfileToHidden = nn.Linear(user_embedding_dim, self.hidden_state_size)

        # NARM part: the GRU consumes item embeddings
        self.Global = nn.GRU(item_embedding_dim, hidden_size, num_layers=num_layers, batch_first=True)
        self.Decoder = nn.Bilinear(item_embedding_dim, 2*hidden_size, output_dim)

        # Attention of the NARM local encoder (no bias terms)
        latent_space = hidden_size
        self.A1 = nn.Linear(hidden_size,latent_space,bias=False)
        self.A2 = nn.Linear(hidden_size,latent_space,bias=False)
        self.v = nn.Linear(latent_space,1,bias=False)

        # Lazily built (number of products, item_embedding_dim) matrix; kept
        # as a plain attribute so that state_dict() is unchanged.
        self._item_embeds = None
        self._item_embeds_source = None

    def _item_embedding_matrix(self, target_device):
        """Return the stacked product embeddings, row i = vocab.p2e[str(i)].

        The matrix is rebuilt only when `vocab.p2e` is replaced by another
        object or the number of products changes. If `p2e` is modified in
        place, reset `self._item_embeds = None` to force a rebuild.
        """
        source = self.vocab.p2e
        n_items = len(self.vocab.p2i)
        cached = self._item_embeds
        if (cached is None or self._item_embeds_source is not source
                or cached.shape[0] != n_items):
            rows = np.asarray([source[str(i)] for i in range(n_items)], dtype=np.float32)
            cached = torch.from_numpy(rows)
            self._item_embeds_source = source
        cached = cached.to(target_device)
        self._item_embeds = cached
        return cached

    def _initial_hidden(self, profile):
        """Turn the (batch, user_embedding_dim) profile into the GRU's h_0.

        Returns a tensor of shape (num_layers, batch, hidden_size).
        """
        if self.user_embedding_dim == self.hidden_size and self.num_layers == 1:
            return profile.unsqueeze(0)
        projected = self.ActivationFn(self.ProfileToHidden(profile))
        # (batch, num_layers * hidden) -> (num_layers, batch, hidden)
        stacked = projected.view(-1, self.num_layers, self.hidden_size)
        return stacked.transpose(0, 1).contiguous()

    def forward(self,x):
        """Score every product for each example of the batch.

        Args:
            x: (user, history, (inputs, input_lengths)) with
                user: (batch, 1, user embedding size)
                history: (batch, history length, item embedding size)
                inputs: (batch, max session length, item embedding size)
                input_lengths: unpadded session lengths, one per example.

        Returns:
            (output, None); output has shape (batch, number of products) and
            every row is a softmax distribution over the products.
        """
        user, history, (inputs, input_lengths) = x
        if self.item_embedding_dim == self.user_embedding_dim:
            dense = history
        else:
            dense = self.ActivationFn(self.LatentItemHistory(history))
        # dense shape (batch, history length, user embedding size)
        alpha1 = self.Softmax(torch.matmul(dense,torch.transpose(user, 1, 2)))
        # alpha1 shape (batch, history length, 1); softmax runs over the history
        profile = torch.sum(torch.mul(alpha1,dense),1)
        # profile shape (batch, user embedding size)
        h_0 = self._initial_hidden(profile)

        packed = nn.utils.rnn.pack_padded_sequence(
            inputs, input_lengths, batch_first=True, enforce_sorted=False)
        out_global, _ = self.Global(packed, h_0)
        out_global, lengths = nn.utils.rnn.pad_packed_sequence(out_global, batch_first=True)
        # out_global shape (batch, max length, hidden_size); zeros after each
        # sequence's last valid step

        # c_global is the hidden state at each sequence's LAST VALID step.
        # Using out_global[:, -1, :] would read the zero padding for every
        # sequence shorter than the longest one in the batch.
        last_step = (lengths - 1).to(out_global.device).view(-1, 1, 1)
        last_step = last_step.expand(-1, 1, self.hidden_size)
        c_global = torch.gather(out_global, 1, last_step)
        # c_global shape (batch, 1, hidden_size)
        c_local = self.calculate_c_local(out_global, c_global, lengths)
        # c_local shape (batch, 1, hidden_size)
        c = self.dropout(torch.cat((c_global, c_local), dim=2))
        # c shape (batch, 1, 2 * hidden_size)

        # Bilinear decoder, output feature 0:
        #     score[b, n] = embeds[n]^T W[0] c[b] + bias[0]
        # Evaluated as two matrix products, so no (batch x products) copies
        # of the embeddings and of c have to be materialised.
        embeds = self._item_embedding_matrix(c.device)
        # embeds shape (number of products, item embedding size)
        projected_items = torch.matmul(embeds, self.Decoder.weight[0])
        out = torch.matmul(c.squeeze(1), projected_items.t())
        if self.Decoder.bias is not None:
            out = out + self.Decoder.bias[0]
        # out shape (batch, number of products)
        output = self.Softmax(out)
        return output, None

    def calculate_c_local(self,hidden_states,c_global,lengths):
        """Attention-weighted sum of the valid hidden states of each sequence.

        The weight of step j is v(act(A1 * h_last + A2 * h_j)); the weights
        are not normalised. Steps at or beyond a sequence's length get weight 0.

        Args:
            hidden_states: (batch, max length, hidden_size)
            c_global: (batch, 1, hidden_size), last valid hidden state.
            lengths: number of valid steps per sequence.

        Returns:
            Tensor of shape (batch, 1, hidden_size).
        """
        lengths = torch.as_tensor(lengths, device=hidden_states.device)
        steps = torch.arange(hidden_states.shape[1], device=hidden_states.device)
        valid = steps.unsqueeze(0) < lengths.unsqueeze(1)
        valid = valid.unsqueeze(2).to(hidden_states.dtype)
        # A1(c_global) is (batch, 1, latent) and broadcasts over all steps.
        energy = self.ActivationFn(self.A1(c_global) + self.A2(hidden_states))
        alphas = self.v(energy) * valid
        # alphas shape (batch, max length, 1)
        return torch.sum(alphas * hidden_states, dim=1, keepdim=True)

    def top1loss(self, output, targets):
        """Mean of sigmoid(score - target score) + sigmoid(score ** 2).

        Args:
            output: (batch, number of products) scores.
            targets: LongTensor (batch, 1) with the target column per example.

        Returns:
            Scalar loss tensor; the mean runs over all entries of `output`.
        """
        scores_for_targets = torch.gather(output, 1, targets)
        loss = torch.mean(torch.sigmoid(output - scores_for_targets) +
            torch.sigmoid(output**2))
        return loss
        

In [12]:
# function to train a model
name_extension = ''


def train_model(model, optimizer, num_epochs=10,
                print_every=1, eval_every=1,
                batch_fn=get_minibatch,
                prep_fn=prepare_minibatch_pre_trained,
                eval_fn=recall,
                batch_size=10, eval_batch_size=None,
                pre_trained=True
               ):
    """Train `model`, checkpoint the best dev score and report final scores.

    Reads the module-level `train_data`, `dev_data`, `test_data` and
    `name_extension`. The checkpoint is written to
    "<model class name><name_extension>.pt".

    Args:
        model: module exposing `loss(output, targets)` and `vocab`; calling it
            on a prepared batch returns (output, extra).
        optimizer: optimizer over the model's parameters.
        num_epochs: number of passes over `train_data`.
        print_every: print and reset the accumulated training loss every this
            many epochs.
        eval_every: evaluate on `dev_data` every this many epochs. The dev set
            is additionally evaluated every 1000 training batches.
        batch_fn: yields batches from a data list.
        prep_fn: turns (batch, vocab) into (model input, targets).
        eval_fn: metric; called as eval_fn(model, data, batch_size=...,
            batch_fn=..., prep_fn=...) and returning (score, extra).
        batch_size: training batch size.
        eval_batch_size: evaluation batch size; defaults to `batch_size`.
        pre_trained: accepted for compatibility; not used here.

    Returns:
        (test score, second value returned by eval_fn on the test data).
    """
    if eval_batch_size is None:
        eval_batch_size = batch_size

    criterion = model.loss
    vocab = model.vocab
    path = "{}{}.pt".format(model.__class__.__name__, name_extension)

    start = time.time()
    train_loss = 0.
    eval_iter = 0
    # Start below any reachable score so the first evaluation always writes a
    # checkpoint, even when its score is 0.
    best = {"eval": float("-inf"), "iter": 0, "saved": False}

    # Training loss per print interval and dev scores per evaluation. They
    # are collected for inspection but are not part of the return value.
    losses = []
    accuracies = []

    def evaluate(data):
        return eval_fn(model, data, batch_size=eval_batch_size,
                       batch_fn=batch_fn, prep_fn=prep_fn)

    def evaluate_on_dev(epoch):
        """Score the dev set and checkpoint the model on a new best score."""
        accuracy, _ = evaluate(dev_data)
        accuracies.append(accuracy)
        print("epoch %r: dev acc=%.4f" % (epoch + 1, accuracy))

        # save best model parameters
        if accuracy > best["eval"]:
            print("new highscore")
            best["eval"] = accuracy
            best["iter"] = epoch + 1
            ckpt = {
              "state_dict": model.state_dict(),
              "optimizer_state_dict": optimizer.state_dict(),
              "best_eval": best["eval"],
              "best_iter": best["iter"]
            }
            torch.save(ckpt, path)
            best["saved"] = True

    for epoch in range(num_epochs):

        for example in batch_fn(train_data, batch_size=batch_size):
            # goes through the entire training data once, a.k.a. an epoch
            # do a garbage collect to make sure the memory is freed
            gc.collect()
            # evaluation switches the model to eval mode, so switch back
            model.train()
            x, targets = prep_fn(example, vocab)

            # output holds one score per product for every example
            output, alphas = model(x)

            eval_iter += 1

            loss = criterion(output, targets)
            train_loss += float(loss.item())

            # backward pass: erase previous gradients, compute new ones
            model.zero_grad()
            loss.backward()

            # update weights - take a small step in the opposite dir of the gradient
            optimizer.step()

            if eval_iter % 1000 == 0:
                evaluate_on_dev(epoch)

        if (epoch + 1) % print_every == 0:
            print("Epoch %r: loss=%.4f, time=%.2fs" %
                 (epoch + 1, train_loss, time.time()-start))
            losses.append(train_loss)
            train_loss = 0.

        if (epoch + 1) % eval_every == 0:
            evaluate_on_dev(epoch)

    # Done training
    # evaluate on train, dev, and test with best model
    if best["saved"]:
        print("Loading best model")
        ckpt = torch.load(path)
        model.load_state_dict(ckpt["state_dict"])
    else:
        # No evaluation ran, so nothing was written in this call; do not load
        # a file that may be left over from an earlier run.
        print("No checkpoint was written during training; evaluating current weights")

    train_acc, _ = evaluate(train_data)
    dev_acc, _ = evaluate(dev_data)
    test_acc, predictions = evaluate(test_data)

    print("best model iter {:d}: "
          "train acc={:.4f}, dev acc={:.4f}, test acc={:.4f}".format(
              best["iter"], train_acc, dev_acc, test_acc))

    return test_acc, predictions

In [13]:
def createSplits(data, k):
    """Split `data` into `k` contiguous folds for cross-validation.

    Fold i uses the slice [ceil(i*n/k), ceil((i+1)*n/k)) as dev set and every
    other position as training set. The split is positional, so an example
    that merely compares equal to a dev example stays in the training set,
    and no element-by-element membership scan is needed.

    Args:
        data: sliceable sequence of examples.
        k: number of folds.

    Returns:
        Dict mapping fold index to (train, dev); train is a list and dev is
        a slice of `data`.
    """
    folds = {}
    n = len(data)
    for i in range(k):
        lo = math.ceil(i * n / k)
        hi = math.ceil((i + 1) * n / k)
        dev = data[lo:hi]
        train = list(data[:lo]) + list(data[hi:])
        folds[i] = train, dev
    return folds

In [None]:
gc.collect()
# torch.device(...) accepts both a torch.device and a plain string such as
# 'cpu', and its .type can be compared reliably (a torch.device does not
# match the bare string 'cuda').
if torch.device(device).type == 'cuda':
    torch.cuda.empty_cache()

# One of the problems: too much memory is needed to run it on GPU, so manually set device='cpu'
# device='cpu'

train_data = trainData
test_data = testData
dev_data = testData[:100]

num_users = len(userList)
num_products = len(productList)

# item_embedding_dim, user_embedding_dim, hidden_size, output_dim, num_layers, vocab, 
model = NarmPlus(16,8,50,1,1,v,dropout=0.2,pre_trained=True, batch_size=5)
model.to(device)
optimizer = optim.Adam(model.parameters())

vocab = model.vocab
product_embeddings = Word2Vec(allSessions, size=model.item_embedding_dim, window=5, min_count=1)
user_embeddings = Doc2Vec(allUsers, vector_size=model.user_embedding_dim, window=5, min_count=1)
# Per-user vectors live in Doc2Vec's `docvecs`, keyed by document tag (the
# user id string). `wv` holds the *word* vectors, i.e. vectors of product
# ids, so looking a user id up in `wv` returned a product's vector.
# NOTE(review): written against gensim 3.x (`docvecs.doctags`) -- confirm
# against the installed version.
vocab.u2e = {tag: user_embeddings.docvecs[tag] for tag in user_embeddings.docvecs.doctags}
# Zero vector under key '0' unless a real user already has that id.
vocab.u2e.setdefault('0', np.zeros(model.user_embedding_dim))
vocab.p2e = product_embeddings.wv
# NOTE(review): '0' is expected to already be a product id in this
# vocabulary, in which case this add (replace=False) leaves the trained
# vector in place and padding is not a zero vector -- confirm.
vocab.p2e.add(['0'], [np.zeros(model.item_embedding_dim)])

a, p = train_model(model, optimizer, eval_fn=recall, 
                   num_epochs=10,
                   batch_size=5)

[[601, 602, 600, 3096, 2172, 7431, 1905, 3246, 1484, 5316, 3345, 3099, 64993, 4504, 3133, 1503, 66452, 3279, 2374, 238], [601, 602, 600, 3096, 2172, 7431, 1905, 3246, 1484, 3345, 66452, 5316, 64993, 3099, 4504, 3133, 2374, 39777, 7761, 1503], [601, 3096, 2172, 1905, 3246, 7431, 600, 602, 1484, 5316, 3099, 3345, 1503, 4504, 3279, 238, 3133, 64993, 1553, 3510], [601, 602, 600, 3096, 2172, 7431, 1905, 3246, 3345, 1484, 66452, 5316, 64993, 3099, 4504, 3133, 39777, 2374, 1503, 7761], [2172, 3096, 5316, 3246, 7431, 1503, 1905, 3345, 1484, 238, 3099, 3133, 2374, 10864, 233, 735, 3279, 4504, 3844, 7507], [601, 602, 600, 7431, 3096, 66452, 2172, 1905, 3246, 3345, 64993, 61410, 1484, 5316, 39777, 61414, 4504, 3099, 3133, 2374], [601, 602, 600, 3096, 2172, 7431, 1905, 3246, 1484, 3345, 5316, 66452, 64993, 3099, 39777, 4504, 3133, 2374, 1503, 7761], [2172, 3096, 5316, 3246, 7431, 1503, 1905, 3345, 3133, 1484, 3099, 238, 2374, 735, 10864, 4504, 3279, 4704, 7507, 921], [601, 602, 600, 3096, 2172, 19

In [None]:
# Scratch cell: a 4x5 grid holding 0..19, shown as a grid and flattened.
a = torch.arange(20).view(4, 5)
print(a)
flat = a.view(-1)
print(flat)

# ix = torch.tensor([[[0]*5],[[1]*5],[[3]*5]])
# print(ix.shape)
# print(a.shape)
# print(a)
# torch.gather(a,1,ix)