In [1]:
# 1. magic for inline plot
# 2. magic to print version
# 3. magic so that the notebook will reload external python modules
# 4. magic to enable retina (high resolution) plots
# https://gist.github.com/minrk/3301035
%matplotlib inline
%load_ext watermark
%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'retina'

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

# change default style figure and font size
plt.rcParams['figure.figsize'] = 8, 6
plt.rcParams['font.size'] = 12

%watermark -a 'Ethen' -d -t -v -p numpy,pandas,sklearn,matplotlib

Ethen 2018-05-14 19:58:09 

CPython 3.6.4
IPython 6.3.1

numpy 1.14.3
pandas 0.22.0
sklearn 0.19.1
matplotlib 2.2.2


In [2]:
import hashlib
import json
import os
import shutil
import sys

import numpy as np

from sklearn.model_selection import ParameterSampler

from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.cross_validation import user_based_train_test_split
from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.sequence.representations import CNNNet
from spotlight.evaluation import sequence_mrr_score

  from ._conv import register_converters as _register_converters


In [3]:
max_sequence_length = 200
min_sequence_length = 20
step_size = 200
random_state = np.random.RandomState(100)

dataset = get_movielens_dataset('100K')
dataset

<Interactions dataset (944 users x 1683 items x 100000 interactions)>

In [4]:
CUDA = (os.environ.get('CUDA') is not None or
        shutil.which('nvidia-smi') is not None)

NUM_SAMPLES = 1

LEARNING_RATES = [1e-3, 1e-2, 5 * 1e-2, 1e-1]
LOSSES = ['adaptive_hinge']  # ['bpr', 'hinge', 'adaptive_hinge', 'pointwise']
BATCH_SIZE = [8, 16, 32, 256]
EMBEDDING_DIM = [8, 16, 32, 64, 128, 256]
N_ITER = list(range(5, 20))
L2 = [1e-6, 1e-5, 1e-4, 1e-3, 1e-2, 0.0]

In [5]:
def sample_lstm_hyperparameters(random_state, num):

    space = {
        'n_iter': N_ITER,
        'batch_size': BATCH_SIZE,
        'l2': L2,
        'learning_rate': LEARNING_RATES,
        'loss': LOSSES,
        'embedding_dim': EMBEDDING_DIM
    }

    sampler = ParameterSampler(space,
                               n_iter=num,
                               random_state=random_state)
    for params in sampler:
        yield params

In [6]:
def evaluate_lstm_model(hyperparameters, train, test, validation, random_state):

    h = hyperparameters

    model = ImplicitSequenceModel(loss=h['loss'],
                                  representation='lstm',
                                  batch_size=h['batch_size'],
                                  learning_rate=h['learning_rate'],
                                  l2=h['l2'],
                                  n_iter=h['n_iter'],
                                  use_cuda=CUDA,
                                  random_state=random_state)

    model.fit(train, verbose=True)

    test_mrr = sequence_mrr_score(model, test)
    val_mrr = sequence_mrr_score(model, validation)

    return model, test_mrr, val_mrr

In [7]:
train, rest = user_based_train_test_split(dataset, test_percentage = 0.2,
                                          random_state=random_state)
test, validation = user_based_train_test_split(rest,
                                               test_percentage=0.5,
                                               random_state=random_state)

In [8]:
from spotlight.evaluation import mrr_score
from spotlight.factorization.implicit import ImplicitFactorizationModel

model = ImplicitFactorizationModel(n_iter=7,
                                   loss='adaptive_hinge')
model.fit(train)

val_mrr = mrr_score(model, validation)
test_mrr = mrr_score(model, test)
print('Test MRR {} val MRR {}'.format(
        test_mrr.mean(), val_mrr.mean()
    ))

  epoch_loss += loss.data[0]


Test MRR 0.031805849817807076 val MRR 0.035169679226644525


In [9]:
train_seq = train.to_sequence(max_sequence_length=max_sequence_length,
                          min_sequence_length=min_sequence_length,
                          step_size=step_size)
test_seq = test.to_sequence(max_sequence_length=max_sequence_length,
                        min_sequence_length=min_sequence_length,
                        step_size=step_size)
validation_seq = validation.to_sequence(max_sequence_length=max_sequence_length,
                                    min_sequence_length=min_sequence_length,
                                    step_size=step_size)
train_seq

<Sequence interactions dataset (855 sequences x 200 sequence length)>

In [10]:
eval_fnc, sample_fnc = (evaluate_lstm_model,
                        sample_lstm_hyperparameters)

for hyperparameters in sample_fnc(random_state, NUM_SAMPLES):
    print('Evaluating {}'.format(hyperparameters))

    (model, test_mrr, val_mrr) = eval_fnc(hyperparameters,
                                   train_seq,
                                   test_seq,
                                   validation_seq,
                                   random_state)

    print('Test MRR {} val MRR {}'.format(
        test_mrr.mean(), val_mrr.mean()
    ))

Evaluating {'n_iter': 19, 'loss': 'adaptive_hinge', 'learning_rate': 0.1, 'l2': 1e-05, 'embedding_dim': 128, 'batch_size': 256}


  epoch_loss += loss.data[0]


Epoch 0: loss 1.2338563203811646
Epoch 1: loss 1.0411996841430664
Epoch 2: loss 1.0209192037582397
Epoch 3: loss 0.9970923662185669
Epoch 4: loss 0.9197668433189392
Epoch 5: loss 0.8312190175056458
Epoch 6: loss 0.782036542892456
Epoch 7: loss 0.7287655472755432
Epoch 8: loss 0.6983942985534668
Epoch 9: loss 0.6871305108070374
Epoch 10: loss 0.6732932329177856
Epoch 11: loss 0.649735689163208
Epoch 12: loss 0.6486098766326904
Epoch 13: loss 0.640937328338623
Epoch 14: loss 0.621277391910553
Epoch 15: loss 0.61250239610672
Epoch 16: loss 0.6008219122886658
Epoch 17: loss 0.6032966375350952
Epoch 18: loss 0.5985197424888611
Test MRR 0.04692862489922453 val MRR 0.054846583599892015


## From Scratch

In [11]:
from scipy.stats import rankdata

sequences = test_seq.sequences[:, :-1]
targets = test_seq.sequences[:, -1]
print('sequences shape:', sequences.shape)
print('targets shape:', targets.shape)

sequences shape: (127, 199)
targets shape: (127,)


In [12]:
# sequence_mrr_score
FLOAT_MAX = np.finfo(np.float32).max

mrrs = []
for i in range(sequences.shape[0]):
    # negate the score to sort in decreasing order later
    predictions = -model.predict(sequences[i])
    # exclude seqeuences that already occurred
    predictions[sequences[i]] = FLOAT_MAX
    mrr = (1.0 / rankdata(predictions))[targets[i]]
    mrrs.append(mrr)

mrrs = np.array(mrrs)
mrrs.mean()

0.057476799713576686

In [13]:
def generate_sequence(user_ids, item_ids, indices, max_sequence_len, step_size):
    """
    
    Returns user_id along with the corresponding item_ids
    """
    for i in range(len(indices)):
        start = indices[i]
        if i >= len(indices) - 1:
            stop = None
        else:
            stop = indices[i + 1]
        
        tensor = item_ids[start:stop]
        for seq in sliding_window(tensor, max_sequence_len, step_size):
            yield user_ids[i], seq
        
def sliding_window(tensor, window_size, step_size):
    for i in range(len(tensor), 0, -step_size):
        yield tensor[max(i - window_size, 0):i]

In [14]:
step_size = max_sequence_length

# sort first by user id, then by timestamp
sort_indices = np.lexsort((train.timestamps, train.user_ids))
user_ids = train.user_ids[sort_indices]
item_ids = train.item_ids[sort_indices]
user_ids, indices, counts = np.unique(user_ids, return_index = True, return_counts = True)

num_subsequences = int(np.ceil(counts / step_size).sum())
sequences = np.zeros((num_subsequences, max_sequence_length), dtype = np.int)
sequence_users = np.empty(num_subsequences, dtype = np.int)

generated_seq = generate_sequence(
    user_ids, item_ids, indices, max_sequence_length, step_size)
for i, (user_id, seq) in enumerate(generated_seq):
    # perform pre-zero-padding while assigning the sub-sequences
    sequences[i, -len(seq):] = seq
    sequence_users[i] = user_id

print(sequences.shape)
if min_sequence_length is not None:
    long_enough = sequences[:, -min_sequence_length] != 0
    sequences = sequences[long_enough]
    sequence_users = sequence_users[long_enough]

print(sequences.shape)
sequences

(877, 200)
(855, 200)


array([[214, 182,  48, ...,   5,  74, 102],
       [  0,   0,   0, ...,  26,  89,   8],
       [  0,   0,   0, ..., 320, 317, 181],
       ...,
       [  0,   0,   0, ..., 879, 355, 316],
       [  0,   0,   0, ...,  95,  31, 662],
       [  0,   0,   0, ..., 230, 228, 234]])

The default initialization methods for layers is determined by the reset_parameters method

In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable
from spotlight.layers import ScaledEmbedding, ZeroEmbedding

PADDING_IDX = 0

def shuffle(array, random_state = 1234):
    rstate = np.random.RandomState(random_state)
    shuffle_indices = np.arange(array.shape[0])
    rstate.shuffle(shuffle_indices)
    return array[shuffle_indices]

def minibatch(array, batch_size = 128):
    for i in range(0, len(array), batch_size):
        yield array[i:i + batch_size]

In [16]:
# create a sequence interactions
# train.num_items, sequences, sequence_users
num_items = train.num_items

# 1. shuffle
seqs = shuffle(sequences)

# 2. convert to gpu if cuda is available
sequence_tensor = torch.from_numpy(seqs)

# 3. generate mini-batches, size of [batch size, max_sequence_length]
sequence_batch = next(minibatch(sequence_tensor, batch_size = 2))
sequence_var = Variable(sequence_batch)
sequence_var

tensor([[    0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,     0,
             0,     0,     0,     0,     0,     0,     0,     0,

Understanding the behavior of padding in torch.

In [17]:
inputs = torch.randn(1, 2, 4)
F.pad(inputs.unsqueeze(3), (0, 0, 1, 0)).size()

torch.Size([1, 2, 5, 1])

In [18]:
inputs = torch.randn(1, 2, 4)
F.pad(inputs, (0, 0, 0, 1))

tensor([[[ 0.8845, -1.1054,  0.1424, -0.3206],
         [ 0.1817,  0.5613, -0.8864, -0.2151],
         [ 0.0000,  0.0000,  0.0000,  0.0000]]])

In [21]:
from representations import LSTMNet

torch.manual_seed(1234)
net = LSTMNet(num_items)

# perform padding with zeros from the left on the
# [max_sequence_length] dimension
sequence_embeddings1 = net.item_embedding(sequence_var)
sequence_embeddings1 = F.pad(sequence_embeddings1, (0, 0, 1, 0))
print(sequence_embeddings1.size())
print(sequence_embeddings1)

torch.Size([2, 201, 32])
tensor([[[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [ 0.0169,  0.0165,  0.0149,  ..., -0.0250, -0.0165, -0.0360],
         [ 0.0460, -0.0251, -0.0332,  ...,  0.0203,  0.0172,  0.0537],
         [ 0.0307,  0.0455,  0.0651,  ...,  0.0521,  0.0297,  0.0208]],

        [[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
         ...,
         [-0.0185,  0.0262, -0.0536,  ...,  0.0372,  0.0189, -0.0139],
         [ 0.0255,  0.0020,  0.0259,  ...,  0.0111,  0.0833, -0.0163],
         [ 0.0585,  0.0156, -0.0272,  ..., -0.0076, -0.0270,  0.0429]]])


In [25]:
# make embedding dimension the channel, i.e.
# size of [batch_size, embedding_dim, max_sequence_length]
sequence_embeddings = net.item_embedding(sequence_var).permute(0, 2, 1)
print(sequence_embeddings.size())

# add a trailing dimension of 1 to perform
# padding with zeros from the left on the
# max_sequence_length dimension; then remove the
# training dimension once we're done
sequence_embeddings = sequence_embeddings.unsqueeze(3)
sequence_embeddings = F.pad(sequence_embeddings, (0, 0, 1, 0)).squeeze(3)
sequence_embeddings = sequence_embeddings.permute(0, 2, 1)
print(sequence_embeddings.size())

user_representations, _ = net.lstm(sequence_embeddings)
user_representations = user_representations.permute(0, 2, 1)
print(user_representations.size())

# user_representations[:, :, :-1], user_representations[:, :, -1]

torch.Size([2, 32, 200])
torch.Size([2, 201, 32])
torch.Size([2, 32, 201])


In [36]:
target_embedding = net.item_embedding(sequence_var).permute(0, 2, 1)
target_bias = net.item_biases(sequence_var).squeeze()
dot = (user_representations[:, :, :-1] * target_embedding).sum(dim = 1)
output = dot + target_bias

??  SESSION-PARALLEL MINI-BATCHES