In [1]:
# Fetch the Spotlight source tree into ./spotlight; the next cell installs the package from this checkout.
!git clone https://github.com/maciejkula/spotlight.git

Cloning into 'spotlight'...
remote: Enumerating objects: 3396, done.[K
remote: Total 3396 (delta 0), reused 0 (delta 0), pack-reused 3396[K
Receiving objects: 100% (3396/3396), 9.01 MiB | 1000.00 KiB/s, done.
Resolving deltas: 100% (2234/2234), done.


In [2]:
# Install the cloned checkout with the %pip magic instead of a `!pip` subshell,
# so the package lands in the environment of the running kernel.
%pip install ./spotlight

Processing /content/spotlight
Building wheels for collected packages: spotlight
  Building wheel for spotlight (setup.py) ... [?25l[?25hdone
  Created wheel for spotlight: filename=spotlight-0.1.6-cp36-none-any.whl size=33921 sha256=8778d5381b784d23e942ba12564d522588e9e8eceeb70135358d0b89873f87cd
  Stored in directory: /tmp/pip-ephem-wheel-cache-imu1oazg/wheels/aa/35/94/c1f256fcf5e8f90a60a6733fdcc982ecbb2d249e1868cefb98
Successfully built spotlight
Installing collected packages: spotlight
Successfully installed spotlight-0.1.6


In [3]:
import hashlib
import json
import os
import shutil
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

from sklearn.model_selection import ParameterSampler
from spotlight.datasets.movielens import get_movielens_dataset
from spotlight.cross_validation import user_based_train_test_split
from spotlight.sequence.implicit import ImplicitSequenceModel
from spotlight.sequence.representations import LSTMNet
from spotlight.evaluation import sequence_mrr_score

In [5]:
class GRU(nn.Module):
    """Stacked-GRU sequence representation with dot-product item scoring.

    Items are embedded, run through a multi-layer GRU, and the resulting
    hidden states are scored against target item embeddings (plus a
    per-item bias) by a dot product over the embedding dimension.

    Parameters
    ----------
    num_items : int
        Number of rows in the item embedding and item bias tables.
    embedding_dim : int, optional
        Size of the item embeddings; also used as the GRU hidden size.
    num_layers : int, optional
        Number of stacked GRU layers.
    dropout : float, optional
        Dropout probability between GRU layers. Ignored (set to 0) when
        ``num_layers`` is 1, since there is no inter-layer connection.
    """

    def __init__(self, num_items, embedding_dim=32, num_layers=3, dropout=0.5):
        super().__init__()

        self.embedding_dim = embedding_dim
        self.item_embeddings = nn.Embedding(num_items, embedding_dim)

        self.item_biases = nn.Embedding(num_items, 1)

        self.gru = nn.GRU(batch_first=True,
                          input_size=embedding_dim,
                          hidden_size=embedding_dim,
                          num_layers=num_layers,
                          # nn.GRU only applies dropout between layers.
                          dropout=dropout if num_layers > 1 else 0.0)

    def user_representation(self, item_sequences):
        """Compute GRU hidden states for a batch of item-id sequences.

        Parameters
        ----------
        item_sequences : tensor of item ids, shape (batch, seq_len)

        Returns
        -------
        tuple of (tensor, tensor)
            - shape (batch, embedding_dim, seq_len): the state at position
              ``t`` has seen only the items before ``t``.
            - shape (batch, embedding_dim): the state after the whole
              sequence has been consumed.
        """
        embeds = self.item_embeddings(item_sequences)  # (batch, seq_len, dim)
        # Prepend one all-zero timestep along the sequence axis so that the
        # GRU output at index t depends only on items strictly before t.
        embeds = F.pad(embeds, (0, 0, 1, 0))  # (batch, seq_len + 1, dim)

        hidden_states, _ = self.gru(embeds)
        # Make the embedding dimension the channel (middle) dimension.
        hidden_states = hidden_states.permute(0, 2, 1)

        return hidden_states[..., :-1], hidden_states[..., -1]

    def forward(self, user_representations, targets):
        """Score target items against user representations.

        Parameters
        ----------
        user_representations : tensor
            Either (batch, embedding_dim, seq_len) for per-position scoring,
            or (batch, embedding_dim) for scoring against a single state.
        targets : tensor of item ids, shape (batch, n_targets)

        Returns
        -------
        tensor
            (batch, n_targets) for 3-d representations. For 2-d
            representations the trailing axis is dropped when
            ``n_targets`` is 1, giving shape (batch,).
        """
        # Explicit axes instead of bare ``squeeze()``: a batch of one or a
        # single target must not lose its batch axis.
        target_embedding = self.item_embeddings(targets).permute(0, 2, 1)
        target_bias = self.item_biases(targets).squeeze(-1)

        single_state = user_representations.dim() == 2
        if single_state:
            # (batch, dim) -> (batch, dim, 1) so it broadcasts over targets.
            user_representations = user_representations.unsqueeze(-1)

        scores = (user_representations * target_embedding).sum(1) + target_bias

        return scores.squeeze(-1) if single_state else scores

In [10]:
# Windowing parameters applied identically to every split below.
max_sequence_length = 7
min_sequence_length = None
step_size = 1
random_state = np.random.RandomState(100)

dataset = get_movielens_dataset('1M')

# Two successive splits share one RandomState: the full dataset is divided
# into train and a remainder, then the remainder is halved.
train, rest = user_based_train_test_split(dataset, random_state=random_state)
test, validation = user_based_train_test_split(
    rest,
    test_percentage=0.5,
    random_state=random_state,
)

# Convert each split to its sequence form, in train -> test -> validation order.
train, test, validation = (
    split.to_sequence(max_sequence_length=max_sequence_length,
                      min_sequence_length=min_sequence_length,
                      step_size=step_size)
    for split in (train, test, validation)
)

In [16]:
# Sequence representation under test. Alternative representation:
# LSTMNet(train.num_items, embedding_dim=100).
net = GRU(train.num_items, embedding_dim=32)

model = ImplicitSequenceModel(
    representation=net,
    loss='bpr',
    n_iter=15,
    batch_size=128,
    learning_rate=5e-3,
    l2=1e-5,
    use_cuda=torch.cuda.is_available(),
    random_state=random_state,
)

model.fit(train, verbose=True)

# Mean MRR on the held-out splits, displayed as (test, validation).
tuple(sequence_mrr_score(model, split).mean() for split in (test, validation))

Epoch 0: loss 0.2017378723654667
Epoch 1: loss 0.19739369838977944
Epoch 2: loss 0.19701408993360622
Epoch 3: loss 0.19754002839445783
Epoch 4: loss 0.1973850923305688
Epoch 5: loss 0.19746466446712507
Epoch 6: loss 0.1973836047475279
Epoch 7: loss 0.19762609945662193
Epoch 8: loss 0.1972320837966937
Epoch 9: loss 0.1974905374377419
Epoch 10: loss 0.19742278506160257
Epoch 11: loss 0.19743469977797456
Epoch 12: loss 0.19734470864485856
Epoch 13: loss 0.19716169162454933
Epoch 14: loss 0.1973626666300219


(0.013560227251241503, 0.013796930220500242)