In [2]:
import numpy as np
import scipy as sp
import scipy.sparse as sps
import matplotlib.pyplot as plt

# Open the training interactions file in text read mode; the cell consumes it
# line by line further down.
URM_file = open('data/train.csv', 'r')

def rowSplit (rowString):
    """Parse one comma-separated line into a tuple.

    The first two fields are converted to ``int``; any further fields are
    kept as the raw strings produced by the split.

    :param rowString: one line of text with at least two comma-separated fields
    :return: tuple ``(int, int, *remaining_string_fields)``
    """
    fields = rowString.split(',')
    first_id, second_id = int(fields[0]), int(fields[1])
    return (first_id, second_id, *fields[2:])

try:
    # Skip the first line of the file (presumably the CSV header — TODO confirm).
    next(URM_file)

    # Parse every remaining line into a tuple of integer ids.
    URM_tuples = [rowSplit(line) for line in URM_file]
finally:
    # The file has been fully consumed (or parsing failed): release the handle
    # instead of leaving it open for the lifetime of the kernel.
    URM_file.close()

# Unzip the (playlist, track) pairs into two parallel coordinate lists.
playlist_list, track_list = zip(*URM_tuples)

playlist_list = list(playlist_list)
track_list = list(track_list)

# Implicit feedback: every observed (playlist, track) pair gets rating 1.
ratings_list = np.ones(len(playlist_list))

# Build the matrix in COO form (rows = playlists, columns = tracks), then
# convert to CSR for the row-oriented access used later in the notebook.
URM_all = sps.coo_matrix((ratings_list, (playlist_list, track_list)))
URM_all = URM_all.tocsr()

from Notebooks_utils.data_splitter import train_test_holdout

# Split the interactions into a train and a test matrix. train_perc = 0.8
# presumably keeps 80% of the interactions for training (helper not shown — TODO confirm).
# NOTE(review): no random seed is visible here, so the split may differ between runs — confirm.
URM_train, URM_test = train_test_holdout(URM_all, train_perc = 0.8)

In [3]:
# Dimensionality of the latent factor vectors used for both embeddings below.
num_factors = 10

# Matrix dimensions; rows are presumably playlists and columns tracks, as in
# URM_all (assumes the split helper keeps the orientation — TODO confirm).
n_playlists, n_tracks = URM_train.shape

In [5]:
import torch

# Stand-alone embedding tables used to walk through the model by hand:
# one num_factors-sized vector per playlist and one per track.
playlist_factors = torch.nn.Embedding(num_embeddings = n_playlists, embedding_dim = num_factors)
track_factors = torch.nn.Embedding(num_embeddings = n_tracks, embedding_dim = num_factors)

In [6]:
playlist_factors

Embedding(50446, 10)

In [7]:
track_factors

Embedding(20635, 10)

In [8]:
# Linear layer mapping a num_factors-sized vector to a single output value.
layer_1 = torch.nn.Linear(in_features = num_factors, out_features = 1)

layer_1

Linear(in_features=10, out_features=1, bias=True)

In [9]:
# ReLU clamps negative inputs to zero, so the final prediction is never negative.
activation_function = torch.nn.ReLU()

activation_function

ReLU()

In [11]:
from torch.autograd import Variable

# Pick a single (playlist, track) pair to push through the layers by hand.
playlist_index = [15]
track_index = [42]

# Embedding lookups need integer index tensors. Build them directly as int64
# instead of creating a float tensor and casting it afterwards. Wrapping in
# torch.autograd.Variable is no longer needed: plain tensors are tracked by
# autograd since PyTorch 0.4.
track_index = torch.tensor(track_index, dtype=torch.long)
playlist_index = torch.tensor(playlist_index, dtype=torch.long)

# Look up the latent factor vector of the selected playlist and track.
current_playlist_factors = playlist_factors(playlist_index)
current_track_factors = track_factors(track_index)

# Element-wise product of the two factor vectors; summing it would give the
# classic matrix-factorization dot product.
element_wise_product = torch.mul(current_playlist_factors, current_track_factors)

In [12]:
current_playlist_factors

tensor([[-0.2705,  0.2640, -1.6621,  0.8074,  1.0417, -1.4686,  0.0150,  2.0636,
         -0.8746, -1.9344]], grad_fn=<EmbeddingBackward>)

In [13]:
current_track_factors

tensor([[ 0.5640,  0.5401, -0.3379,  0.1828,  1.9412, -1.9180, -0.9178,  1.3724,
          0.2269, -0.6731]], grad_fn=<EmbeddingBackward>)

In [14]:
element_wise_product

tensor([[-0.1526,  0.1426,  0.5615,  0.1476,  2.0222,  2.8168, -0.0138,  2.8322,
         -0.1984,  1.3021]], grad_fn=<MulBackward0>)

In [15]:
# Feed the element-wise product through the linear layer, then the activation.
prediction = activation_function(layer_1(element_wise_product))

# Detach from the autograd graph before converting to a NumPy array.
prediction_numpy = prediction.detach().numpy()

print("Prediction is {}".format(prediction_numpy))

Prediction is [[0.]]


### Step 1: Create a model Python object

In [16]:
class MF_MSE_PyTorch_model(torch.nn.Module):
    """Matrix-factorization style model with a linear output layer.

    A playlist embedding and a track embedding are multiplied element-wise,
    passed through a linear layer with one output, and then through a ReLU.
    """

    def __init__(self, n_playlists, n_tracks, n_factors):
        """
        :param n_playlists: number of rows in the playlist embedding table
        :param n_tracks: number of rows in the track embedding table
        :param n_factors: size of each embedding vector
        """
        super().__init__()

        self.n_playlists, self.n_tracks, self.n_factors = n_playlists, n_tracks, n_factors

        # Layers are created in the same order as before (playlist, track,
        # linear) so that seeded random initialisation is unchanged.
        self.playlist_factors = torch.nn.Embedding(n_playlists, n_factors)
        self.track_factors = torch.nn.Embedding(n_tracks, n_factors)
        self.layer_1 = torch.nn.Linear(n_factors, 1)
        self.activation_function = torch.nn.ReLU()

    def forward(self, playlist_coordinates, track_coordinates):
        """Return the prediction for each (playlist, track) index pair.

        :param playlist_coordinates: integer tensor of playlist indices
        :param track_coordinates: integer tensor of track indices, same length
        :return: tensor with one trailing dimension of size 1 per pair
        """
        interaction = self.playlist_factors(playlist_coordinates) * self.track_factors(track_coordinates)
        return self.activation_function(self.layer_1(interaction))

    @staticmethod
    def _weights_as_numpy(embedding):
        # Detach from autograd and move to host memory before converting.
        return embedding.weight.detach().cpu().numpy()

    def get_W(self):
        """Return the playlist embedding table as a NumPy array."""
        return self._weights_as_numpy(self.playlist_factors)

    def get_H(self):
        """Return the track embedding table as a NumPy array."""
        return self._weights_as_numpy(self.track_factors)

In [17]:
# Toggle to request GPU execution; the CPU is used whenever this is False or
# CUDA is not available.
use_cuda = False

if not (use_cuda and torch.cuda.is_available()):
    device = torch.device('cpu')
    print("MF_MSE_PyTorch: Using CPU")
else:
    device = torch.device('cuda')
    print("MF_MSE_PyTorch: Using CUDA")

MF_MSE_PyTorch: Using CPU


In [18]:
# Instantiate the model and move its parameters to the selected device.
pyTorchModel = MF_MSE_PyTorch_model(n_playlists, n_tracks, num_factors).to(device)

In [19]:
# Sum (rather than average) the squared errors over the batch.
# `reduction='sum'` is the supported spelling of the deprecated
# `size_average=False` and yields the same loss value.
lossFunction = torch.nn.MSELoss(reduction='sum')



In [20]:
# Step size handed to the optimizer.
learning_rate = 1e-4

# Adagrad over every trainable parameter of the model (both embedding tables
# and the linear layer).
optimizer = torch.optim.Adagrad(pyTorchModel.parameters(), lr = learning_rate)

In [21]:
from torch.utils.data import Dataset
import numpy as np

class DatasetIterator_URM(Dataset):
    """Expose the non-zero entries of a sparse matrix as a PyTorch dataset.

    Each item is one stored entry of the matrix: its (row, column)
    coordinates and the corresponding value.
    """

    def __init__(self, URM):
        """
        :param URM: scipy sparse matrix (anything providing ``tocoo()``)
        """

        URM = URM.tocoo()

        self.n_data_points = URM.nnz

        # Keep the coordinates as int64 from the start. The previous version
        # stored them in a float array and went through a float32 tensor,
        # which cannot represent every index above 2**24 exactly.
        coordinates = np.empty((self.n_data_points, 2), dtype=np.int64)
        coordinates[:, 0] = URM.row
        coordinates[:, 1] = URM.col

        self.playlist_track_coordinates = torch.from_numpy(coordinates)

        # `np.float` no longer exists in NumPy >= 1.24. The ratings ended up
        # as float32 anyway, so convert straight to that dtype; `astype`
        # returns a new array, so the tensor does not alias the matrix data.
        self.rating = torch.from_numpy(URM.data.astype(np.float32))


    def __getitem__(self, index):
        """
        Format is ((row, col), data)
        :param index: position of the entry among the stored non-zero entries
        :return: tuple (int64 tensor holding [row, col], float32 rating)
        """

        return self.playlist_track_coordinates[index, :], self.rating[index]


    def __len__(self):
        """Number of stored (non-zero) entries."""

        return self.n_data_points

In [22]:
from torch.utils.data import DataLoader

# Number of (playlist, track) interactions per optimisation step.
batch_size = 200

# Wrap the training matrix so that its interactions can be served in batches.
dataset_iterator = DatasetIterator_URM(URM_train)

# Interactions are reshuffled every time the loader is iterated.
train_data_loader = DataLoader(dataset = dataset_iterator, batch_size = batch_size, shuffle = True)

In [40]:
# Running total of the per-batch losses for this pass over the data. It is
# initialised once, before the loop, so that it actually accumulates.
cumulative_loss = 0

for num_batch, (input_data, label) in enumerate(train_data_loader, 0):

    # Move the batch to the selected device. Plain tensors are tracked by
    # autograd, so no Variable wrapper is needed.
    input_data_tensor = input_data.to(device)
    label_tensor = label.to(device)

    # Column 0 holds the playlist index, column 1 the track index.
    playlist_coordinates = input_data_tensor[:,0]
    track_coordinates = input_data_tensor[:,1]

    # FORWARD pass
    prediction = pyTorchModel(playlist_coordinates, track_coordinates)

    # Pass prediction and label removing last empty dimension of prediction
    loss = lossFunction(prediction.view(-1), label_tensor)
    cumulative_loss += loss.item()

    if num_batch % 100 == 0:

        print("Batch {} of {}, loss {:.4f}".format(num_batch, len(train_data_loader), loss.item()))

    # BACKWARD pass: clear stale gradients, backpropagate, update parameters.
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

Batch 0 of 4847, loss 128.0904
Batch 100 of 4847, loss 133.2470
Batch 200 of 4847, loss 133.2471
Batch 300 of 4847, loss 126.5507
Batch 400 of 4847, loss 132.5936
Batch 500 of 4847, loss 131.2636
Batch 600 of 4847, loss 127.4965
Batch 700 of 4847, loss 126.3322
Batch 800 of 4847, loss 125.1275
Batch 900 of 4847, loss 131.3287
Batch 1000 of 4847, loss 128.0992
Batch 1100 of 4847, loss 133.8381
Batch 1200 of 4847, loss 132.2967
Batch 1300 of 4847, loss 130.2351
Batch 1400 of 4847, loss 125.2143
Batch 1500 of 4847, loss 130.6168
Batch 1600 of 4847, loss 136.3115
Batch 1700 of 4847, loss 126.7903
Batch 1800 of 4847, loss 131.0420
Batch 1900 of 4847, loss 133.6401
Batch 2000 of 4847, loss 131.8910
Batch 2100 of 4847, loss 127.1952
Batch 2200 of 4847, loss 126.9921
Batch 2300 of 4847, loss 136.7751
Batch 2400 of 4847, loss 129.1919
Batch 2500 of 4847, loss 130.7349
Batch 2600 of 4847, loss 127.8732
Batch 2700 of 4847, loss 127.1365
Batch 2800 of 4847, loss 122.8699
Batch 2900 of 4847, loss 1

In [41]:
# Pull the learned embedding tables out of the model as NumPy arrays:
# W holds the playlist factors, H the track factors.
W = pyTorchModel.get_W()
H = pyTorchModel.get_H()

In [42]:
W.shape

(50446, 10)

In [43]:
H.shape

(20635, 10)

In [44]:
class Recommender(object):
    """Rank tracks for a playlist from two latent factor matrices.

    Scores are the dot product between the playlist's row of ``W`` and every
    row of ``H``.

    NOTE(review): if ``W`` and ``H`` come from a model that applies further
    layers on top of the factors, these scores are not that model's
    predictions — confirm this is intended.
    """

    def __init__(self, URM, W, H):
        """
        :param URM: sparse interaction matrix (playlists x tracks) used to
            look up the tracks a playlist already contains
        :param W: array of shape (n_playlists, n_factors)
        :param H: array of shape (n_tracks, n_factors)
        """
        # filter_seen slices indptr/indices row by row, which is only valid
        # for CSR. For a matrix that is already CSR this does not copy.
        self.URM = URM.tocsr()
        self.W = W
        # Stored transposed so that W[playlist].dot(H) yields one score per track.
        self.H = H.T

    def recommend(self, playlist_id, at=None, exclude_seen=True, output=False):
        """Return track indices ordered by decreasing score.

        :param playlist_id: row index of the playlist
        :param at: number of recommendations to return (``None`` = all tracks)
        :param exclude_seen: if True, tracks already in the playlist are
            pushed to the bottom of the ranking
        :param output: if True, also print ``"<playlist_id>, <tracks>"`` with
            the returned tracks separated by spaces
        :return: array of track indices, best first
        """
        # compute the scores using the dot product
        playlist_factors = self.W[playlist_id]
        scores = playlist_factors.dot(self.H).ravel()

        if exclude_seen:
            scores = self.filter_seen(playlist_id, scores)

        # rank items, best first
        ranking = scores.argsort()[::-1]
        recommendations = ranking[:at]

        # output for challenge
        if output:
            # str.join only accepts strings, so the integer indices must be
            # converted first; only the returned recommendations are printed.
            print("{}, {}".format(playlist_id, " ".join(map(str, recommendations))))

        return recommendations


    def filter_seen(self, playlist_id, scores):
        """Set the score of every track already in the playlist to -inf.

        ``scores`` is modified in place and also returned.
        """

        start_pos = self.URM.indptr[playlist_id]
        end_pos = self.URM.indptr[playlist_id+1]

        # Column indices of the entries stored in this playlist's row.
        playlist = self.URM.indices[start_pos:end_pos]

        scores[playlist] = -np.inf

        return scores

In [45]:
# Build a recommender from the training interactions and the learned factors.
recommender = Recommender(URM_train, W, H)

from Notebooks_utils.evaluation_function import evaluate_algorithm

# Score the recommender against the held-out interactions; the helper reports
# precision, recall and MAP (at=10 is presumably the recommendation-list cutoff — TODO confirm).
evaluate_algorithm(URM_test, recommender, at=10)

Evaluated user 0 of 50446
Evaluated user 10000 of 50446
Evaluated user 20000 of 50446
Evaluated user 30000 of 50446
Evaluated user 40000 of 50446
Evaluated user 50000 of 50446
Recommender performance is: Precision = 0.0002, Recall = 0.0004, MAP = 0.0001


{'precision': 0.0002275456671512542,
 'recall': 0.00040667485390355376,
 'MAP': 0.00011600968423789306}