In [1]:
from fastai.collab import *
from fastai.tabular.all import *
# Fetch the ML_100k dataset through fastai's untar_data and keep the returned path.
path = untar_data(URLs.ML_100k)
# Keep rendered DataFrames short: pandas shows at most 7 rows before truncating.
pd.set_option("display.max_rows", 7)

In [5]:
# u.data is tab-separated and has no header row, so the column names are supplied here.
ratings = pd.read_csv(
    path/'u.data',
    sep='\t',
    header=None,
    names=['user', 'item', 'rating', 'timestamp'],
)
ratings.head()

Unnamed: 0,user,item,rating,timestamp
0,196,242,3,881250949
1,186,302,3,891717742
2,22,377,1,878887116
3,244,51,2,880606923
4,166,346,1,886397596


In [6]:
# Build collaborative-filtering DataLoaders from the ratings frame.
# The user and rating columns are spelled out explicitly; they are the same
# columns from_df falls back to (first and third column of `ratings`).
dls = CollabDataLoaders.from_df(
    ratings,
    user_name='user',
    item_name='item',
    rating_name='rating',
    bs=64,
)
dls.show_batch()

Unnamed: 0,user,item,rating
0,181,1011,1
1,326,205,4
2,8,210,4
3,71,134,3
4,479,1016,3
5,823,204,4
6,381,526,4
7,82,220,2
8,6,202,3
9,268,169,5


In [100]:
# Vocabulary sizes of the two categorical columns, used to size the embedding tables.
n_users, n_items = (len(dls.classes[col]) for col in ('user', 'item'))


def predict(embedding, bias, model, y_range=(0, 5.5)):
    """Score one embedding/bias pair against every item row stored in ``model``.

    For each row ``i`` of ``model.item_factors.weight`` the score is the sum over
    dim 0 of ``embedding * row`` plus ``bias`` plus ``model.item_bias.weight[i]``,
    passed through ``sigmoid_range`` with ``y_range``.

    Args:
        embedding: tensor multiplied elementwise with each item row
            (assumed 1-D with one value per factor -- TODO confirm with callers).
        bias: bias term added to every score.
        model: object exposing ``item_factors.weight`` and ``item_bias.weight``.
        y_range: ``(low, high)`` pair forwarded to ``sigmoid_range``.

    Returns:
        A list with one ``sigmoid_range`` result per item row, in row order.
    """
    item_vectors = model.item_factors.weight
    item_offsets = model.item_bias.weight

    def _score(idx):
        # Dot product first, then both bias terms, then squash into y_range.
        dot = (embedding * item_vectors[idx]).sum(dim=[0], keepdim=True)
        dot += bias + item_offsets[idx]
        return sigmoid_range(dot, *y_range)

    return [_score(idx) for idx in range(len(item_vectors))]

In [48]:
class DotProductBias(Module):
    """Dot-product collaborative-filtering model with per-user and per-item biases.

    A prediction is the dot product of a user embedding and an item embedding,
    plus the two bias terms, passed through ``sigmoid_range`` with ``y_range``.
    """

    def __init__(self, n_users, n_items, n_factors, y_range=(0, 5.5)):
        # Creation order (user factors, user bias, item factors, item bias) is kept
        # so parameter registration and random initialisation order are unchanged.
        self.user_factors, self.user_bias = Embedding(n_users, n_factors), Embedding(n_users, 1)
        self.item_factors, self.item_bias = Embedding(n_items, n_factors), Embedding(n_items, 1)
        self.y_range = y_range

    def forward(self, x):
        """Predict for a batch ``x`` whose column 0 indexes users and column 1 indexes items."""
        user_ids, item_ids = x[:, 0], x[:, 1]
        # Row-wise dot product; keepdim keeps a trailing axis of size 1 to match the biases.
        score = (self.user_factors(user_ids) * self.item_factors(item_ids)).sum(dim=1, keepdim=True)
        score += self.user_bias(user_ids) + self.item_bias(item_ids)
        return sigmoid_range(score, *self.y_range)

In [58]:
# 50 latent factors per user and per item.
model = DotProductBias(n_users, n_items, n_factors=50)

In [42]:
# Train with mean-squared error: 5 epochs of the one-cycle schedule, peak learning rate 5e-3.
learn = Learner(dls=dls, model=model, loss_func=MSELossFlat())
learn.fit_one_cycle(n_epoch=5, lr_max=5e-3)

epoch,train_loss,valid_loss,time
0,0.942658,0.946143,00:18
1,0.84819,0.869783,00:17
2,0.632458,0.87136,00:20
3,0.40501,0.896552,00:27
4,0.305068,0.903306,00:16


In [101]:
# Pull the factor matrices out of the model as NumPy arrays.
# `.cpu()` is a no-op for CPU tensors but is required when the model lives on a
# GPU, where calling `.numpy()` directly raises a TypeError.
user_emb = model.user_factors.weight.detach().cpu().numpy()
movie_emb = model.item_factors.weight.detach().cpu().numpy()

In [102]:
# Display the user-factor weights as a NumPy array (one row per user index).
user_emb

array([[ 0.00300561,  0.00545409,  0.01611459, ..., -0.00802541,
         0.00308865,  0.00795598],
       [ 0.00669236,  0.00808851,  0.01297864, ..., -0.00057372,
        -0.00163801,  0.01362812],
       [-0.01565048, -0.00688873, -0.01458677, ...,  0.00641689,
        -0.00631771,  0.00134724],
       ...,
       [ 0.01662381,  0.00019943,  0.01268094, ...,  0.00234189,
         0.01166631,  0.00349365],
       [-0.007628  ,  0.01005244,  0.01886861, ...,  0.00227216,
        -0.00410348,  0.00831523],
       [ 0.0010237 ,  0.00279218, -0.00380604, ..., -0.00625532,
         0.00921197, -0.00944525]], dtype=float32)

In [103]:
# Display the item-factor weights as a NumPy array (one row per item index).
movie_emb

array([[-5.8677136e-03,  3.9618937e-03,  1.5583890e-05, ...,
         1.5084865e-02,  1.7483649e-03, -1.4155526e-02],
       [ 7.2811120e-03,  1.4047501e-02,  4.5355987e-03, ...,
         9.4377827e-03, -3.7961430e-04,  1.5529155e-04],
       [ 1.8577771e-02,  8.4566325e-03, -9.7586011e-04, ...,
         7.3408271e-04,  4.1882214e-03, -1.7212344e-02],
       ...,
       [-2.9755672e-03,  5.8503868e-04,  1.1957907e-02, ...,
         6.0175359e-03, -2.2901578e-03, -1.9396229e-02],
       [-4.7475412e-03, -1.2245819e-02,  8.7677287e-03, ...,
        -1.8009342e-02, -9.6471095e-03, -1.8564990e-02],
       [-4.9810829e-03, -9.7797695e-04, -2.1940868e-03, ...,
        -1.2025703e-02,  1.0768735e-02, -5.8419355e-03]], dtype=float32)

In [None]:
def create_matrix(df):
     matrix = np.matrix(

In [None]:
def find_similar_user(user_ratings):
    """Find the user most similar to the given ratings (not implemented yet).

    Args:
        user_ratings: ratings of a single user, laid out as 1 x n_items.

    Returns:
        None for now: the body contains no statements besides this docstring.
    """
    # TODO: compute cosine similarities -- presumably between user_ratings and the known users.
   
    

In [50]:
# Inspect the raw item-factor Parameter (still tracked by autograd, unlike the NumPy arrays).
model.item_factors.weight

Parameter containing:
tensor([[ 4.4804e-03, -6.5241e-03,  1.2256e-02,  ..., -1.1129e-02,
         -1.0121e-02, -1.9264e-02],
        [-5.7384e-03,  3.6114e-03, -7.6112e-04,  ..., -1.0173e-03,
          9.5150e-04, -1.1986e-02],
        [ 1.4777e-02,  9.5049e-03,  2.9941e-03,  ...,  4.5610e-03,
          5.8208e-03,  1.7355e-03],
        ...,
        [-2.4754e-03,  7.5255e-03,  4.3815e-03,  ...,  3.2147e-03,
         -1.3365e-03,  1.0554e-02],
        [-1.1059e-02, -2.6476e-03,  1.2577e-02,  ..., -8.3473e-03,
         -6.3037e-03, -4.5673e-05],
        [ 3.0410e-03,  6.8900e-03, -1.6294e-02,  ..., -2.3765e-03,
          4.4283e-03,  6.5018e-04]], requires_grad=True)

In [99]:
# NOTE(review): `test_emb` and `test_bias` are not defined in any visible cell, and the
# recorded AttributeError ('list' object has no attribute 'idxmax') does not match the
# visible body of predict() -- this cell appears to depend on stale kernel state.
predict(test_emb, test_bias, model)

AttributeError: 'list' object has no attribute 'idxmax'

In [65]:
# By-hand prediction walk-through: take the item-factor row at index 1.
items = model.item_factors.weight[1]
items

tensor([ 0.0073,  0.0140,  0.0045,  0.0022,  0.0004,  0.0134,  0.0004,  0.0141,
         0.0106, -0.0015,  0.0021, -0.0028,  0.0034, -0.0046, -0.0010, -0.0039,
         0.0005,  0.0019, -0.0086,  0.0061,  0.0058, -0.0064, -0.0136, -0.0046,
        -0.0094,  0.0060,  0.0082, -0.0001,  0.0041, -0.0024, -0.0022,  0.0011,
        -0.0128,  0.0083,  0.0183,  0.0036,  0.0058,  0.0095, -0.0092,  0.0129,
         0.0025, -0.0079, -0.0078,  0.0120,  0.0074,  0.0011, -0.0115,  0.0094,
        -0.0004,  0.0002], grad_fn=<SelectBackward0>)

In [87]:
# Dot product of the test embedding with that item row (elementwise product summed over dim 0).
# NOTE(review): `test_emb` is not defined in any visible cell.
res = (test_emb * items).sum(dim = [0],keepdim= True)

In [88]:
# Raw dot-product score, before any bias is added.
res

tensor([0.0003], grad_fn=<SumBackward1>)

In [82]:
# Bias term used together with test_emb. NOTE(review): defined outside the visible cells.
test_bias

tensor([0.0168], grad_fn=<SelectBackward0>)

In [89]:
# Add the test bias in place. Not idempotent: re-running this cell adds test_bias again.
res += test_bias

In [90]:
# Score after adding test_bias.
res

tensor([0.0171], grad_fn=<AddBackward0>)

In [91]:
# Add the bias of item index 1 (the same item as `items`) in place; also not idempotent on re-run.
res += model.item_bias.weight[1]

In [92]:
# Pre-activation score: dot product + test_bias + item bias (sigmoid_range is not applied here).
res

tensor([0.0175], grad_fn=<AddBackward0>)

In [104]:

# Export all four learned tables as NumPy arrays.
# `.cpu()` is a no-op for CPU tensors but is required when the model lives on a
# GPU, where calling `.numpy()` directly raises a TypeError.
user_emb = model.user_factors.weight.detach().cpu().numpy()
user_bias = model.user_bias.weight.detach().cpu().numpy()

item_emb = model.item_factors.weight.detach().cpu().numpy()
item_bias = model.item_bias.weight.detach().cpu().numpy()



In [137]:
# Item biases as an (n_items, 1) NumPy array. `.cpu()` keeps this working when the
# model lives on a GPU (where `.numpy()` alone raises a TypeError); it is a no-op on CPU.
item_bias = model.item_bias.weight.detach().cpu().numpy()
item_bias

array([[ 0.00235279],
       [ 0.00041885],
       [-0.00126595],
       ...,
       [ 0.00110694],
       [ 0.01301515],
       [ 0.00577711]], dtype=float32)

In [107]:
# Score every item against user row 1 by plain dot product (no biases, no sigmoid)
# and keep the ten highest-scoring (item index, score) pairs.
result = sorted(
    ((idx, np.dot(user_emb[1], vec)) for idx, vec in enumerate(item_emb)),
    key=lambda pair: pair[1],
    reverse=True,
)[:10]

In [108]:
# Top-10 (item index, dot product) pairs for user row 1, highest score first.
result

[(1366, 0.0019108081),
 (773, 0.0015904079),
 (774, 0.0015387635),
 (981, 0.0014481905),
 (1200, 0.0014476026),
 (668, 0.0014031582),
 (529, 0.0013899709),
 (934, 0.0013884301),
 (1615, 0.0013682997),
 (198, 0.0013165438)]

In [110]:
# `result` already holds the top-10 (item index, score) pairs in descending score
# order, so keeping the first element of each pair yields the ranked indices
# directly -- no dict round-trip or second sort is needed.
# NOTE: this rebinds `result` from pairs to plain indices, so the cell is meant to
# run exactly once after the cell that builds the pairs.
result = [item_idx for item_idx, _score in result]

In [111]:
# The same top-10, reduced to item indices only.
result

[1366, 773, 774, 981, 1200, 668, 529, 934, 1615, 198]

In [146]:
input_range = (0, 5.5)


def predict(prediction_embedding, prediction_bias, item_emb, item_bias, n_recommendations, y_range=input_range):
    """Rank every item for one embedding/bias pair and return the best item indices.

    Each item ``i`` is scored as
    ``dot(prediction_embedding, item_emb[i]) + prediction_bias + item_bias[i]``
    and the score is squashed into ``y_range`` with a scaled sigmoid
    (``low + (high - low) * sigmoid(score)``). The whole computation is
    vectorized in NumPy, so no per-item Python loop or torch conversion is needed.

    Args:
        prediction_embedding: 1-D array with one value per latent factor.
        prediction_bias: scalar or single-element array added to every score.
        item_emb: 2-D array of item factors, one row per item.
        item_bias: one bias per item, shaped ``(n_items,)`` or ``(n_items, 1)``.
        n_recommendations: how many indices to return (applied as a slice bound).
        y_range: ``(low, high)`` bounds of the scaled sigmoid.

    Returns:
        A list of int row indices into ``item_emb``, best score first. Items with
        equal scores keep ascending index order. Empty if there are no items.

    Raises:
        ValueError: if ``prediction_bias`` does not hold exactly one value.

    For an existing user (id = idx) please input:
    - user_emb[idx] as the prediction_embedding
    - user_bias[idx] as the prediction_bias
    """
    item_matrix = np.asarray(item_emb)
    if item_matrix.size == 0:
        return []

    # Accept both a bare scalar and the (1,)-shaped row that user_bias[idx] yields.
    user_offset = np.asarray(prediction_bias).reshape(-1)
    if user_offset.size != 1:
        raise ValueError("prediction_bias must contain exactly one value")

    # Flatten so an (n_items, 1) bias column lines up with the (n_items,) dot products.
    item_offsets = np.asarray(item_bias).reshape(-1)

    raw_scores = item_matrix @ np.asarray(prediction_embedding) + item_offsets + user_offset

    low, high = y_range
    # exp() may overflow to inf for very negative scores; the sigmoid then correctly becomes 0.
    with np.errstate(over="ignore"):
        scores = low + (high - low) / (1.0 + np.exp(-raw_scores))

    # A stable sort on the negated scores gives descending order while keeping
    # tied items in ascending index order.
    ranking = np.argsort(-scores, kind="stable")
    return ranking[:n_recommendations].tolist()


    
#### SAMPLE USAGE ##############

# Top-10 item indices for the user stored at row 0 of the exported arrays.
user_idx = 0
predictions = predict(
    prediction_embedding=user_emb[user_idx],
    prediction_bias=user_bias[user_idx],
    item_emb=item_emb,
    item_bias=item_bias,
    n_recommendations=10,
)
predictions

[494, 585, 1127, 1505, 1010, 1260, 1638, 1596, 718, 348]

In [144]:
# Export the learned tables first, so the inputs below are read from this cell's
# own state rather than from whatever an earlier cell left in the kernel.
# `.cpu()` is a no-op on CPU and keeps `.numpy()` working when the model is on a GPU.
user_emb = model.user_factors.weight.detach().cpu().numpy()
user_bias = model.user_bias.weight.detach().cpu().numpy()

item_emb = model.item_factors.weight.detach().cpu().numpy()
item_bias = model.item_bias.weight.detach().cpu().numpy()

# Inputs for the user stored at row 0.
prediction_embedding = user_emb[0]
prediction_bias = user_bias[0]
n_recommendations = 10

# Reuse predict() instead of repeating its scoring loop inline; this also stops the
# cell from overwriting the notebook-level `res` used by the walk-through cells.
# The name `recommentation_list` (sic) is kept because a later cell displays it.
recommentation_list = predict(
    prediction_embedding,
    prediction_bias,
    item_emb,
    item_bias,
    n_recommendations,
    y_range=input_range,
)


In [145]:
# Ranked item indices computed for the user at row 0 (n_recommendations = 10).
recommentation_list


[494, 585, 1127, 1505, 1010, 1260, 1638, 1596, 718, 348]