In [43]:
import pandas as pd
import scipy.sparse as sp
import numpy as np
from collections import defaultdict
import copy
import os
import argparse
import tensorflow as tf

In [2]:
 df = pd.read_csv('ratings.csv', sep=',', header=None,names=['user_id', 'item_id', 'rating', 'time'], index_col=False)

In [3]:
df = df[1:3000]

In [4]:
df

Unnamed: 0,user_id,item_id,rating,time
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
5,1,70,3.0,964982400
6,1,101,5.0,964980868
7,1,110,4.0,964982176
8,1,151,5.0,964984041
9,1,157,5.0,964984100
10,1,163,5.0,964983650


In [5]:
print('First pass')
print('\tnum_users = ' + str(len(df['user_id'].unique())))
print('\tnum_items = ' + str(len(df['item_id'].unique())))
print('\tdf_shape  = ' + str(df.shape))

First pass
	num_users = 20
	num_items = 1768
	df_shape  = (2999, 4)


In [6]:
user_counts = df['user_id'].value_counts()
print('Collected user counts...')
item_counts = df['item_id'].value_counts()
print('Collected item counts...')

Collected user counts...
Collected item counts...


In [7]:
# Filter based on user and item counts
user_min=2
item_min=2
df = df[df.apply(
        lambda x: user_counts[x['user_id']] >= user_min, axis=1)]
print('User filtering done...')
df = df[df.apply(
         lambda x: item_counts[x['item_id']] >= item_min, axis=1)]
print('Item filtering done...')

User filtering done...
Item filtering done...


In [8]:
df

Unnamed: 0,user_id,item_id,rating,time
1,1,3,4.0,964981247
2,1,6,4.0,964982224
3,1,47,5.0,964983815
4,1,50,5.0,964982931
5,1,70,3.0,964982400
7,1,110,4.0,964982176
8,1,151,5.0,964984041
10,1,163,5.0,964983650
11,1,216,5.0,964981208
12,1,223,3.0,964980985


In [9]:
print('Second pass')
print('\tnum_users = ' + str(len(df['user_id'].unique())))
print('\tnum_items = ' + str(len(df['item_id'].unique())))
print('\tdf_shape  = ' + str(df.shape))

Second pass
	num_users = 20
	num_items = 626
	df_shape  = (1857, 4)


### Normalize temporal values

In [10]:

print('Normalizing temporal values...')
mean = df['time'].mean()
std  = df['time'].std()
ONE_YEAR = (60 * 60 * 24 * 365) / mean
ONE_DAY  = (60 * 60 * 24) / mean
df['time'] = (df['time'] - mean) / std

Normalizing temporal values...


In [11]:
df

Unnamed: 0,user_id,item_id,rating,time
1,1,3,4.0,-0.639465
2,1,6,4.0,-0.639461
3,1,47,5.0,-0.639454
4,1,50,5.0,-0.639458
5,1,70,3.0,-0.639460
7,1,110,4.0,-0.639461
8,1,151,5.0,-0.639453
10,1,163,5.0,-0.639455
11,1,216,5.0,-0.639465
12,1,223,3.0,-0.639466


In [12]:
print('Constructing datasets...')
training_set = defaultdict(list)
# Start counting users and items at 1 to facilitate sparse matrix
# computation.
num_users = 1
num_items = 1
item_to_idx = {}
user_to_idx = {}
idx_to_item = {}
idx_to_user = {}

Constructing datasets...


In [13]:
for row in df.itertuples():
            # New item
            if row.item_id not in item_to_idx:
                item_to_idx[row.item_id] = num_items
                idx_to_item[num_items] = row.item_id
                num_items += 1

            # New user
            if row.user_id not in user_to_idx:
                user_to_idx[row.user_id] = num_users
                idx_to_user[num_users] = row.user_id
                num_users += 1

            # Converts all ratings to positive implicit feedback
            training_set[user_to_idx[row.user_id]].append(
                    (item_to_idx[row.item_id], row.time))

In [14]:
training_set

defaultdict(list,
            {1: [(1, -0.6394651293444573),
              (2, -0.6394610571453883),
              (3, -0.6394544257546833),
              (4, -0.6394581103237488),
              (5, -0.6394603235660268),
              (6, -0.6394612572124868),
              (7, -0.6394534837720941),
              (8, -0.6394551134853347),
              (9, -0.6394652918989748),
              (10, -0.6394662213773704),
              (11, -0.639465412772847),
              (12, -0.639466542318341),
              (13, -0.6394633245725055),
              (14, -0.6394579602734248),
              (15, -0.6394606986918366),
              (16, -0.639465412772847),
              (17, -0.6394596441715046),
              (18, -0.6394663172428551),
              (19, -0.6394595399698907),
              (20, -0.6394631995305688),
              (21, -0.6394604777844153),
              (22, -0.6394667090409232),
              (23, -0.6394623700857227),
              (24, -0.6394605486415127),
       

In [15]:
for user in training_set:
            training_set[user].sort(key=lambda x: x[1])

In [16]:
training_set

defaultdict(list,
            {1: [(62, -0.6394682470567434),
              (99, -0.6394681470231942),
              (136, -0.6394681470231942),
              (165, -0.6394675426538339),
              (166, -0.6394674801328656),
              (171, -0.6394674342841555),
              (22, -0.6394667090409232),
              (144, -0.6394667090409232),
              (149, -0.6394665923351156),
              (12, -0.639466542318341),
              (48, -0.6394664756293081),
              (18, -0.6394663172428551),
              (10, -0.6394662213773704),
              (80, -0.6394662213773704),
              (141, -0.6394662213773704),
              (123, -0.6394657712263986),
              (155, -0.6394657712263986),
              (81, -0.6394657128734949),
              (157, -0.6394657128734949),
              (84, -0.6394656378483329),
              (90, -0.6394656378483329),
              (162, -0.6394655586551063),
              (11, -0.639465412772847),
              (16, -0.63946

### Map from user to set of items for easy lookup

In [17]:
training_times = {}
val_set = {}
val_times = {}
test_set = {}
test_times = {}

# Map from user to set of items for easy lookup
item_set_per_user = {}
for user in training_set:
    if len(training_set[user]) < 3:
        # Reviewed < 3 items, insert dummy values
        test_set[user]   = (-1, -1)
        test_times[user] = (-1, -1)
        val_set[user]    = (-1, -1)
        val_times[user]  = (-1, -1)
    else:
        test_item, test_time = training_set[user].pop()
        val_item, val_time   = training_set[user].pop()
        last_item, last_time = training_set[user][-1]
        test_set[user]       = (test_item, val_item)
        test_times[user]     = (test_time, val_time)
        val_set[user]        = (val_item, last_item)
        val_times[user]      = (val_time, last_time)

    # Separate timestamps and create item set
    training_times[user]    = copy.deepcopy(training_set[user])
    training_set[user]      = [x[0] for x in training_set[user]]
    item_set_per_user[user] = set(training_set[user])

In [18]:
val_times

{1: (-0.6394529502598313, -0.6394530169488641),
 2: (1.3642653367061666, 1.3642653200339085),
 3: (0.7838581753655122, 0.7838581170126083),
 4: (-0.4619335679086908, -0.4619548583824313),
 5: (-1.1294040702523511, -1.1294041786220295),
 6: (-1.1372343755497794, -1.137234854877203),
 7: (0.2137629891256461, 0.19787869934651317),
 8: (-1.1626288006750463, -1.1626297176492482),
 9: (-0.3073706138598511, -0.3073710223301774),
 10: (1.4046242222845249, 1.4046239930409743),
 11: (-0.8915114065538723, -0.8915114065538723),
 12: (0.5371095373803502, 0.5371095123719629),
 13: (-0.5439551596745911, -0.5439551596745911),
 14: (-1.1793930439066354, -1.1793931189317972),
 15: (1.634617703418825, 1.634617403318177),
 16: (1.0798477736359509, 1.0798477611317572),
 17: (0.869215656756674, 0.8512349055321727),
 18: (1.663576440611092, 1.6440892966720304),
 19: (-0.636415627434683, -0.6364173821898602),
 20: (-0.268269062466709, -0.2682690749709027)}

In [19]:
print('training_set = ')
print(training_set)

training_set = 
defaultdict(<class 'list'>, {1: [62, 99, 136, 165, 166, 171, 22, 144, 149, 12, 48, 18, 10, 80, 141, 123, 155, 81, 157, 84, 90, 162, 11, 16, 27, 46, 9, 25, 152, 173, 91, 1, 133, 130, 50, 56, 69, 42, 167, 13, 49, 54, 114, 20, 95, 97, 115, 143, 109, 113, 163, 104, 37, 116, 164, 93, 160, 57, 59, 64, 140, 172, 58, 100, 134, 23, 66, 67, 76, 169, 6, 94, 96, 2, 126, 132, 31, 73, 83, 145, 15, 79, 124, 131, 24, 85, 21, 137, 5, 38, 47, 105, 170, 41, 148, 55, 125, 110, 30, 17, 35, 102, 19, 128, 135, 82, 89, 127, 138, 28, 39, 44, 77, 108, 75, 43, 45, 103, 111, 112, 151, 33, 36, 106, 107, 101, 4, 34, 51, 63, 86, 14, 117, 78, 147, 92, 156, 122, 61, 119, 146, 65, 70, 74, 142, 139, 53, 159, 87, 154, 88, 158, 120, 71, 8, 118, 72, 68, 60, 168, 150, 32, 3, 121, 153, 26, 52, 7, 161, 40, 29], 2: [174, 184, 193, 188, 191, 165, 186, 182, 187, 183, 176, 177, 189, 180, 16, 178, 179, 190, 181, 175, 194], 3: [73, 109, 202, 199, 201, 195, 197, 169, 198, 196, 200, 85, 205, 99, 204, 107], 4: [211, 14

In [20]:
 print('item_set_per_user')
print(item_set_per_user)

item_set_per_user
{1: {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173}, 2: {16, 165, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 186, 187, 188, 189, 190, 191, 193, 194}, 3: {195, 196, 197, 198, 199, 200, 73, 202, 201, 169, 109, 205, 204, 107, 99, 85}, 4: {3, 12, 13, 14, 22, 23, 29, 32, 34

In [21]:
num_train_events = 0
for user in training_set:
    print('user' + str(user) + '=')
    print(training_set[user])
    print('---------------------------')
    num_train_events += len(training_set[user])

user1=
[62, 99, 136, 165, 166, 171, 22, 144, 149, 12, 48, 18, 10, 80, 141, 123, 155, 81, 157, 84, 90, 162, 11, 16, 27, 46, 9, 25, 152, 173, 91, 1, 133, 130, 50, 56, 69, 42, 167, 13, 49, 54, 114, 20, 95, 97, 115, 143, 109, 113, 163, 104, 37, 116, 164, 93, 160, 57, 59, 64, 140, 172, 58, 100, 134, 23, 66, 67, 76, 169, 6, 94, 96, 2, 126, 132, 31, 73, 83, 145, 15, 79, 124, 131, 24, 85, 21, 137, 5, 38, 47, 105, 170, 41, 148, 55, 125, 110, 30, 17, 35, 102, 19, 128, 135, 82, 89, 127, 138, 28, 39, 44, 77, 108, 75, 43, 45, 103, 111, 112, 151, 33, 36, 106, 107, 101, 4, 34, 51, 63, 86, 14, 117, 78, 147, 92, 156, 122, 61, 119, 146, 65, 70, 74, 142, 139, 53, 159, 87, 154, 88, 158, 120, 71, 8, 118, 72, 68, 60, 168, 150, 32, 3, 121, 153, 26, 52, 7, 161, 40, 29]
---------------------------
user2=
[174, 184, 193, 188, 191, 165, 186, 182, 187, 183, 176, 177, 189, 180, 16, 178, 179, 190, 181, 175, 194]
---------------------------
user3=
[73, 109, 202, 199, 201, 195, 197, 169, 198, 196, 200, 85, 205, 99, 2

In [22]:
num_train_events

1817

In [23]:
 # Create scipy.sparse matrices
user_one_hot = sp.identity(num_users - 1).tocsr()
item_one_hot = sp.identity(num_items - 1).tocsr()

In [24]:
user_one_hot

<20x20 sparse matrix of type '<class 'numpy.float64'>'
	with 20 stored elements in Compressed Sparse Row format>

In [25]:
item_one_hot

<626x626 sparse matrix of type '<class 'numpy.float64'>'
	with 626 stored elements in Compressed Sparse Row format>

In [26]:
# Sparse training matrices
train_rows = []
train_cols = []
train_vals = []
train_prev_vals = []
train_times = []
train_prev_times = []

for user in training_set:
    
    for i in range(1, len(training_set[user])):
        item                     = training_set[user][i]
        item_prev           = training_set[user][i-1]
        item_time           = training_times[user][i]
        item_prev_time = training_times[user][i-1]
        
        train_rows.append(user)
        train_cols.append(item)
        train_vals.append(1)
        train_prev_vals.append(item_prev)
        train_times.append(item_time[1])
        train_prev_times.append(item_prev_time[1])

sp_train = sp.coo_matrix((train_vals, (train_rows, train_cols)),
            shape=(num_users, num_items))
sp_train_prev = sp.coo_matrix((train_prev_vals, (train_rows, train_cols)),
            shape=(num_users, num_items))
sp_train_times = sp.coo_matrix((train_times, (train_rows, train_cols)),
            shape=(num_users, num_items))
sp_train_prev_times = sp.coo_matrix((train_prev_times, (train_rows, train_cols)),
            shape=(num_users, num_items))

In [27]:
sp_train
print(type(sp_train),sp_train)

<class 'scipy.sparse.coo.coo_matrix'>   (1, 99)	1
  (1, 136)	1
  (1, 165)	1
  (1, 166)	1
  (1, 171)	1
  (1, 22)	1
  (1, 144)	1
  (1, 149)	1
  (1, 12)	1
  (1, 48)	1
  (1, 18)	1
  (1, 10)	1
  (1, 80)	1
  (1, 141)	1
  (1, 123)	1
  (1, 155)	1
  (1, 81)	1
  (1, 157)	1
  (1, 84)	1
  (1, 90)	1
  (1, 162)	1
  (1, 11)	1
  (1, 16)	1
  (1, 27)	1
  (1, 46)	1
  :	:
  (19, 241)	1
  (19, 230)	1
  (19, 418)	1
  (19, 265)	1
  (19, 359)	1
  (20, 227)	1
  (20, 296)	1
  (20, 303)	1
  (20, 287)	1
  (20, 626)	1
  (20, 344)	1
  (20, 301)	1
  (20, 33)	1
  (20, 36)	1
  (20, 19)	1
  (20, 404)	1
  (20, 477)	1
  (20, 353)	1
  (20, 305)	1
  (20, 400)	1
  (20, 625)	1
  (20, 624)	1
  (20, 328)	1
  (20, 340)	1
  (20, 308)	1


In [28]:
val_set

{1: (129, 29),
 2: (192, 194),
 3: (26, 107),
 4: (285, 282),
 5: (214, 223),
 6: (47, 416),
 7: (178, 475),
 8: (326, 341),
 9: (481, 478),
 10: (466, 503),
 11: (517, 255),
 12: (513, 498),
 13: (530, 527),
 14: (389, 307),
 15: (458, 542),
 16: (175, 18),
 17: (548, 608),
 18: (575, 157),
 19: (252, 359),
 20: (311, 308)}

In [29]:
for  user in val_set :
    print(user)

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20


In [30]:
print(val_times)

{1: (-0.6394529502598313, -0.6394530169488641), 2: (1.3642653367061666, 1.3642653200339085), 3: (0.7838581753655122, 0.7838581170126083), 4: (-0.4619335679086908, -0.4619548583824313), 5: (-1.1294040702523511, -1.1294041786220295), 6: (-1.1372343755497794, -1.137234854877203), 7: (0.2137629891256461, 0.19787869934651317), 8: (-1.1626288006750463, -1.1626297176492482), 9: (-0.3073706138598511, -0.3073710223301774), 10: (1.4046242222845249, 1.4046239930409743), 11: (-0.8915114065538723, -0.8915114065538723), 12: (0.5371095373803502, 0.5371095123719629), 13: (-0.5439551596745911, -0.5439551596745911), 14: (-1.1793930439066354, -1.1793931189317972), 15: (1.634617703418825, 1.634617403318177), 16: (1.0798477736359509, 1.0798477611317572), 17: (0.869215656756674, 0.8512349055321727), 18: (1.663576440611092, 1.6440892966720304), 19: (-0.636415627434683, -0.6364173821898602), 20: (-0.268269062466709, -0.2682690749709027)}


In [31]:
for user in val_times:
    print(user)
    print(val_times[user])

1
(-0.6394529502598313, -0.6394530169488641)
2
(1.3642653367061666, 1.3642653200339085)
3
(0.7838581753655122, 0.7838581170126083)
4
(-0.4619335679086908, -0.4619548583824313)
5
(-1.1294040702523511, -1.1294041786220295)
6
(-1.1372343755497794, -1.137234854877203)
7
(0.2137629891256461, 0.19787869934651317)
8
(-1.1626288006750463, -1.1626297176492482)
9
(-0.3073706138598511, -0.3073710223301774)
10
(1.4046242222845249, 1.4046239930409743)
11
(-0.8915114065538723, -0.8915114065538723)
12
(0.5371095373803502, 0.5371095123719629)
13
(-0.5439551596745911, -0.5439551596745911)
14
(-1.1793930439066354, -1.1793931189317972)
15
(1.634617703418825, 1.634617403318177)
16
(1.0798477736359509, 1.0798477611317572)
17
(0.869215656756674, 0.8512349055321727)
18
(1.663576440611092, 1.6440892966720304)
19
(-0.636415627434683, -0.6364173821898602)
20
(-0.268269062466709, -0.2682690749709027)


In [32]:
val_times_temp = val_times
val_times_temp

{1: (-0.6394529502598313, -0.6394530169488641),
 2: (1.3642653367061666, 1.3642653200339085),
 3: (0.7838581753655122, 0.7838581170126083),
 4: (-0.4619335679086908, -0.4619548583824313),
 5: (-1.1294040702523511, -1.1294041786220295),
 6: (-1.1372343755497794, -1.137234854877203),
 7: (0.2137629891256461, 0.19787869934651317),
 8: (-1.1626288006750463, -1.1626297176492482),
 9: (-0.3073706138598511, -0.3073710223301774),
 10: (1.4046242222845249, 1.4046239930409743),
 11: (-0.8915114065538723, -0.8915114065538723),
 12: (0.5371095373803502, 0.5371095123719629),
 13: (-0.5439551596745911, -0.5439551596745911),
 14: (-1.1793930439066354, -1.1793931189317972),
 15: (1.634617703418825, 1.634617403318177),
 16: (1.0798477736359509, 1.0798477611317572),
 17: (0.869215656756674, 0.8512349055321727),
 18: (1.663576440611092, 1.6440892966720304),
 19: (-0.636415627434683, -0.6364173821898602),
 20: (-0.268269062466709, -0.2682690749709027)}

### Sparse validation matrices

In [33]:
val_rows = []
val_cols = []
val_vals = []
val_prev_vals = []
val_times = []
val_prev_times = []

for user in val_set:
    item = val_set[user][0]
    item_prev = val_set[user][1]
    item_time = val_times_temp[user][0]
    item_prev_time = val_times_temp[user][1]
    
    if item == -1 or item_prev == -1:
        continue

    val_rows.append(user)
    val_cols.append(item)
    val_vals.append(1)
    val_prev_vals.append(item_prev)
    val_times.append(item_time)
    val_prev_times.append(item_prev_time)

sp_val = sp.coo_matrix((val_vals, (val_rows, val_cols)),
                        shape=(num_users, num_items))
sp_val_prev = sp.coo_matrix((val_prev_vals, (val_rows, val_cols)),
                        shape=(num_users, num_items))
sp_val_times = sp.coo_matrix((val_times, (val_rows, val_cols)),
                        shape=(num_users, num_items))
sp_val_prev_times = sp.coo_matrix((val_prev_times, (val_rows, val_cols)),
                        shape=(num_users, num_items))

In [34]:
print(type(sp_val), sp_val)

<class 'scipy.sparse.coo.coo_matrix'>   (1, 129)	1
  (2, 192)	1
  (3, 26)	1
  (4, 285)	1
  (5, 214)	1
  (6, 47)	1
  (7, 178)	1
  (8, 326)	1
  (9, 481)	1
  (10, 466)	1
  (11, 517)	1
  (12, 513)	1
  (13, 530)	1
  (14, 389)	1
  (15, 458)	1
  (16, 175)	1
  (17, 548)	1
  (18, 575)	1
  (19, 252)	1
  (20, 311)	1


In [35]:
print(type(sp_val_prev), sp_val_prev)

<class 'scipy.sparse.coo.coo_matrix'>   (1, 129)	29
  (2, 192)	194
  (3, 26)	107
  (4, 285)	282
  (5, 214)	223
  (6, 47)	416
  (7, 178)	475
  (8, 326)	341
  (9, 481)	478
  (10, 466)	503
  (11, 517)	255
  (12, 513)	498
  (13, 530)	527
  (14, 389)	307
  (15, 458)	542
  (16, 175)	18
  (17, 548)	608
  (18, 575)	157
  (19, 252)	359
  (20, 311)	308


In [36]:
print(type(sp_val_times), sp_val_times)

<class 'scipy.sparse.coo.coo_matrix'>   (1, 129)	-0.6394529502598313
  (2, 192)	1.3642653367061666
  (3, 26)	0.7838581753655122
  (4, 285)	-0.4619335679086908
  (5, 214)	-1.1294040702523511
  (6, 47)	-1.1372343755497794
  (7, 178)	0.2137629891256461
  (8, 326)	-1.1626288006750463
  (9, 481)	-0.3073706138598511
  (10, 466)	1.4046242222845249
  (11, 517)	-0.8915114065538723
  (12, 513)	0.5371095373803502
  (13, 530)	-0.5439551596745911
  (14, 389)	-1.1793930439066354
  (15, 458)	1.634617703418825
  (16, 175)	1.0798477736359509
  (17, 548)	0.869215656756674
  (18, 575)	1.663576440611092
  (19, 252)	-0.636415627434683
  (20, 311)	-0.268269062466709


In [37]:
print(type(sp_val_prev_times), sp_val_prev_times)

<class 'scipy.sparse.coo.coo_matrix'>   (1, 129)	-0.6394530169488641
  (2, 192)	1.3642653200339085
  (3, 26)	0.7838581170126083
  (4, 285)	-0.4619548583824313
  (5, 214)	-1.1294041786220295
  (6, 47)	-1.137234854877203
  (7, 178)	0.19787869934651317
  (8, 326)	-1.1626297176492482
  (9, 481)	-0.3073710223301774
  (10, 466)	1.4046239930409743
  (11, 517)	-0.8915114065538723
  (12, 513)	0.5371095123719629
  (13, 530)	-0.5439551596745911
  (14, 389)	-1.1793931189317972
  (15, 458)	1.634617403318177
  (16, 175)	1.0798477611317572
  (17, 548)	0.8512349055321727
  (18, 575)	1.6440892966720304
  (19, 252)	-0.6364173821898602
  (20, 311)	-0.2682690749709027


### Sparse test matrices

In [38]:
test_times_tmp = test_times

In [39]:
test_rows = []
test_cols = []
test_vals = []
test_prev_vals = []
test_times = []
test_prev_times = []

for user in test_set:
    
    item = test_set[user][0]
    item_prev = test_set[user][1]
    item_time = test_times_tmp[user][0]
    item_prev_time = test_times_tmp[user][1]
    
    if item == -1 or item_prev == -1:
        continue

    test_rows.append(user)
    test_cols.append(item)
    test_vals.append(1)
    test_prev_vals.append(item_prev)
    test_times.append(item_time)
    test_prev_times.append(item_prev_time)

    sp_test = sp.coo_matrix((test_vals, (test_rows, test_cols)),
                shape=(num_users, num_items))
    sp_test_prev = sp.coo_matrix((test_prev_vals, (test_rows, test_cols)),
                shape=(num_users, num_items))
    sp_test_times = sp.coo_matrix((test_times, (test_rows, test_cols)),
                shape=(num_users, num_items))
    sp_test_prev_times = sp.coo_matrix((test_prev_times, (test_rows, test_cols)),
                shape=(num_users, num_items))

val_prev_cats = None
test_prev_cats = None

In [48]:
# Subtract 1 to account for missing 0 index
user_indices = sp_train.row - 1
prev_indices = sp_train_prev.data - 1
pos_indices = sp_train.col - 1
neg_indices = np.random.randint(1, sp_train.shape[1],
size=len(sp_train.row), dtype=np.int32) - 1


In [50]:
# Convert from indices to one hot matrices

user_one_hot = sp.identity(num_users - 1).tocsr()
item_one_hot = sp.identity(num_items - 1).tocsr()

users = user_one_hot[user_indices]
prev_items = item_one_hot[prev_indices]
pos_items = item_one_hot[pos_indices]
neg_items = item_one_hot[neg_indices]

In [51]:
# Horizontally stack sparse matrices to get single positive
# and negative feature matrices
pos_feats = sp.hstack([users, prev_items, pos_items])
neg_feats = sp.hstack([users, prev_items, neg_items])

In [40]:
print(type(sp_test), sp_test)

<class 'scipy.sparse.coo.coo_matrix'>   (1, 98)	1
  (2, 185)	1
  (3, 203)	1
  (4, 284)	1
  (5, 299)	1
  (6, 40)	1
  (7, 476)	1
  (8, 12)	1
  (9, 98)	1
  (10, 491)	1
  (11, 221)	1
  (12, 289)	1
  (13, 431)	1
  (14, 534)	1
  (15, 33)	1
  (16, 67)	1
  (17, 572)	1
  (18, 577)	1
  (19, 616)	1
  (20, 197)	1


In [52]:
# Sparse placeholders
pl_user_list = tf.placeholder(tf.int64, shape=[None], name='pos_list')

pl_pos_indices = tf.placeholder(tf.int64, shape=[None, 2], name='pos_indices')
pl_pos_values = tf.placeholder(tf.float32, shape=[None], name='pos_values')
pl_pos_shape = tf.placeholder(tf.int64, shape=[2], name='pos_shape')

pl_neg_indices = tf.placeholder(tf.int64, shape=[None, 2], name='neg_indices')
pl_neg_values = tf.placeholder(tf.float32, shape=[None], name='neg_values')
pl_neg_shape = tf.placeholder(tf.int64, shape=[2], name='neg_shape')


In [53]:
placeholders = {
            'pl_user_list': pl_user_list,
            'pl_pos_indices': pl_pos_indices,
            'pl_pos_values': pl_pos_values,
            'pl_pos_shape': pl_pos_shape,
            'pl_neg_indices': pl_neg_indices,
            'pl_neg_values': pl_neg_values,
            'pl_neg_shape': pl_neg_shape
}

In [54]:
feed_dict = {
                placeholders['pl_user_list']: users.nonzero()[1],
                placeholders['pl_pos_indices']: np.hstack((
                    pos_feats.nonzero()[0][:, None],
                    pos_feats.nonzero()[1][:, None],
                )),
                placeholders['pl_pos_values']: pos_feats.data,
                placeholders['pl_pos_shape']: pos_feats.shape,
                placeholders['pl_neg_indices']: np.hstack((
                    neg_feats.nonzero()[0][:, None],
                    neg_feats.nonzero()[1][:, None],
                )),
                placeholders['pl_neg_values']: neg_feats.data,
                placeholders['pl_neg_shape']: neg_feats.shape,
}

In [55]:
feed_dict

{<tf.Tensor 'pos_list_1:0' shape=(?,) dtype=int64>: array([ 0,  0,  0, ..., 19, 19, 19], dtype=int32),
 <tf.Tensor 'pos_indices_1:0' shape=(?, 2) dtype=int64>: array([[   0,    0],
        [   1,    0],
        [   2,    0],
        ...,
        [1794,  973],
        [1795,  985],
        [1796,  953]], dtype=int32),
 <tf.Tensor 'pos_values_1:0' shape=(?,) dtype=float32>: array([1., 1., 1., ..., 1., 1., 1.]),
 <tf.Tensor 'pos_shape_1:0' shape=(2,) dtype=int64>: (1797, 1272),
 <tf.Tensor 'neg_indices_1:0' shape=(?, 2) dtype=int64>: array([[   0,    0],
        [   1,    0],
        [   2,    0],
        ...,
        [1794,  841],
        [1795,  731],
        [1796, 1236]], dtype=int32),
 <tf.Tensor 'neg_values_1:0' shape=(?,) dtype=float32>: array([1., 1., 1., ..., 1., 1., 1.]),
 <tf.Tensor 'neg_shape_1:0' shape=(2,) dtype=int64>: (1797, 1272)}

In [66]:
g = tf.Graph()
feature_dim = pos_feats.shape[1]

In [67]:
# config
filename        = 'ratings.csv' 
model           = 'TransFM'  
features        = 'none' 
features_file   = 'none' 
max_iters       = '1000000' 
num_dims        = '10' 
linear_reg      = '10.0' 
emb_reg         = '1.0'
trans_reg       = '0.1' 
init_mean       = '0.1' 
starting_lr     = '0.02' 
lr_decay_factor = '1.0' 
lr_decay_freq   = '1000' 
eval_freq       = '50' 
quit_delta      = '1000'


In [68]:
def parse_args( filename,     model,            features,       features_file,  max_iters,       
                num_dims,     linear_reg,       emb_reg,        trans_reg,      init_mean,     
                starting_lr,  lr_decay_factor,  lr_decay_freq,  eval_freq,      quit_delta ):
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--filename',
        help='Filename of the input dataset.',
        required=True)
    parser.add_argument('--model',
        help='Model to run.',
        choices=['TransFM', 'FM', 'PRME-FM', 'HRM-FM'],
        required=True)
    parser.add_argument('--features',
        help='Which features to include.',
        choices=['none', 'categories', 'time', 'content', 'geo'],
        default='none')
    parser.add_argument('--features_file',
        help='Filename(s) for content features. For content features, provide '
        '<user filename>,<item filename>. For categories and geo, provide a single '
        'filename. Temporal data should be included within the dataset file itself.')
    parser.add_argument('--max_iters',
        help='Max number of iterations to run',
        default=1000000,
        type=int)
    parser.add_argument('--num_dims',
        help='Model dimensionality.',
        default=10,
        type=int)
    parser.add_argument('--linear_reg',
        help='L2 regularization: linear_reg.',
        default=1.0,
        type=float)
    parser.add_argument('--emb_reg',
        help='L2 regularization: embbeding regularization.',
        default=1.0,
        type=float)
    parser.add_argument('--trans_reg',
        help='L2 regularization: translation regularization.',
        default=1.0,
        type=float)
    parser.add_argument('--init_mean',
        help='Initialization mean for model parameters.',
        default=0.1,
        type=float)
    parser.add_argument('--starting_lr',
        help='Initial learning rate.',
        default=0.001,
        type=float)
    parser.add_argument('--lr_decay_factor',
        help='Decay factor for learning rate.',
        default=1.0,
        type=float)
    parser.add_argument('--lr_decay_freq',
        help='Frequency at which to decay learning rate.',
        default=1000,
        type=int)
    parser.add_argument('--eval_freq',
        help='Frequency at which to evaluate model.',
        default=50,
        type=int)
    parser.add_argument('--quit_delta',
        help='Number of iterations at which to quit if no improvement.',
        default=1000,
        type=int)
    args = parser.parse_args(args = [ 
                            '--filename',        filename,
                            '--model',           model,
                            '--features',        features,
                            '--features_file',   features_file, 
                            '--max_iters',       max_iters,
                            '--num_dims',        num_dims,
                            '--linear_reg',      linear_reg,
                            '--emb_reg',         emb_reg,
                            '--trans_reg',       trans_reg,
                            '--init_mean',       init_mean,
                            '--starting_lr',     starting_lr,
                            '--lr_decay_factor', lr_decay_factor,
                            '--lr_decay_freq',   lr_decay_freq,
                            '--eval_freq' ,      eval_freq,
                            '--quit_delta' ,     quit_delta
                    ])
    print(args)
    print('')
    return args

In [69]:
args = parse_args(  filename,     model,            features,       features_file,  max_iters,       
                        num_dims,     linear_reg,       emb_reg,        trans_reg,      init_mean,     
                        starting_lr,  lr_decay_factor,  lr_decay_freq,  eval_freq,      quit_delta )

Namespace(emb_reg=1.0, eval_freq=50, features='none', features_file='none', filename='ratings.csv', init_mean=0.1, linear_reg=10.0, lr_decay_factor=1.0, lr_decay_freq=1000, max_iters=1000000, model='TransFM', num_dims=10, quit_delta=1000, starting_lr=0.02, trans_reg=0.1)



In [70]:
with g.as_default():
            # Define model variables
            var_linear = tf.get_variable('linear',
                    [feature_dim, 1],
                    initializer=tf.random_uniform_initializer(
                        -args.init_mean, args.init_mean))

            var_emb_factors = tf.get_variable('emb_factors',
                    [feature_dim, args.num_dims],
                    initializer=tf.random_uniform_initializer(
                        -args.init_mean, args.init_mean))

            var_trans_factors = tf.get_variable('trans_factors',
                    [feature_dim, args.num_dims],
                    initializer=tf.random_uniform_initializer(
                        -args.init_mean, args.init_mean))
            
            pl_user_list = tf.placeholder(tf.int64, shape=[None], name='pos_list')

            pl_pos_indices = tf.placeholder(tf.int64, shape=[None, 2], name='pos_indices')
            pl_pos_values = tf.placeholder(tf.float32, shape=[None], name='pos_values')
            pl_pos_shape = tf.placeholder(tf.int64, shape=[2], name='pos_shape')

            pl_neg_indices = tf.placeholder(tf.int64, shape=[None, 2], name='neg_indices')
            pl_neg_values = tf.placeholder(tf.float32, shape=[None], name='neg_values')
            pl_neg_shape = tf.placeholder(tf.int64, shape=[2], name='neg_shape')
            
            placeholders = {
                    'pl_user_list': pl_user_list,
                    'pl_pos_indices': pl_pos_indices,
                    'pl_pos_values': pl_pos_values,
                    'pl_pos_shape': pl_pos_shape,
                    'pl_neg_indices': pl_neg_indices,
                    'pl_neg_values': pl_neg_values,
                    'pl_neg_shape': pl_neg_shape
            }
            
            # Input positive features, shape = (batch_size * feature_dim)
            sparse_pos_feats = tf.SparseTensor(pl_pos_indices, pl_pos_values, pl_pos_shape)
            
            # Input negative features, shape = (batch_size * feature_dim)
            sparse_neg_feats = tf.SparseTensor(pl_neg_indices, pl_neg_values, pl_neg_shape)

In [72]:
pl_user_list

<tf.Tensor 'pos_list_2:0' shape=(?,) dtype=int64>

In [74]:
# Input positive features, shape = (batch_size * feature_dim)
sparse_pos_feats = tf.SparseTensor(pl_pos_indices, pl_pos_values, pl_pos_shape)

In [75]:
# Input negative features, shape = (batch_size * feature_dim)
sparse_neg_feats = tf.SparseTensor(pl_neg_indices, pl_neg_values, pl_neg_shape)

In [76]:
# Linear terms
pos_linear = tf.sparse_tensor_dense_matmul(sparse_pos_feats, var_linear)
neg_linear = tf.sparse_tensor_dense_matmul(sparse_neg_feats, var_linear)

ValueError: Tensor("linear:0", shape=(1272, 1), dtype=float32_ref) must be from the same graph as Tensor("pos_indices_2:0", shape=(?, 2), dtype=int64).