## Import and Load

In [68]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

In [73]:
# seeds 
seed = 12341
np.random.seed(seed)
torch.manual_seed(seed)

<torch._C.Generator at 0x7f7c50c55240>

In [98]:
train = pd.read_csv("data/ml-25m/ratings.csv").dropna().head(10000)
train['rating'] = train['rating'] / train['rating'].max()

train.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,1.0,1147880044
1,1,306,0.7,1147868817
2,1,307,1.0,1147868828
3,1,665,1.0,1147878820
4,1,899,0.7,1147868510


## preprocessing

In [172]:
def get_matrix(df):
    R = torch.zeros([df['userId'].max(), df['movieId'].max()])
    for _, row in df.iterrows():
        R[int(row['userId'] - 1), int(row['movieId']) - 1] = row['rating']
    return R

def get_temporal_matrix(old_df, t_steps):
    df = old_df[:]
    df['bin'] = pd.cut(df.timestamp, bins = t_steps, labels=range(t_steps))
    R = torch.zeros([df['userId'].max(), df['movieId'].max(), t_steps])
    for _, row in df.iterrows():
        R[int(row['userId'] - 1), int(row['movieId']) - 1, row['bin']] = row['rating']
    return R, df

In [173]:
time_steps = 100

R = get_matrix(train)
R_t, train_updated = get_temporal_matrix(train, t_steps=time_steps)

print(R.shape)
print(R_t.shape)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


torch.Size([75, 203519])
torch.Size([75, 203519, 100])


## PMF

In [145]:
from numpy.random import RandomState

class PMF(nn.Module):
	def __init__(self, n_users, n_items, n_factors=20, is_sparse=False, no_cuda=None):
		super(PMF, self).__init__()
		self.n_users = n_users
		self.n_items = n_items
		self.n_factors = n_factors
		self.random_state = RandomState(1)

		# M,D
		self.user_embeddings = nn.Embedding(n_users, n_factors, sparse=is_sparse)
		self.user_embeddings.weight.data = torch.from_numpy(0.1 * self.random_state.rand(n_users, n_factors)).float()
		
		# NxD
		self.item_embeddings = nn.Embedding(n_items, n_factors, sparse=is_sparse)
		self.item_embeddings.weight.data = torch.from_numpy(0.1 * self.random_state.rand(n_items, n_factors)).float()


		self.relu = nn.ReLU()
	
	def forward(self, users_index=None, items_index=None):
		if users_index!=None and items_index!=None:
			user_h1 = self.user_embeddings(users_index)
			item_h1 = self.item_embeddings(items_index)
			# print(user_h1.shape)
			# print(item_h1.shape)
			# R_h = (user_h1 * item_h1).sum(1)
			R_h = torch.dot(user_h1, item_h1)
		else:
			R_h = self.item_embeddings.weight.data @ self.user_embeddings.weight.data.T
		return R_h

	def __call__(self, *args):
		return self.forward(*args)


	def predict(self, users_index=None, items_index=None):
		# preds = self.forward(users_index, items_index)
		if users_index and items_index:
			preds = self.forward(users_index, items_index)
		else:
			user_h1 = self.user_embeddings(users_index)
			item_h1 = self.item_embeddings(items_index)
			preds = (user_h1 * item_h1).sum(1)
		return preds


def RMSE(preds, truth):
    return np.sqrt(np.mean(np.square(preds-truth)))

In [166]:
model = PMF(n_users=R.shape[0], n_items=R.shape[1], n_factors=20)
optimizer = optim.SGD(model.parameters(), lr=1e-3)

for name, param in model.named_parameters():
    if param.requires_grad:
        print(name, param.data.shape)
print('\n')


epochs = 10
lr = 0.1
reg = 0.01

loss_SGD = []

for i in range(epochs):
    print("epoch", i + 1)

    for _, row in train.iterrows():
      user, item = torch.tensor(int(row['userId']) - 1), torch.tensor(int(row['movieId']) - 1)
      # making a pridiction in forward pass
      y_hat = model.forward(users_index=user, items_index=item)
      error = R[user, item] - y_hat

      # updates
      # Pi ← Pi + α(eij Qj − λPi ) 
      model.user_embeddings.weight.data[user, :] = \
        model.user_embeddings.weight.data[user, :] + \
        lr * (error * model.item_embeddings.weight.data[item, :] - reg * model.user_embeddings.weight.data[user, :])
      # Qj ← Qj + α(eij Pi − λQj )
      model.item_embeddings.weight.data[item, :] = \
        model.item_embeddings.weight.data[item, :] + \
        lr * (error * model.user_embeddings.weight.data[user, :] - reg * model.item_embeddings.weight.data[item, :])

user_embeddings.weight torch.Size([75, 20])
item_embeddings.weight torch.Size([203519, 20])


epoch 1
epoch 2
epoch 3
epoch 4
epoch 5
epoch 6
epoch 7
epoch 8
epoch 9
epoch 10


In [164]:
# torch.linalg.norm(R - model().T)
# torch.linalg.norm(R)


tensor([[0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.7000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.8000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        ...,
        [0.6000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [0.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000],
        [1.0000, 0.0000, 0.0000,  ..., 0.0000, 0.0000, 0.0000]])

## TempMF

In [177]:
from numpy.random import RandomState

class TempMF(nn.Module):
	def __init__(self, n_users, t_steps, item_embeddings, n_factors=20, is_sparse=False, no_cuda=None):
		super(PMF, self).__init__()
		self.n_users = n_users

		self.n_factors = n_factors
		self.random_state = RandomState(1)

		# M,D,t
		self.user_embeddings = nn.Embedding(n_users, n_factors, t_steps, sparse=is_sparse)
		self.user_embeddings.weight.data = torch.from_numpy(0.1 * self.random_state.rand(n_users, n_factors)).float()
		# N,D
		self.item_embeddings = item_embeddings

		self.relu = nn.ReLU()
	
	def forward(self, time_index, users_index=None, items_index=None):
		if users_index!=None and items_index!=None:
			user_h1 = self.user_embeddings.weight.data[users_index,:,time_index]
			item_h1 = self.item_embeddings(items_index)
			# print(user_h1.shape)
			# print(item_h1.shape)
			# R_h = (user_h1 * item_h1).sum(1)
			R_h = torch.dot(user_h1, item_h1)
		else:
			R_h = self.item_embeddings.weight.data @ self.user_embeddings.weight.data[:,:,time_index].T
		return R_h

	def __call__(self, *args):
		return self.forward(*args)


	def predict(self, time_index, users_index=None, items_index=None):
		preds = self.forward(time_index, users_index, items_index)
		return preds

In [178]:
tempMF = TempMF(n_users=model.n_users, t_steps=time_steps, item_embeddings=model.item_embeddings, n_factors=model.n_factors)

epochs = 10
lr = 0.1
reg = 0.01

loss_SGD = []

# time
for t in range(time_steps):
    print("time:", i + 1)
    tempMF.user_embeddings.weight.data[:,:,t] = model.user_embeddings.weight.data

    # iterations
    for iter in range(epochs):
      print('iter:', iter + 1)

      # ratings
      for _, row in train_updated[train_updated['bin'] == t].iterrows():
        user, item = torch.tensor(int(row['userId']) - 1), torch.tensor(int(row['movieId']) - 1)
        # making a pridiction in forward pass
        y_hat = tempMF.forward(time_index=t, users_index=user, items_index=item)
        error = R_t[user, item, t] - y_hat

        tempMF.user_embeddings.weight.data[user, :, t] = \
          tempMF.user_embeddings.weight.data[user, :, t] + \
          lr * (error * tempMF.item_embeddings.weight.data[item, :] - reg * tempMF.user_embeddings.weight.data[user, :, t])


TypeError: super(type, obj): obj must be an instance or subtype of type

#### iteration through time