In [1]:
import copy
from tqdm import tqdm
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, find
from sklearn.model_selection import train_test_split

parameters = {
    'lambda_a': 1e-2,
    'lambda_b': 1e-2,
    'momentum': 0.9,
    'num_features': 30,
    'epochs': 10,
    'lr': 3e-5,
    'batch_size':30000
}

df = pd.read_csv('./data/netflix/netflix.csv')
df = df.drop(['Unnamed: 0'], axis=1)

full_rows = df['user']
full_cols = df['movie']
full_ratings = df['rating']

full_csr = csr_matrix((full_ratings,(full_rows, full_cols)))

train, valid = train_test_split(df, test_size=0.4, shuffle=True, random_state=42)
valid, test = train_test_split(valid, test_size=0.5, shuffle=True, random_state=42)

valid_rows = valid['user']
valid_cols = valid['movie']
valid_ratings = valid['rating']

valid_csr = csr_matrix((valid_ratings, (valid_rows,valid_cols)))

In [2]:
class PMF():
    def __init__(self, full_data, valid_data, params):
        self.R = full_data
        self._lambda_alpha = params['lambda_a']
        self._lambda_beta = params['lambda_b']
        self.momentum = params['momentum']
        self.num_features = params['num_features']
        self.iterations = params['epochs']
        self.lr = params['lr']
        self.batch_size = params['batch_size']
        self.I = copy.deepcopy(self.R).astype('uint8')
        self.I[self.I != 0] = 1
        self.v_rows, self.v_cols, self.v_rat = find(valid_data)
        
        self.U = 0.1*np.random.randn(self.R.shape[0], self.num_features)
        self.V = 0.1*np.random.randn(self.R.shape[1], self.num_features)
    

    def loss(self, idx):
        '''
        The Frobenius norm is the same as the L2 norm, 
        and since the squares are added, the formula is written as follows.
        '''
        
        loss = np.sum(np.multiply(self.I[idx, :], (self.R[idx, :]-np.dot(self.U[idx, :], self.V.T))**2)) + self._lambda_alpha*np.sum(np.square(self.U[idx, :])) + self.lambda_beta*np.sum(np.square(self.V))
        return loss
    
    def predict(self):
        '''
        Validation-only function
        '''
        u_features = self.U.take(self.v_rows, axis=0)
        v_features = self.V.take(self.v_cols, axis=0)
        preds_value_array = np.sum(u_features*v_features, 1)
        return preds_value_array
    
    def train(self):
        
        train_loss_list = []
        vali_rmse_list = []
        last_vali_rmse = None

        # monemtum
        momuntum_u = np.zeros(self.U.shape)
        momuntum_v = np.zeros(self.V.shape)
        
        for epoch in tqdm(range(self.iterations)):
            
            shuffled_order = np.arange(full_csr.shape[0])
            np.random.shuffle(shuffled_order)
            train_loss = 0
            momuntum_v = 0
            
            for batch in range(int(len(shuffled_order / self.batch_size)) + 1):
                
                if batch == int(len(shuffled_order / self.batch_size)): #last batch
                    test = np.arange(self.batch_size*batch, full_csr.shape[0])
                    batch_idx = np.mod(test, shuffled_order.shape[0])
                else:
                    test = np.arange(self.batch_size * batch, self.batch_size * (batch+1))
                    batch_idx = np.mod(test, shuffled_order.shape[0])
                
                # get gradient
                mse = np.multiply(self.I[batch_idx,:], (self.R[batch_idx,:] - np.dot(self.U[batch_idx, :], self.V.T)))
                

                d_U = np.dot(mse, - self.V) + self._lambda_alpha*self.U[batch_idx, :]
                d_V = np.dot(mse.T, - self.U[batch_idx, :]) + self._lambda_alpha*self.V
                
                #update the parameters
                momuntum_u = (self.momuntum * momuntum_u) + self.lr * d_U
                momuntum_v += (self.momuntum * momuntum_v) + self.lr * d_V
    
                
                #training evaluation
                #save the loss before gradient update
                train_loss += np.sum(mse**2) + self._lambda_alpha*np.sum(np.square(self.U[idx, :])) + self.lambda_beta*np.sum(np.square(self.V))
                self.U[batch_idx, :] = self.U[batch_idx, :] - momuntum_u
                
            #cumlative sum of gradient V update
            self.V = self.V - momuntum_v 
            
            # Average over the cumulative sum of batch loss
            train_loss_list.append(train_loss/(int(len(shuffled_order / self.batch_size)) + 1))
                
                
            '''
            여기서는 최소한 batch단위 업데이트 끝나고 즉, U가 한바퀴는 다 돌리고 나서 수행됨.
            '''
            vali_preds = self.predict()
            vali_rmse = RMSE(self.v_rats, vali_preds)
            vali_rmse_list.append(vali_rmse)
            
            print('traning iteration:{: d} ,loss:{: f}, vali_rmse:{: f}'.format(it, train_loss, vali_rmse))

            if last_vali_rmse and (last_vali_rmse - vali_rmse) <= 0:
                print('convergence at iterations:{: d}'.format(it))
                break
            else:
                last_vali_rmse = vali_rmse


        return self.U, self.V, train_loss_list, vali_rmse_list

In [3]:
model = PMF(full_csr, valid_csr, params=parameters)

In [4]:
%%time
U, V, train_loss_list, vali_rmse_list = model.train()

  0%|                                                                                           | 0/10 [00:00<?, ?it/s]Exception ignored in: <generator object tqdm.__iter__ at 0x00000224591099E0>
Traceback (most recent call last):
  File "c:\users\user\anaconda3\envs\torch\lib\site-packages\tqdm\std.py", line 1193, in __iter__
    self.close()
  File "c:\users\user\anaconda3\envs\torch\lib\site-packages\tqdm\std.py", line 1287, in close
    fp_write('')
  File "c:\users\user\anaconda3\envs\torch\lib\site-packages\tqdm\std.py", line 1284, in fp_write
    self.fp.write(_unicode(s))
  File "c:\users\user\anaconda3\envs\torch\lib\site-packages\tqdm\utils.py", line 142, in inner
    return func(*args, **kwargs)
  File "c:\users\user\anaconda3\envs\torch\lib\site-packages\ipykernel\iostream.py", line 402, in write
    self.pub_thread.schedule(lambda : self._buffer.write(string))
  File "c:\users\user\anaconda3\envs\torch\lib\site-packages\ipykernel\iostream.py", line 203, in schedule
    sel

MemoryError: Unable to allocate 8.63 MiB for an array with shape (1130800,) and data type float64