In [43]:
import copy
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix, find
from sklearn.model_selection import train_test_split

parameters = {
    'lambda_a': 1e-2,
    'lambda_b': 1e-2,
    'momentum': 0.9,
    'num_features': 30,
    'epochs': 1000,
    'lr': 3e-5
}

df = pd.read_csv('./data/netflix/netflix.csv')
df = df.drop(['Unnamed: 0'], axis=1)

full_rows = df['user']
full_cols = df['movie']
full_ratings = df['rating']

full_csr = csr_matrix((full_ratings,(full_rows, full_cols)))

train, valid = train_test_split(df, test_size=0.4, shuffle=True, random_state=42)
valid, test = train_test_split(valid, test_size=0.5, shuffle=True, random_state=42)

valid_rows = valid['user']
valid_cols = valid['movie']
valid_ratings = valid['rating']

valid_csr = csr_matrix((valid_ratings, (valid_rows,valid_cols)))

In [56]:
class PMF():
    def __init__(self, full_data, valid_data, params):
        self.R = full_data
        self._lambda_alpha = params['lambda_a']
        self._lambda_beta = params['lambda_b']
        self.momentum = params['momentum']
        self.num_features = params['num_features']
        self.iterations = params['epochs']
        self.lr = params['lr']
        self.I = copy.deepcopy(self.R)
        self.I[self.I != 0] = 1
        self.v_rows, self.v_cols, self.v_rat = find(valid_data)
        
        self.U = 0.1*np.random.randn(self.R.shape[0], self.num_features)
        self.V = 0.1*np.random.randn(self.R.shape[1], self.num_features)
    

    def loss(self):
        '''
        The Frobenius norm is the same as the L2 norm, 
        and since the squares are added, the formula is written as follows.
        '''
        loss = np.sum(self.I*(self.R-np.dot(self.U, self.V.T))**2) + self._lambda_alpha*np.sum(np.square(self.U)) + self.lambda_beta*np.sum(np.square(self.V))
        return loss
    
    def predict(self):
        '''
        Validation-only function
        '''
        u_features = self.U.take(self.v_rows, axis=0)
        v_features = self.V.take(self.v_cols, axis=0)
        preds_value_array = np.sum(u_features*v_features, 1)
        return preds_value_array
    
    def train(self):
        
        train_loss_list = []
        vali_rmse_list = []
        last_vali_rmse = None

        # monemtum
        momuntum_u = np.zeros(self.U.shape)
        momuntum_v = np.zeros(self.V.shape)

        for it in range(self.iterations):
            # derivate of Vi
            import pdb;pdb.set_trace()
            grads_u = np.dot(self.I*(self.R-np.dot(self.U, self.V.T)), -self.V) + self._lambda_alpha*self.U

            # derivate of Tj
            grads_v = np.dot((self.I*(self.R-np.dot(self.U, self.V.T))).T, -self.U) + self._lambda_beta*self.V

            # update the parameters
            momuntum_u = (self.momuntum * momuntum_u) + self.lr * grads_u
            momuntum_v = (self.momuntum * momuntum_v) + self.lr * grads_v
            self.U = self.U - momuntum_u
            self.V = self.V - momuntum_v

            # training evaluation
            train_loss = self.loss()
            train_loss_list.append(train_loss)

            vali_preds = self.predict()
            vali_rmse = RMSE(self.v_rats, vali_preds)
            vali_rmse_list.append(vali_rmse)

            print('traning iteration:{: d} ,loss:{: f}, vali_rmse:{: f}'.format(it, train_loss, vali_rmse))

            if last_vali_rmse and (last_vali_rmse - vali_rmse) <= 0:
                print('convergence at iterations:{: d}'.format(it))
                break
            else:
                last_vali_rmse = vali_rmse

        return self.U, self.V, train_loss_list, vali_rmse_list

In [57]:
model = PMF(full_csr, valid_csr, params=parameters)

In [58]:
%%time
U, V, train_loss_list, vali_rmse_list = model.train()

> [1;32m<ipython-input-56-0f6bfdef95ba>[0m(48)[0;36mtrain[1;34m()[0m
[1;32m     46 [1;33m            [1;31m# derivate of Vi[0m[1;33m[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m     47 [1;33m            [1;32mimport[0m [0mpdb[0m[1;33m;[0m[0mpdb[0m[1;33m.[0m[0mset_trace[0m[1;33m([0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m---> 48 [1;33m            [0mgrads_u[0m [1;33m=[0m [0mnp[0m[1;33m.[0m[0mdot[0m[1;33m([0m[0mself[0m[1;33m.[0m[0mI[0m[1;33m*[0m[1;33m([0m[0mself[0m[1;33m.[0m[0mR[0m[1;33m-[0m[0mnp[0m[1;33m.[0m[0mdot[0m[1;33m([0m[0mself[0m[1;33m.[0m[0mU[0m[1;33m,[0m [0mself[0m[1;33m.[0m[0mV[0m[1;33m.[0m[0mT[0m[1;33m)[0m[1;33m)[0m[1;33m,[0m [1;33m-[0m[0mself[0m[1;33m.[0m[0mV[0m[1;33m)[0m [1;33m+[0m [0mself[0m[1;33m.[0m[0m_lambda_alpha[0m[1;33m*[0m[0mself[0m[1;33m.[0m[0mU[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m     49 [1;33m[1;33m[0m[0m
[0m[1;32m     50 [1;33m     

BdbQuit: 

MF 쓰고나면 Latent Matrix간의 내적으로 인해 결국 엄청 큰 DenseMatrix 나오게됨<br>
근데 이거 메모리 터짐 어케해결?<br>
일단 라이브러리 소스 참고해보기.