In [1]:
import numpy as np
import pickle
import os
import tensorflow as tf

  from ._conv import register_converters as _register_converters


In [2]:
D = 5
K = 6
N = 10

In [3]:
ratings = [5,5,4,3,0]

In [4]:
W = {}

In [103]:
for k in range(K):
    W['w_'+str(k)] = np.random.randint(0,high = 5,size=(D,N) ,dtype='int16')

In [6]:
W2 = np.zeros((D,K,N),dtype='int16')

In [7]:
for k in range(K):
    for d in range(D): 
        W2[d][k] = W['w_'+str(k)][d]

In [8]:
W2.shape

(5, 6, 10)

In [399]:
class LRR_Model:
    global W2
    W = W2.astype('float64')
    SCORE_SQUARE = False
    T = 1000 # max iteration
    
    K = None # num of aspects
    D = None # num of documents
    N = None # num of words
    
    mu = None
    sigma = None
    sigma_inv = None
    delta_square = None
    beta = None
    
    aspect_weight = None # alpha
    alpha_hat = None 
    
    aspect_rating = None # S
    
    ratings = None
    
    _old_aspect_weight = None # old alpha
    _old_aspect_rating = None
    _old_beta = None
    
    def __init__(self,k,d,n,ratings:list):
        self.K = k
        self.D = d
        self.N = n
        self.ratings = np.array(ratings,dtype='float64')
        
        self.mu = np.array(np.random.rand(self.K) * 2.0 - np.ones(self.K) ,dtype='float64')
        self.sigma = np.array(np.identity(self.K),dtype='float64')
        self.sigma_inv = np.linalg.inv(self.sigma)
        
        self.beta = np.tile((2*np.random.rand(self.K, self.N)-1.0)/10,self.D).reshape(self.D,self.K,self.N) # W와 차원맞추려고 D만큼 tile
        
        self.delta_square = np.ones(1,dtype='float64')
        
        self.alpha_hat = np.random.normal(loc = self.mu, scale= self.sigma.diagonal(),size=(D,K) )
        
        self.aspect_weight = np.zeros((D,K))
        for d in range(D):
            self.aspect_weight[d] = np.exp(self.alpha_hat[d])/np.sum(np.exp(self.alpha_hat[d]))
            
        self._old_aspect_weight = np.zeros((D,K))
        self._old_aspect_rating = np.zeros((D,K))
        self._old_beta = np.zeros((D,K,N))
    
#     def calc_covariance(self,vct):
#         '''
#         :param vct:
#         :return:

#         double sum = 0, s;
# 		for(int i=0; i<m_k; i++){
# 			s = 0;
# 			for(int j=0; j<m_k; j++)
# 				s += vct[j] * m_sigma_inv[j][i];
# 			sum += s * vct[i];
# 		}
# 		return sum;

#         '''

#     def calc_det(self):
#         return np.linalg.det(self.sigma)

#     def calc_sigma_inv(self,scale):
#         self.sigma_inv = np.linalg.inv(self.sigma) * scale

    def save(self,file_name:str):
        print(os.path.join(os.getcwd(), os.path.join('model', file_name)))
        with open(os.path.join(os.getcwd(), os.path.join('model', file_name)), 'wb') as fp:
            pickle.dump(self,fp)

    @classmethod
    def load(cls,file_name:str):
        try:
            with open(os.path.join(os.getcwd(),os.path.join('model',file_name)),'rb') as fp:
                return pickle.load(fp)
        except Exception as e:
            print(e)
    
    def _set_aspect_rating(self):
        self._old_aspect_rating = self.aspect_rating
        tmp_aspect_rating = np.zeros((D,K))
        
        if self.SCORE_SQUARE==True:
            for d in range(D):
                tmp_aspect_rating[d] = 0.5*np.sum(np.multiply(self.beta[0],self.W[d]),axis=1)#to avoid negative rating 
            self.aspect_rating = tmp_aspect_rating
        else:
            for d in range(D):
                tmp_aspect_rating[d] = np.exp(np.sum(np.multiply(self.beta[0],self.W[d]),axis=1))#to avoid negative rating 
            self.aspect_rating = tmp_aspect_rating
            
    def _infer_aspect_weight(self):
        for d in range(D): # set the old aspect_weight
            self._old_aspect_weight[d] = np.exp(self.alpha_hat[d])/np.sum(np.exp(self.alpha_hat[d]))
            
        alpha_hat = tf.Variable(self.alpha_hat, name = 'aspect_weight_hat',dtype=tf.float64)
        
        mu = tf.placeholder_with_default(self.mu,shape=(K),name='mu')
        sigma_inv = tf.placeholder_with_default(self.sigma_inv,shape=(K,K),name='sigma_inv')
        delta_square = tf.placeholder_with_default(self.delta_square,shape=(1),name='delta_square')
        s = tf.placeholder_with_default(self.aspect_rating,shape=(D,K),name = 'aspect_rating')
        ratings = tf.placeholder_with_default(self.ratings,shape=(D),name = 'ratings')

#        # To get gradient
#         preceding_tmp = np.zeros((D,K),dtype='float32')
#         for d in range(D):
#             preceding_tmp[d] = (((self.ratings - np.matmul(self.alpha_hat_T[0],self.aspect_rating_T[0].T).diagonal())[d]*self.aspect_rating_T[0][d]) )

#         preceding = tf.placeholder_with_default(preceding_tmp,shape=(D,K),name='precending')
#         grad =  preceding/tf.square(delta) + tf.matmul((alpha_hat-mu),sigma_inv)

        loss = tf.square( ratings - tf.linalg.diag_part(tf.matmul(alpha_hat,tf.transpose(s)) ))/(2*tf.square(delta_square)) + 0.5*tf.matmul(tf.matmul(tf.subtract(alpha_hat,mu), sigma_inv),tf.transpose(tf.subtract(alpha_hat,mu)))

        optimizer = tf.train.GradientDescentOptimizer(0.01)
        train = optimizer.minimize(loss)
        
        with tf.Session() as sess0:
            sess0.run(tf.global_variables_initializer())

            for step in range(201):
                res = sess0.run([alpha_hat,train])
                if step % 20 == 0:
                    print(step,':')
                    print('loss:',sess0.run(loss),'\n') 
                    print('alpha_hat:',sess0.run(alpha_hat),'\n')
        
        self.alpha_hat = res[0] # set a new alpha_hat
        for d in range(D): # set a new aspect_weight
            self.aspect_weight[d] = np.exp(self.alpha_hat[d])/np.sum(np.exp(self.alpha_hat[d]))

    
    def e_step(self):
        self._set_aspect_rating() # step 1: estimate aspect rating & update
        self._infer_aspect_weight() # step 2: infer aspect weight & update
    
    def _infer_beta(self):
        self._old_beta = self.beta
        
        # 텐서플로까지(beta_rep이 Variable) 한번에 코드짤수있는 방안??
#         W_beta = np.zeros(self.D,self.K,self.N) # multiply of W & beta
#         for d in range(D):
#             W_beta = np.multiply(self.beta,self.W[d])
         
        beta = tf.Variable(self.beta, name = 'beta_rep',dtype=tf.float64)
        W = tf.placeholder_with_default(self.W,shape=(self.D,self.K,self.N),name='W')
        alpha = tf.placeholder_with_default(self.aspect_weight,shape=(self.D,self.K),name='aspect_weight')
    
        delta_square = tf.placeholder_with_default(self.delta_square,shape=None,name='delta_square')
        sigma_inv = tf.placeholder_with_default(self.sigma_inv,shape=(self.K,self.K),name='sigma_inv')
        ratings = tf.placeholder_with_default(self.ratings,shape=(self.D),name = 'ratings')
        
        loss = tf.reduce_sum(
                tf.divide(
                              tf.square(tf.subtract(ratings, tf.reduce_sum(tf.multiply(alpha,
                                                                                  tf.reduce_sum(
                                                                                        tf.multiply(beta,W)
                                                                                        ,axis=2)
                                                                                       )
                                                                    ,axis=1)
                                                    )
                                        )
                                ,delta_square)
                            )
    
        optimizer = tf.train.GradientDescentOptimizer(0.01)
        train = optimizer.minimize(loss)
        
        with tf.Session() as sess1:
            sess1.run(tf.global_variables_initializer())

            for step in range(201):
                res = sess1.run([beta,train])
                if step % 20 == 0:
                    print(step,':')
                    print('loss:',sess1.run(loss),'\n') 
                    print('beta:',sess1.run(beta),'\n')
        
            self.beta = res[0] # set a new beta
            
    
    
    def m_step(self): # m-step can only be applied to training samples
        # updateSigma = false; // shall we update Sigma?
        # Step 0: initialize the statistics
        # Step 1: ML for \mu
        self.mu = np.sum(self.aspect_weight,axis=0)/self.D
        # Step 2: ML for \sigma
        #######################
        
        # Step 3: ML for \delta
        self.delta_square = np.sum(np.square( self.ratings - np.diagonal(np.matmul(self.aspect_weight, np.transpose(self.aspect_rating)) ))) / self.D
        
        #Step 4: ML for \beta
        self._infer_beta()
        
        
        
    
    def em_est(self):#,converge:float):
        self.e_step()
        self.m_step()

In [401]:
model = LRR_Model(6,5,10,ratings)

In [402]:
model.save('test.model')

C:\Users\Chankoo\Desktop\GitHub\BOAZ-projects\airbnb-NLP\model\test.model


In [403]:
#model = LRR_Model.load('test.model')

In [404]:
model._set_aspect_rating()

In [405]:
model._old_aspect_rating

In [406]:
model.aspect_rating

array([[2.11826065, 1.10394819, 0.72307824, 1.68018556, 1.58267477,
        1.21720444],
       [1.2824899 , 1.40038486, 1.35366889, 1.32991356, 1.77380115,
        0.81468627],
       [0.91501496, 1.04434442, 1.17741458, 1.44043268, 0.87325847,
        1.71084035],
       [2.25749106, 1.35860304, 1.235249  , 1.69685702, 1.30444909,
        1.39400391],
       [1.37683517, 1.25256023, 1.53679727, 1.54773521, 1.44246026,
        1.00764324]])

In [407]:
model.aspect_rating

array([[2.11826065, 1.10394819, 0.72307824, 1.68018556, 1.58267477,
        1.21720444],
       [1.2824899 , 1.40038486, 1.35366889, 1.32991356, 1.77380115,
        0.81468627],
       [0.91501496, 1.04434442, 1.17741458, 1.44043268, 0.87325847,
        1.71084035],
       [2.25749106, 1.35860304, 1.235249  , 1.69685702, 1.30444909,
        1.39400391],
       [1.37683517, 1.25256023, 1.53679727, 1.54773521, 1.44246026,
        1.00764324]])

In [408]:
model._old_aspect_weight

array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]])

In [409]:
model.mu

array([-0.71520072,  0.50989094, -0.21517585, -0.72040505, -0.07151694,
       -0.38867586])

In [410]:
model.em_est()

0 :
loss: [[ 7.16529901  1.73327913  6.35468351  1.40723929  1.0460347 ]
 [ 4.27474741  3.47694906  6.22662588  1.19071227  0.85586205]
 [ 3.01622958  0.34670367  8.32856389  1.15967576 -0.26165504]
 [ 3.96210733  1.20411201  7.05299772  3.61101023 -0.81043056]
 [ 4.30994293  1.57830199  6.34070711 -0.10139037  3.92051961]] 

alpha_hat: [[-1.05743812  1.48749914  1.23611213 -0.79343957  1.13492674  1.37892827]
 [ 0.02925166  0.85002129 -0.2181315  -0.10543415  2.00796208 -0.70213002]
 [ 0.23703135  0.94678825 -0.52342315  0.22542262 -1.05301934  0.21414182]
 [ 1.103298   -0.13911595  1.13296708 -1.05509115 -0.02177842 -0.064055  ]
 [-0.76413286  3.06346079 -0.19552147 -1.46864721  0.45396027 -1.05434184]] 

20 :
loss: [[ 4.26737887  1.82105744  1.58870389  1.02596875  0.60012781]
 [ 1.80226085  3.57585881 -0.1022235   0.55422001  0.86300513]
 [ 1.55955289 -0.1125779   3.71026996  1.20528766 -0.57793307]
 [ 1.03368253  0.58073039  1.24215244  3.43303063 -1.18436126]
 [ 0.58374394  0.865

In [412]:
model.aspect_weight

array([[0.14787255, 0.13369588, 0.03522124, 0.07238946, 0.35929065,
        0.25153021],
       [0.00947933, 0.0924634 , 0.0354332 , 0.01807706, 0.84165786,
        0.00288915],
       [0.00638824, 0.06254451, 0.03437105, 0.11495219, 0.00272998,
        0.77901403],
       [0.77127825, 0.03127117, 0.09905916, 0.02545868, 0.02007528,
        0.05285747],
       [0.00979442, 0.77321249, 0.11443147, 0.01666097, 0.07885672,
        0.00704392]])

In [413]:
model.mu

array([0.18896256, 0.21863749, 0.06370322, 0.04950767, 0.2605221 ,
       0.21866696])

In [414]:
model.delta_square

6.301807399212153