In [1]:
import numpy as np
from scipy import spatial
import bookingreview
import preprocessreview

# BookingReview interface

class BookingReview(object):
    """A single raw booking review.

    Attributes:
        company: Company the review was written for.
        id: Identifier of the reviewer who wrote the review.
        rate: Rating given by the reviewer.
        context: Text content of the review.
        post_time: Numeric posting time. Other code in this file treats a
            difference of 1 as one day -- TODO confirm the unit.
        label: Spam/ham label of the review.
        review_id: Identifier of this review.
        db_node: Tree node holding this review; ``None`` until a
            ``ReviewNode`` is created for it.
    """

    def __init__(self, company, id, rate, context, post_time, label,
                 review_id):
        self.company = company
        self.id = id
        self.rate = rate
        self.context = context
        self.post_time = post_time
        self.label = label
        self.review_id = review_id

        # Filled in by ReviewNode.__init__ when the review enters the tree.
        self.db_node = None

    def __str__(self):
        """Return a multi-line, human-readable dump of the review."""
        return ("BookingReview object {6}:\n"
                "  Company name = {0}\n"
                "  ID = {1}\n"
                "  Rating = {2}\n"
                "  Current Context = {3}\n"
                "  Post time = {4}\n"
                "  Spam/Ham = {5}\n"
                .format(self.company, self.id, self.rate,
                        self.context, self.post_time, self.label,
                        self.review_id))

    def __lt__(self, cmp):
        """Order reviews by reviewer ``id`` first, then by ``post_time``.

        The comparison is strict: two reviews with the same ``id`` and the
        same ``post_time`` are not less than each other, so ``a < a`` is
        always False.
        """
        return (self.id, self.post_time) < (cmp.id, cmp.post_time)

# PreprocessReview interface

class PreprocessReview(object):
    """A review after preprocessing of its company name and text.

    Attributes:
        company_postag: POS-tag form of the company name (per the parameter
            name; exact structure is not visible here).
        company_word2vec: Sequence of vectors for the company name.
        context_postag: POS-tag form of the review text.
        context_word2vec: Sequence of vectors for the review text.
        id: Identifier of the reviewer.
        rate: Rating given by the reviewer.
        context: The ``context`` argument, kept as passed in.
        post_time: Numeric posting time.
        label: Spam/ham label of the review.
        review_id: Identifier of this review.
        db_node: Tree node associated with this review; starts as ``None``.
    """

    def __init__(self, company_postag, company_word2vec, context_postag, context_word2vec,
                 id, rate, context, post_time, label, review_id):
        self.company_postag = company_postag
        self.company_word2vec = company_word2vec
        self.context_postag = context_postag
        self.context_word2vec = context_word2vec

        self.id = id
        self.rate = rate
        # Previously this argument was accepted but thrown away; keep it so
        # the original text remains reachable from the preprocessed review.
        self.context = context
        self.post_time = post_time
        self.label = label
        self.review_id = review_id

        self.db_node = None

    def __str__(self):
        """Return a short fixed tag identifying the object type."""
        return "PreprocessReview object:\n"

# Implementation of ReviewDB

In [2]:
class ReviewNode:
    """Node of a binary tree that wraps one review value.

    Creating a node also stores the node on the wrapped value as
    ``val.db_node`` so the value can find its position in the tree.
    """

    def __init__(self, val, parent):
        self.parent = parent
        self.leftChild = None
        self.rightChild = None
        self.val = val

        # Back-reference from the stored value to its tree node.
        val.db_node = self

    def get(self):
        """Return the value stored in this node."""
        return self.val

    def set(self, val):
        """Replace the stored value (the value's ``db_node`` is not touched)."""
        self.val = val

    def max_value(self):
        """Return the right-most node of the subtree rooted at this node."""
        node = self
        while node.rightChild is not None:
            node = node.rightChild
        return node

    def previous_node(self, num = 1):
        """Return the node ``num`` steps before this one in in-order sequence.

        ``num == 0`` yields this node itself; ``None`` is returned when the
        walk runs past the first node of the tree.
        """
        node = self
        while num != 0:
            if node.leftChild is not None:
                # Predecessor is the largest node of the left subtree.
                node = node.leftChild.max_value()
            else:
                # Otherwise climb until we leave a right branch; that
                # ancestor is the predecessor.
                child, ancestor = node, node.parent
                while ancestor is not None and child == ancestor.leftChild:
                    child, ancestor = ancestor, ancestor.parent
                if ancestor is None:
                    return None
                node = ancestor
            num -= 1
        return node

    def __str__(self):
        """Return the wrapped value's text prefixed with a node tag."""
        return "".join(["ReviewNode - ", self.val.__str__(), '\n'])
    
        
class ReviewTree:
    """Unbalanced binary search tree of reviews.

    Values are ordered with their own ``<`` operator; a value that is not
    less than the node it is compared with goes to the right.
    """

    def __init__(self):
        self.root = None

    def set_root(self, val):
        """Make ``val`` the root of the tree, discarding any existing root."""
        self.root = ReviewNode(val, None)

    def insert(self, val):
        """Insert ``val`` into the tree, creating the root if needed."""
        if self.root is None:
            self.set_root(val)
        else:
            self.insert_node(self.root, val)

    def insert_node(self, currentNode, val):
        """Insert ``val`` into the subtree rooted at ``currentNode``.

        The new node's parent is the node it is actually attached to, so
        that upward walks (e.g. in-order predecessor lookup) follow the
        real tree structure.
        """
        current = currentNode

        while True:
            if val < current.val:
                if current.leftChild is not None:
                    current = current.leftChild
                else:
                    # Parent must be `current`, not the subtree root.
                    current.leftChild = ReviewNode(val, current)
                    break
            else:
                if current.rightChild is not None:
                    current = current.rightChild
                else:
                    current.rightChild = ReviewNode(val, current)
                    break

    def __str__(self):
        """Return the in-order dump of the tree, or ``"empty"``."""
        if self.root is None:
            return "empty"

        return self.to_str(self.root)

    def to_str(self, node):
        """Return the in-order text of the subtree rooted at ``node``.

        Each node contributes its own ``__str__`` followed by a newline.
        """
        string = ""
        if node.leftChild is not None:
            string = string + self.to_str(node.leftChild)

        string = string + node.__str__() + '\n'

        if node.rightChild is not None:
            string = string + self.to_str(node.rightChild)

        return string

In [3]:
class ReviewDB(object):
    """In-memory store of reviews plus per-reviewer spam statistics.

    Attributes:
        review_dict: Maps ``review_id`` to the stored review.
        review_tree: ``ReviewTree`` holding the same reviews in sorted order.
        id_dict: Maps a reviewer id to ``[spam_count, total_count]``.
    """

    def __init__(self):
        self.review_dict = {}
        self.review_tree = ReviewTree()
        self.id_dict = {}

    def add_review_list(self, bookingReview_list):
        """Add every review in the iterable, skipping known ``review_id``s."""
        for review in bookingReview_list:
            self.add_review(review)

    def add_review(self, bookingReview):
        """Add one review unless its ``review_id`` is already stored."""
        if bookingReview.review_id not in self.review_dict:
            self.review_dict[bookingReview.review_id] = bookingReview
            self.review_tree.insert(bookingReview)

    def get_review(self, review_id):
        """Return the review stored under ``review_id``, or ``None``."""
        return self.review_dict.get(review_id)

    def get_id_spamRecord (self, id):
        """Return the fraction of recorded results for ``id`` that were spam.

        Returns 0.0 when nothing has been recorded for ``id``.
        """
        if id in self.id_dict:
            spam_count, total_count = self.id_dict[id]
            return spam_count / total_count
        return 0.0

    def add_spam_result (self, id, result):
        """Record one classification result for reviewer ``id``.

        A truthy ``result`` counts as spam.
        """
        accumulate = 1 if result else 0

        if id in self.id_dict:
            self.id_dict[id][0] += accumulate
            self.id_dict[id][1] += 1
        else:
            # Must be a mutable list: the counters are updated in place above.
            self.id_dict[id] = [accumulate, 1]

    def size(self):
        """Return the number of stored reviews."""
        return len(self.review_dict)

    def __str__(self):
        """Return the sorted dump produced by the review tree."""
        return self.review_tree.__str__()

# Implementation of FormattedReview

In [4]:
class FormattedReview(object):
    """Feature record derived from a preprocessed review.

    ``setDB`` must be called with a review database before any instance is
    created, because the constructor looks up the reviewer's spam record.

    Attributes:
        context: The review's ``context_word2vec`` vectors.
        context_bayes: The review's ``context_postag`` value.
        comp_similarity: Highest cosine similarity between any company
            vector and any context vector (-1 if either list is empty).
        rate: Rating divided by 10.
        reiteration_context: Score for earlier identical texts by the same
            reviewer (0.1 per match, at most 10 matches).
        reiteration_repeat: Score for earlier reviews of the same company by
            the same reviewer, capped at 1.0.
        post_time: Fractional part of the posting time.
        post_vip: ``(int(post_time) % 7) / 7``.
        id: Spam ratio recorded in the database for the reviewer.
        label: Label copied from the preprocessed review.
        review_id: Review id copied from the preprocessed review.
    """

    # Shared review database; assigned through setDB().
    reviewDB = None

    def __init__(self, preprocessReview):
        # The tree node attached to the preprocessed review gives access to
        # the stored review and, through it, to its predecessors.
        bookingReview = preprocessReview.db_node.val

        self.context = preprocessReview.context_word2vec
        self.context_bayes = preprocessReview.context_postag
        self.calc_comp_similarity(preprocessReview)
        self.rate = preprocessReview.rate / 10
        self.reiteration_context = self.calc_reiteration_context(bookingReview)
        self.reiteration_repeat = self.calc_reiteration_repeat(bookingReview)
        self.post_time = preprocessReview.post_time % 1
        self.post_vip = (int(preprocessReview.post_time) % 7) / 7

        self.id = self.reviewDB.get_id_spamRecord(preprocessReview.id)

        self.label = preprocessReview.label
        self.review_id = preprocessReview.review_id

    def calc_comp_similarity(self, preprocessReview):
        """Store the best company/context cosine similarity on ``self``.

        Every company vector is compared with every context vector; the
        largest similarity is written to ``self.comp_similarity``.
        """
        max_sim = -1
        for company_vec in preprocessReview.company_word2vec:
            for context_vec in preprocessReview.context_word2vec:
                cos_sim = 1 - spatial.distance.cosine(company_vec, context_vec)
                max_sim = max_sim if (max_sim > cos_sim) else cos_sim

        self.comp_similarity = max_sim

    def calc_reiteration_context(self, bookingReview, num = 1):
        """Score how often the reviewer recently posted the same text.

        Walks back through the predecessors of ``bookingReview`` and adds
        0.1 for each consecutive one with the same reviewer id, the same
        text and a posting time less than 30 earlier. The walk stops at the
        first predecessor that does not match.

        Returns:
            A number between 0 and 1.
        """
        if num > 10:         # caps reiteration_context at 1 (10 matches x 0.1)
            return 0

        prev_node = bookingReview.db_node.previous_node(num)
        if prev_node is None:
            return 0

        prev_review = prev_node.val

        if(prev_review.id == bookingReview.id     # same reviewer
               and prev_review.context == bookingReview.context   # same text content
               and bookingReview.post_time - prev_review.post_time < 30):   # written within a month
            return 0.1 + self.calc_reiteration_context(bookingReview, num+1)

        # No match: end the chain with 0 instead of falling through to None,
        # which would break the addition in the caller.
        return 0

    def calc_reiteration_repeat(self, bookingReview, num = 1):
        """Score repeated reviews of the same company by the same reviewer.

        Walks back through the predecessors of ``bookingReview`` while they
        share its company and reviewer id, accumulating a score that is
        capped at 1.0.
        """
        prev_node = bookingReview.db_node.previous_node(num)
        if prev_node is None:
            return 0

        prev_review = prev_node.val

        if(prev_review.company == bookingReview.company      # same company name
               and prev_review.id == bookingReview.id):     # same reviewer

            time_diff = bookingReview.post_time - prev_review.post_time

            if(time_diff < 1): # written within a day
                val = 0.1+self.calc_reiteration_repeat(bookingReview, num+1)

            elif(time_diff < 365): # written within a year
                val = 0.1*time_diff/365+self.calc_reiteration_repeat(bookingReview, num+1)

            else:
                # NOTE(review): a gap of a year or more scores a flat 0.1 and
                # stops the walk -- confirm this weighting is intended.
                val = 0.1

            return val if val<1.0 else 1.0

        else:
            return 0

    @classmethod
    def setDB(cls, reviewDB):
        """Set the review database shared by all instances."""
        cls.reviewDB = reviewDB

    def __str__(self):
        """Return a multi-line, human-readable dump of the features."""
        return ("FormattedReview object {0}:\n"
                "  context = \n{1}\n"
                "  context_bayes = \n{9}\n"
                "  comp_similarity = {2}\n"
                "  rate = {3}\n"
                "  reiteration_context = {4}\n"
                "  reiteration_repeat = {5}\n"
                "  post_time = {6}\n"
                "  post_vip = {7}\n"
                "  label = {8}\n"
                .format(self.review_id, self.context, self.comp_similarity,
                        self.rate, self.reiteration_context, self.reiteration_repeat,
                        self.post_time, self.post_vip, self.label, self.context_bayes))


# 리뷰 생성 테스트 코드

In [9]:
# Smoke test: build a handful of sample reviews and load them into a ReviewDB.
test_review = bookingreview.BookingReview("회사", "asdf", 10, "너무 맛있어용", 400000, 0, 1)

# Small integer matrices standing in for the company / context fields.
company = np.arange(20).reshape(2, 10)
context = np.array([
    [10, 11, 12, 13, 14, 15, 16, 17, 15, 19],
    [9, 8, 7, 6, 5, 4, 3, 2, 1, 0],
    [2, 7, 4, 6, 8, 1, 7, 4, 8, 3],
])

test_review1 = bookingreview.BookingReview(company, "asdf", 8, context, 400000.26436, 0, 1)
test_list = [test_review1]

# One row per extra sample:
# (company offset, reviewer id, rating, context offset, post time, label, review id)
sample_rows = [
    (2, "2222", 2, 9, 400001.22222, 0, 2),
    (2, "2222", 3, 8, 400001.33333, 0, 3),
    (2, "2222", 4, 7, 400001.44432, 0, 4),
    (5, "5555", 5, 6, 400236.55622, 1, 5),
    (6, "7777", 6, 5, 400348.66232, 0, 6),
    (6, "7777", 7, 4, 400347.77482, 1, 7),
    (8, "5555", 8, 3, 400236.88592, 0, 8),
    (2, "2222", 9, 2, 400457.99922, 1, 9),
    (10, "asdf", 10, 1, 399910.02642, 0, 10),
]
test_list.extend(
    bookingreview.BookingReview(company + comp_shift, reviewer, rating,
                                context + ctx_shift, posted, label, review_id)
    for comp_shift, reviewer, rating, ctx_shift, posted, label, review_id in sample_rows
)

# Show every sample before it goes into the database.
for item in test_list:
    print(item)

# Populate a fresh database with the samples.
reviewDB = ReviewDB()
reviewDB.add_review_list(test_list)

BookingReview object:
  Company name = [[ 0  1  2  3  4  5  6  7  8  9]
 [10 11 12 13 14 15 16 17 18 19]]
  ID = asdf
  Rating = 8
  Context = [[10 11 12 13 14 15 16 17 15 19]
 [ 9  8  7  6  5  4  3  2  1  0]
 [ 2  7  4  6  8  1  7  4  8  3]]
  Post time = 400000.26436
  Label = 0
  Review id = 1

BookingReview object:
  Company name = [[ 2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21]]
  ID = 2222
  Rating = 2
  Context = [[19 20 21 22 23 24 25 26 24 28]
 [18 17 16 15 14 13 12 11 10  9]
 [11 16 13 15 17 10 16 13 17 12]]
  Post time = 400001.22222
  Label = 0
  Review id = 2

BookingReview object:
  Company name = [[ 2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21]]
  ID = 2222
  Rating = 3
  Context = [[18 19 20 21 22 23 24 25 23 27]
 [17 16 15 14 13 12 11 10  9  8]
 [10 15 12 14 16  9 15 12 16 11]]
  Post time = 400001.33333
  Label = 0
  Review id = 3

BookingReview object:
  Company name = [[ 2  3  4  5  6  7  8  9 10 11]
 [12 13 14 15 16 17 18 19 20 21

# 지금은 작동 안하는 테스트 코드

In [10]:
# Attach the review database to FormattedReview so instances can look up
# each reviewer's spam record.
FormattedReview.reviewDB = reviewDB

# NOTE: this line currently raises the AttributeError shown below. test_list
# holds BookingReview objects, but FormattedReview.__init__ reads attributes
# such as context_word2vec that only PreprocessReview defines.
test = [FormattedReview(review) for review in test_list]

for item in test:
    print(item)
    
#print("\nreviewDB - \n")
#print(reviewDB)

AttributeError: 'BookingReview' object has no attribute 'context_word2vec'