In [3]:
import pandas as pd
import os
import numpy as np

df = pd.read_csv('./input/data-house-warming.csv', sep='\t', encoding="UTF-16")

In [4]:
purchased_product_ids_per_row = df['purchased_product_ids'].values

In [5]:
house_warming_product_ids = set()
for purchased_product_ids in purchased_product_ids_per_row:
    items = purchased_product_ids.replace('[', '').replace(']', '').split(', ')
    house_warming_product_ids.update(items)
house_warming_product_ids.remove('')

In [6]:
house_warming_product_ids

{'106711',
 '45092',
 '38184',
 '75809',
 '33549',
 '282241',
 '13524',
 '169650',
 '342848',
 '157582',
 '152642',
 '104344',
 '83659',
 '1411',
 '200461',
 '173388',
 '24473',
 '3934',
 '132914',
 '370536',
 '145716',
 '71304',
 '228',
 '421952',
 '158128',
 '28655',
 '319878',
 '543116',
 '309408',
 '15334',
 '405167',
 '169446',
 '89741',
 '15856',
 '86685',
 '61367',
 '74573',
 '468121',
 '81777',
 '175122',
 '277486',
 '150145',
 '434',
 '602897',
 '203517',
 '471603',
 '239060',
 '453666',
 '262358',
 '232886',
 '126962',
 '32133',
 '36267',
 '102289',
 '13912',
 '289321',
 '406385',
 '502409',
 '170784',
 '29653',
 '57888',
 '37881',
 '241101',
 '129579',
 '149548',
 '77918',
 '436127',
 '238345',
 '68916',
 '36719',
 '36448',
 '3940',
 '308620',
 '1970',
 '270153',
 '4484',
 '13897',
 '206840',
 '40109',
 '2440',
 '1784',
 '111698',
 '410200',
 '154299',
 '86426',
 '81831',
 '31445',
 '52734',
 '148329',
 '152645',
 '252',
 '104745',
 '577398',
 '153852',
 '104351',
 '340164',

In [65]:
df.loc[df['purchased_product_ids'].isin(str(270153))]

TypeError: only list-like objects are allowed to be passed to isin(), you passed a [str]

In [8]:
# 웹 사이트로부터 데이터를 얻어오는 클래스
import requests
import json

class Requester:
    def get_data(self, url, parameters = {}):
        if (url is None) or (url.strip() is ''):
            raise Exception('url should not be empty')
        
        response = requests.get(url.strip(), params = parameters)
        if response.status_code is not 200:
            print('[ERROER]Can not receive response')

        response_text = (response.text is None) and '{}' or response.text
        json_response = json.loads(response_text)
        return json_response

In [9]:
# https://ohou.se/productions/41655/selling.json
from time import sleep

class ProductCrawler:
    def __init__(self, house_warming_product_ids = [], house_warming_df = None):
        self.products = list()
        self.house_warming_product_ids = house_warming_product_ids
        self.requester = Requester()
        self.product_n_reviews = list()
        self.house_warming_df = house_warming_df
        self.default_page_size = 5000
    
    def get_product_detail(self, product_id):
        url = 'https://ohou.se/productions/'+ str(product_id) + '/selling.json'
        response = self.requester.get_data(url)
        return response
    
    def collect_production(self, production = {}):
        self.products.append({
            'id': production['id'],
            'name': production['name'],
            'review_count': production['review_count'], #  제품에 남겨진 리뷰 총 갯수
            'review_average': production['review_avg'],  # 제품에 남겨진 평점 평균
            'scrap_count': production['scrap_count'],  # 제품을 스크랩한 총 갯수
            'view_count': production['view_count'],  # 제품을 본 횟수
            #'review_text': '' # 이 부분은 아래 코드 블록에서 수행 예정
        })
        
    def calculat_total_page(self, total_item_count):
        if (total_item_count is None) or (total_item_count <= 0):
            return 1
        total_page = (total_item_count // self.default_page_size)
        if (total_item_count % self.default_page_size) is not 0:
            total_page = total_page + 1
        return total_page
    
    def get_production_review_data_by_page(self, product_id, page):
        parameters = {
             'order': 'best', # 고정 queryParam 값. 베스트 리뷰순으로 가져오기위한 정렬값
             'per': str(self.default_page_size), # 고정 queryParam 값. 페이에 출력될 아이템 갯수
             'production_id': str(product_id),
             'page': str(page)
        }
        url = 'https://ohou.se/production_reviews.json'
        response = self.requester.get_data(url, parameters)
        return response
    
    def collect_product_review(self, product_review_template, reviews):
        for review in reviews:
            new_product_n_review = product_review_template.copy()
            product_review_template['review_user_id'] = review['writer_id']
            product_review_template['review_status'] = review['status']
            product_review_template['review_text'] = review['review']['comment']
            product_review_template['review_star_durability'] = review['review']['star_durability']
            product_review_template['review_star_design'] = review['review']['star_design']
            product_review_template['review_star_cost'] = review['review']['star_cost']
            product_review_template['review_star_delivery'] = review['review']['star_delivery']
            product_review_template['review_star_avg'] = review['review']['star_avg']
            product_review_template['product_info_id'] = review['production_information']['id']
            product_review_template['production_info_name'] = review['production_information']['name']
            product_review_template['production_info_brand_name'] = review['production_information']['brand_name']
            product_review_template['production_info_explain'] = review['production_information']['explain']
            product_review_template['production_info_is_purchased'] = review['production_information']['is_purchased']
            
            self.product_n_reviews.append(new_product_n_review)
        
    def collect(self):
        self.products = list()
        self.product_n_reviews = list()
        if (self.house_warming_product_ids is None) or (len(self.house_warming_product_ids) <= 0):
            return
        
        for product_id in self.house_warming_product_ids:
            response = self.get_product_detail(product_id)
            if (response['production'] is not None) and (len(response['production']) > 0):
                self.collect_production(response['production'])
                
                current_page = 1
                response = self.get_production_review_data_by_page(self.products[len(self.products) - 1]['id'], current_page)
                if (response['reviews'] is not None) and (len(response['reviews']) > 0):
                    self.collect_product_review(self.products[len(self.products) - 1], response['reviews'])

                total_page = self.calculat_total_page(response['count']) # count: 현재 제품의 모든 리뷰 갯수
                print('>> This product has total_page:', total_page)
                if current_page is total_page:
                    continue

                while (current_page <= total_page):
                    current_page = current_page + 1
                    print('>> Collect reviews at page:', current_page)
                    response = self.get_production_review_data_by_page(product_id, current_page)
                    if (response['reviews'] is not None) and (len(response['reviews']) > 0):
                        self.collect_product_review(self.products[len(self.products) - 1], response['reviews'])
                    sleep(3)

In [10]:
house_warming_product_crawler = ProductCrawler(house_warming_product_ids = house_warming_product_ids, house_warming_df = df)
house_warming_product_crawler.collect()

>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This produc

>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This produc

>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This produc

>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This product has total_page: 1
>> This produc

KeyError: 'reviews'

In [11]:
house_warming_product_crawler.product_n_reviews

[{'id': 106711,
  'name': 'patagonia black hole 90l duf',
  'review_count': 1,
  'review_average': 5.0,
  'scrap_count': 2,
  'view_count': 378},
 {'id': 38184,
  'name': '스톰트루퍼 알람시계',
  'review_count': 4,
  'review_average': 4.5,
  'scrap_count': 43,
  'view_count': 686},
 {'id': 38184,
  'name': '스톰트루퍼 알람시계',
  'review_count': 4,
  'review_average': 4.5,
  'scrap_count': 43,
  'view_count': 686,
  'review_user_id': 1241803,
  'review_status': '확인완료',
  'review_text': '인테리어 소품으로도 만족도가 높아요 ㅎ',
  'review_star_durability': 0,
  'review_star_design': 0,
  'review_star_cost': 0,
  'review_star_delivery': 0,
  'review_star_avg': 5.0,
  'product_info_id': 38184,
  'production_info_name': '스톰트루퍼 알람시계',
  'production_info_brand_name': '레고 스타워즈',
  'production_info_explain': '',
  'production_info_is_purchased': False},
 {'id': 38184,
  'name': '스톰트루퍼 알람시계',
  'review_count': 4,
  'review_average': 4.5,
  'scrap_count': 43,
  'view_count': 686,
  'review_user_id': 1034459,
  'review_status': '확

In [24]:
house_warming_product_review_df = pd.DataFrame(house_warming_product_crawler.product_n_reviews)
house_warming_product_review_df.head(10)

Unnamed: 0,id,name,review_count,review_average,scrap_count,view_count,review_user_id,review_status,review_text,review_star_durability,review_star_design,review_star_cost,review_star_delivery,review_star_avg,product_info_id,production_info_name,production_info_brand_name,production_info_explain,production_info_is_purchased
0,106711,patagonia black hole 90l duf,1,5.0,2,378,,,,,,,,,,,,,
1,38184,스톰트루퍼 알람시계,4,4.5,43,686,,,,,,,,,,,,,
2,38184,스톰트루퍼 알람시계,4,4.5,43,686,1241803.0,확인완료,인테리어 소품으로도 만족도가 높아요 ㅎ,0.0,0.0,0.0,0.0,5.0,38184.0,스톰트루퍼 알람시계,레고 스타워즈,,False
3,38184,스톰트루퍼 알람시계,4,4.5,43,686,1034459.0,확인완료,스톰트루퍼 레고시계는 화이트인테리어와도 잘 어울려서 만족스러워요~~~,0.0,0.0,0.0,0.0,5.0,38184.0,스톰트루퍼 알람시계,레고 스타워즈,,False
4,38184,스톰트루퍼 알람시계,4,4.5,43,686,5980427.0,확인완료,"모닝벨 몇년째 저와 함께 하고 있어요 , 너무 귀여워서 완전 추천해요 ㅎㅎㅎㅎ",0.0,0.0,0.0,0.0,5.0,38184.0,스톰트루퍼 알람시계,레고 스타워즈,,False
5,75809,Avellino 수납형 패브릭 소파베드,5,3.95,388,11535,,,,,,,,,,,,,
6,75809,Avellino 수납형 패브릭 소파베드,5,3.95,388,11535,2186610.0,확인완료,주문일 8.27일 배송일 9.12일 대전인데도 배송이 16일 걸렸네요.\n배송예정...,3.0,3.0,3.0,1.0,2.5,75809.0,Avellino 수납형 패브릭 소파베드,아리아퍼니쳐,본품,True
7,75809,Avellino 수납형 패브릭 소파베드,5,3.95,388,11535,7668475.0,확인완료,여러모로 디자인과 실용성은 확실히 좋아요.\n그런데 배송이 엄청 늦는편이고 수납공간...,3.0,4.0,2.0,1.0,2.5,75809.0,Avellino 수납형 패브릭 소파베드,아리아퍼니쳐,본품,True
8,75809,Avellino 수납형 패브릭 소파베드,5,3.95,388,11535,1935583.0,확인완료,티비 선반을 안놓기로 했더니 물건 넣어들 곳이 필요해서 수납 가능한 쇼파베드로 구매...,5.0,5.0,5.0,5.0,5.0,75809.0,Avellino 수납형 패브릭 소파베드,아리아퍼니쳐,본품,True
9,75809,Avellino 수납형 패브릭 소파베드,5,3.95,388,11535,6121565.0,확인완료,* 배송도 빨랐구요 (주말포함 일주일 정도)\n배송기사님 너무 너무 친절하시고 꼼꼼...,5.0,5.0,5.0,5.0,5.0,75809.0,Avellino 수납형 패브릭 소파베드,아리아퍼니쳐,본품,True


In [18]:
origin_review_df = pd.read_csv('./input/data-product-review-v1.csv', sep='\t', encoding="UTF-16")
len(origin_review_df)

59898

In [28]:
new_df = pd.concat([origin_review_df, house_warming_product_review_df])
len(new_df)

98484

In [32]:
new_df[98483:]

Unnamed: 0.1,Unnamed: 0,id,name,review_count,review_average,scrap_count,view_count,review_user_id,review_status,review_text,review_star_durability,review_star_design,review_star_cost,review_star_delivery,review_star_avg,product_info_id,production_info_name,production_info_brand_name,production_info_explain,production_info_is_purchased
38585,,388715,순수원목 A사이드테이블 3colors,28574,4.53,20610,162635,7101314.0,확인완료,조립 매우쉽고 이뻐요 강추입니다~~~,5.0,5.0,5.0,5.0,5.0,388715.0,순수원목 A사이드테이블 3colors,먼데이하우스,상품명: A사이드테이블 / 색상: 화이트,True


In [39]:
new_df.to_csv("./input/data-product-review-v2.csv", sep='\t', na_rep='', encoding="UTF-16")

In [40]:
pd.read_csv('./input/data-product-review-v2.csv', sep='\t', encoding="UTF-16")

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,id,name,review_count,review_average,scrap_count,view_count,review_user_id,review_status,...,review_star_durability,review_star_design,review_star_cost,review_star_delivery,review_star_avg,product_info_id,production_info_name,production_info_brand_name,production_info_explain,production_info_is_purchased
0,0,0.0,267218,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),1930,4.64,30840,446469,1986643.0,확인완료,...,2.0,2.0,1.0,1.0,1.5,267218.0,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),듀커소파,사이즈 선택: 208cm / 색상 선택: 라이트 그레이,True
1,1,1.0,267218,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),1930,4.64,30840,446469,5804361.0,확인완료,...,5.0,5.0,5.0,5.0,5.0,267218.0,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),듀커소파,사이즈 선택: 208cm / 색상 선택: 베이지,True
2,2,2.0,267218,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),1930,4.64,30840,446469,4976972.0,확인완료,...,5.0,5.0,5.0,5.0,5.0,267218.0,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),듀커소파,사이즈 선택: DK053 (208cm) / 색상 선택: 베이지,True
3,3,3.0,267218,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),1930,4.64,30840,446469,2692433.0,확인완료,...,0.0,0.0,0.0,0.0,5.0,267218.0,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),듀커소파,DK052(DK053) 3인용 풀커버 그레이 패브릭 소파 /발수 천 쇼파,False
4,4,4.0,267218,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),1930,4.64,30840,446469,2692433.0,확인완료,...,0.0,0.0,0.0,0.0,5.0,267218.0,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),듀커소파,DK052(DK053) 3인용 풀커버 그레이 패브릭 소파 /발수 천 쇼파,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98479,38581,,388715,순수원목 A사이드테이블 3colors,28574,4.53,20610,162635,6122042.0,확인완료,...,5.0,5.0,5.0,5.0,5.0,388715.0,순수원목 A사이드테이블 3colors,먼데이하우스,상품명: A사이드테이블 / 색상: 우드,True
98480,38582,,388715,순수원목 A사이드테이블 3colors,28574,4.53,20610,162635,6201606.0,확인완료,...,5.0,5.0,5.0,5.0,5.0,388715.0,순수원목 A사이드테이블 3colors,먼데이하우스,상품명: A사이드테이블 / 색상: 우드,True
98481,38583,,388715,순수원목 A사이드테이블 3colors,28574,4.53,20610,162635,6723568.0,확인완료,...,2.0,2.0,3.0,5.0,3.0,388715.0,순수원목 A사이드테이블 3colors,먼데이하우스,상품명: A사이드테이블 / 색상: 화이트,True
98482,38584,,388715,순수원목 A사이드테이블 3colors,28574,4.53,20610,162635,5816478.0,확인완료,...,1.0,1.0,1.0,1.0,1.0,388715.0,순수원목 A사이드테이블 3colors,먼데이하우스,상품명: A사이드테이블 / 색상: 우드,True


In [44]:
print(df.iloc[[0]])

   Unnamed: 0     id residence area region expertise color_list style_list  \
0           0  40578       아파트  30평    경기도     홈스타일링        NaN        NaN   

  constructions                              purchased_product_ids  \
0           NaN  [10865, 36591, 36592, 81695, 154085, 163351, 1...   

        family_list  like_count  reply_count  scrap_count  view_count  \
0  ['부모님과 함께 사는 집']          14            2          184        6311   

   share_count  
0            0  


In [45]:
purchased_product_ids_per_row =  df['purchased_product_ids'].values
purchased_product_ids_per_row

array(['[10865, 36591, 36592, 81695, 154085, 163351, 170233, 196822, 200864, 202802, 225435, 245841, 245842, 264792, 325601, 337047, 395253]',
       '[61601, 68072, 97064, 108193, 153647, 194353, 206237, 244003, 250348, 259166, 289680, 324445, 363925, 408563, 414334, 435480, 477917, 490328, 492346, 563743, 609395, 609408, 609417, 609420, 609433, 609438, 609443, 609445, 609449, 609451]',
       '[57577, 69119, 71718, 144799, 209658, 215597, 215770, 230216, 237059, 274302, 310706, 336837, 354736, 388715, 418107, 442226, 537368, 569826]',
       ..., '[1, 4, 5, 6, 472, 484, 488, 494, 81927]',
       '[8, 9, 10, 15, 16, 17, 18, 19, 20, 22, 49, 100, 371, 372, 373, 374, 435, 436, 11721, 13119, 15597, 16270, 30551, 33080, 33086, 46889, 387106]',
       '[55, 1490, 2254, 2255, 2256, 2257, 2258, 2259, 2261, 2262, 2263]'],
      dtype=object)

In [48]:
product_review_df = pd.read_csv('./input/data-product-review-v2.csv', sep='\t', encoding="UTF-16")
product_review_df.head(2)

Unnamed: 0.2,Unnamed: 0,Unnamed: 0.1,id,name,review_count,review_average,scrap_count,view_count,review_user_id,review_status,...,review_star_durability,review_star_design,review_star_cost,review_star_delivery,review_star_avg,product_info_id,production_info_name,production_info_brand_name,production_info_explain,production_info_is_purchased
0,0,0.0,267218,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),1930,4.64,30840,446469,1986643.0,확인완료,...,2.0,2.0,1.0,1.0,1.5,267218.0,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),듀커소파,사이즈 선택: 208cm / 색상 선택: 라이트 그레이,True
1,1,1.0,267218,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),1930,4.64,30840,446469,5804361.0,확인완료,...,5.0,5.0,5.0,5.0,5.0,267218.0,[쿠폰할인] DK053 3인용 풀커버 그레이 발수 패브릭 소파 (스툴 기본포함),듀커소파,사이즈 선택: 208cm / 색상 선택: 베이지,True
