# Crawling 크롤링 + 전처리

In [2]:
# 웹 사이트로부터 데이터를 얻어오는 클래스
import requests
import json

class Requester:
    def get_data(self, url, parameters = {}):
        if (url is None) or (url.strip() is ''):
            raise Exception('url should not be empty')
        
        response = requests.get(url.strip(), params = parameters)
        if response.status_code is not 200:
            print('[ERROER]Can not receive response')

        response_text = (response.text is None) and '{}' or response.text
        json_response = json.loads(response_text)
        return json_response

## '집들이 목록의 고유키'을 크롤링하는 클래스

In [3]:
# 홈페이지 주소: https://ohou.se/projects?writer=self
from time import sleep

class HouseWarmingCrawler:
    def __init__(self):
        self.house_warming_item_ids = list()
        self.requester = Requester()
        self.default_page_size = 100 # 1페이지에 출력될 아이템 갯수
    
    def get_house_warming_data_by_page(self, page):
        parameters = {
             'v': str(2), # 고정 queryParam 값. v인걸 보아 API 버전 정보인듯?
             'writer': 'self', # 고정 queryParam 값. 뭔지 모르겠음.
             'per': str(self.default_page_size), # 고정 queryParam 값. 페이에 출력될 아이템 갯수
             'page': str(page)
        }
        url = 'https://ohou.se/projects.json'
        response = self.requester.get_data(url, parameters)
        return response
    
    def collect_house_warming_list(self, projects = []):
        for project in projects:
            self.house_warming_item_ids.append(project['id'])
        
    def collect(self):
        self.house_warming_item_ids = list()
        
        has_next_page = True
        current_page = 1
        while has_next_page is True:
            print('> Start to collect house warming list. current_page:', current_page)
            response = self.get_house_warming_data_by_page(current_page)
            if (response['projects'] is not None) and (len(response['projects']) > 0):
                self.collect_house_warming_list(response['projects'])
                print(response['next'])
                has_next_page = response['next']
                current_page = current_page + 1
                sleep(3)

In [4]:
house_warming_crawler = HouseWarmingCrawler()
house_warming_crawler.collect()

print('> Total main house warming item count:', len(house_warming_crawler.house_warming_item_ids))

> Start to collect house warming list. current_page: 1
True
> Start to collect house warming list. current_page: 2
True
> Start to collect house warming list. current_page: 3
True
> Start to collect house warming list. current_page: 4
True
> Start to collect house warming list. current_page: 5
True
> Start to collect house warming list. current_page: 6
True
> Start to collect house warming list. current_page: 7
True
> Start to collect house warming list. current_page: 8
True
> Start to collect house warming list. current_page: 9
True
> Start to collect house warming list. current_page: 10
True
> Start to collect house warming list. current_page: 11
True
> Start to collect house warming list. current_page: 12
True
> Start to collect house warming list. current_page: 13
True
> Start to collect house warming list. current_page: 14
True
> Start to collect house warming list. current_page: 15
True
> Start to collect house warming list. current_page: 16
True
> Start to collect house warming 

## '집들이 ' 항목의  '상품' 및 관련된 '리뷰텍스트 등'을 크롤링하는 클래스

In [5]:
from time import sleep

class HouseWarmingItemDetailCrawler:
    def __init__(self, item_ids = []):
        self.house_warming_details = list()
        self.item_ids = item_ids
        self.requester = Requester()
    
    def get_detail_data_by_item_id(self, item_id):
        parameters = {
             'v': str(4), # 고정 queryParam 값. v인걸 보아 API 버전 정보인듯?
        }
        url = 'https://ohou.se/projects/' + str(item_id) + '/detail.json'
        response = self.requester.get_data(url, parameters)
        return response
    
    def collect_house_warming_detail(self, project_detail = []):
        purchased_product_ids = [ product['id'] for product in project_detail['bucketplace_document_supplements']['products'] ]
        
        self.house_warming_details.append({
            'id': project_detail['id'],
            
            'residence': project_detail['residence'], # 주거 형태
            'area': project_detail['area'], # 평수
            'region': project_detail['region'],  # 주거지 지역
            'expertise': project_detail['expertise'], # 인테리어 포스트 카테고리
            
            'color_list': ','.join(project_detail['color_list']),
            'style_list': ','.join(project_detail['style_list']),
            'constructions': ','.join(project_detail['constructions']),
            
            'purchased_product_ids': purchased_product_ids, # 구매한 제품 아이디 목록
            
            'family_list': project_detail['family_list'], # 가족 구성원 정보
            'like_count': project_detail['like_count'], #  집 인테리어 포스트의 좋아요 갯수
            'reply_count': project_detail['reply_count'],  # 집 인테리어 포스트에 남겨진 답글 갯수
            'scrap_count': project_detail['scrap_count'],  # 집 인테리어 포스트를 스크랩한 총 갯수
            'view_count': project_detail['view_count'],  # 집 인테리어 포스트를 본 횟수
            'share_count': project_detail['share_count'] # 해당 집 인테리어 포스트를 공유한 횟수
        })
        
    def collect(self):
        self.house_warming_details = list()
        if (self.item_ids is None) or (len(self.item_ids) <= 0):
            return
        
        for item_id in self.item_ids:
            response = self.get_detail_data_by_item_id(item_id)
            print('Collect item detail item_id:', item_id)
            if (response['project'] is not None) and (len(response['project']) > 0):
                    self.collect_house_warming_detail(response['project'])
            print('...')

In [6]:
house_warming_item_detail_crawler = HouseWarmingItemDetailCrawler(item_ids = house_warming_crawler.house_warming_item_ids)
house_warming_item_detail_crawler.collect()

Collect item detail item_id: 40578
...
Collect item detail item_id: 39868
...
Collect item detail item_id: 39270
...
Collect item detail item_id: 39218
...
Collect item detail item_id: 38991
...
Collect item detail item_id: 40580
...
Collect item detail item_id: 40055
...
Collect item detail item_id: 39404
...
Collect item detail item_id: 39302
...
Collect item detail item_id: 38844
...
Collect item detail item_id: 40576
...
Collect item detail item_id: 39858
...
Collect item detail item_id: 39211
...
Collect item detail item_id: 38993
...
Collect item detail item_id: 38718
...
Collect item detail item_id: 40577
...
Collect item detail item_id: 39779
...
Collect item detail item_id: 39714
...
Collect item detail item_id: 39337
...
Collect item detail item_id: 38539
...
Collect item detail item_id: 40579
...
Collect item detail item_id: 39744
...
Collect item detail item_id: 39041
...
Collect item detail item_id: 40402
...
Collect item detail item_id: 39831
...
Collect item detail item_

Collect item detail item_id: 37305
...
Collect item detail item_id: 37848
...
Collect item detail item_id: 38116
...
Collect item detail item_id: 37379
...
Collect item detail item_id: 38152
...
Collect item detail item_id: 37670
...
Collect item detail item_id: 38191
...
Collect item detail item_id: 37483
...
Collect item detail item_id: 37194
...
Collect item detail item_id: 38205
...
Collect item detail item_id: 37693
...
Collect item detail item_id: 37680
...
Collect item detail item_id: 38188
...
Collect item detail item_id: 37724
...
Collect item detail item_id: 37536
...
Collect item detail item_id: 37522
...
Collect item detail item_id: 37598
...
Collect item detail item_id: 37469
...
Collect item detail item_id: 37399
...
Collect item detail item_id: 37679
...
Collect item detail item_id: 37576
...
Collect item detail item_id: 36456
...
Collect item detail item_id: 37676
...
Collect item detail item_id: 37467
...
Collect item detail item_id: 37924
...
Collect item detail item_

Collect item detail item_id: 35957
...
Collect item detail item_id: 35951
...
Collect item detail item_id: 35412
...
Collect item detail item_id: 36151
...
Collect item detail item_id: 36083
...
Collect item detail item_id: 36047
...
Collect item detail item_id: 35081
...
Collect item detail item_id: 35977
...
Collect item detail item_id: 35922
...
Collect item detail item_id: 34563
...
Collect item detail item_id: 33816
...
Collect item detail item_id: 36111
...
Collect item detail item_id: 35347
...
Collect item detail item_id: 34816
...
Collect item detail item_id: 32873
...
Collect item detail item_id: 35978
...
Collect item detail item_id: 35774
...
Collect item detail item_id: 34772
...
Collect item detail item_id: 33077
...
Collect item detail item_id: 34831
...
Collect item detail item_id: 34575
...
Collect item detail item_id: 33169
...
Collect item detail item_id: 30139
...
Collect item detail item_id: 35102
...
Collect item detail item_id: 34701
...
Collect item detail item_

Collect item detail item_id: 30787
...
Collect item detail item_id: 32248
...
Collect item detail item_id: 32206
...
Collect item detail item_id: 32194
...
Collect item detail item_id: 31766
...
Collect item detail item_id: 33106
...
Collect item detail item_id: 32510
...
Collect item detail item_id: 31934
...
Collect item detail item_id: 31135
...
Collect item detail item_id: 33243
...
Collect item detail item_id: 32646
...
Collect item detail item_id: 32035
...
Collect item detail item_id: 31301
...
Collect item detail item_id: 32667
...
Collect item detail item_id: 32731
...
Collect item detail item_id: 31749
...
Collect item detail item_id: 31507
...
Collect item detail item_id: 31468
...
Collect item detail item_id: 14153
...
Collect item detail item_id: 32068
...
Collect item detail item_id: 31649
...
Collect item detail item_id: 31566
...
Collect item detail item_id: 31173
...
Collect item detail item_id: 32113
...
Collect item detail item_id: 32026
...
Collect item detail item_

Collect item detail item_id: 29887
...
Collect item detail item_id: 29561
...
Collect item detail item_id: 28868
...
Collect item detail item_id: 28426
...
Collect item detail item_id: 29829
...
Collect item detail item_id: 29685
...
Collect item detail item_id: 29486
...
Collect item detail item_id: 28427
...
Collect item detail item_id: 28920
...
Collect item detail item_id: 29713
...
Collect item detail item_id: 29164
...
Collect item detail item_id: 27735
...
Collect item detail item_id: 29510
...
Collect item detail item_id: 27360
...
Collect item detail item_id: 29358
...
Collect item detail item_id: 27090
...
Collect item detail item_id: 28878
...
Collect item detail item_id: 27180
...
Collect item detail item_id: 29522
...
Collect item detail item_id: 28748
...
Collect item detail item_id: 27312
...
Collect item detail item_id: 15863
...
Collect item detail item_id: 29517
...
Collect item detail item_id: 28861
...
Collect item detail item_id: 27630
...
Collect item detail item_

Collect item detail item_id: 23781
...
Collect item detail item_id: 23690
...
Collect item detail item_id: 24384
...
Collect item detail item_id: 24064
...
Collect item detail item_id: 23796
...
Collect item detail item_id: 23665
...
Collect item detail item_id: 24695
...
Collect item detail item_id: 24223
...
Collect item detail item_id: 23688
...
Collect item detail item_id: 23728
...
Collect item detail item_id: 23380
...
Collect item detail item_id: 22297
...
Collect item detail item_id: 22334
...
Collect item detail item_id: 24258
...
Collect item detail item_id: 23802
...
Collect item detail item_id: 21951
...
Collect item detail item_id: 24180
...
Collect item detail item_id: 23783
...
Collect item detail item_id: 23739
...
Collect item detail item_id: 23478
...
Collect item detail item_id: 23782
...
Collect item detail item_id: 23599
...
Collect item detail item_id: 22274
...
Collect item detail item_id: 21977
...
Collect item detail item_id: 23775
...
Collect item detail item_

Collect item detail item_id: 20657
...
Collect item detail item_id: 19673
...
Collect item detail item_id: 19116
...
Collect item detail item_id: 20529
...
Collect item detail item_id: 20528
...
Collect item detail item_id: 19200
...
Collect item detail item_id: 19168
...
Collect item detail item_id: 19739
...
Collect item detail item_id: 20533
...
Collect item detail item_id: 18975
...
Collect item detail item_id: 18613
...
Collect item detail item_id: 20164
...
Collect item detail item_id: 19170
...
Collect item detail item_id: 20562
...
Collect item detail item_id: 20497
...
Collect item detail item_id: 19544
...
Collect item detail item_id: 19279
...
Collect item detail item_id: 19823
...
Collect item detail item_id: 20504
...
Collect item detail item_id: 20054
...
Collect item detail item_id: 19906
...
Collect item detail item_id: 20496
...
Collect item detail item_id: 20354
...
Collect item detail item_id: 19984
...
Collect item detail item_id: 17452
...
Collect item detail item_

Collect item detail item_id: 7660
...
Collect item detail item_id: 7490
...
Collect item detail item_id: 16332
...
Collect item detail item_id: 16307
...
Collect item detail item_id: 7968
...
Collect item detail item_id: 7874
...
Collect item detail item_id: 16558
...
Collect item detail item_id: 15986
...
Collect item detail item_id: 8917
...
Collect item detail item_id: 8770
...
Collect item detail item_id: 16109
...
Collect item detail item_id: 15757
...
Collect item detail item_id: 15985
...
Collect item detail item_id: 8508
...
Collect item detail item_id: 8267
...
Collect item detail item_id: 17125
...
Collect item detail item_id: 17054
...
Collect item detail item_id: 16482
...
Collect item detail item_id: 16276
...
Collect item detail item_id: 17097
...
Collect item detail item_id: 17055
...
Collect item detail item_id: 16237
...
Collect item detail item_id: 16236
...
Collect item detail item_id: 17007
...
Collect item detail item_id: 16902
...
Collect item detail item_id: 1237

Collect item detail item_id: 14866
...
Collect item detail item_id: 14772
...
Collect item detail item_id: 15212
...
Collect item detail item_id: 15178
...
Collect item detail item_id: 14600
...
Collect item detail item_id: 14598
...
Collect item detail item_id: 15208
...
Collect item detail item_id: 14985
...
Collect item detail item_id: 14782
...
Collect item detail item_id: 14764
...
Collect item detail item_id: 15225
...
Collect item detail item_id: 15175
...
Collect item detail item_id: 14887
...
Collect item detail item_id: 14669
...
Collect item detail item_id: 15206
...
Collect item detail item_id: 15167
...
Collect item detail item_id: 14884
...
Collect item detail item_id: 14325
...
Collect item detail item_id: 15134
...
Collect item detail item_id: 14942
...
Collect item detail item_id: 14417
...
Collect item detail item_id: 13857
...
Collect item detail item_id: 14830
...
Collect item detail item_id: 14584
...
Collect item detail item_id: 14722
...
Collect item detail item_

Collect item detail item_id: 12885
...
Collect item detail item_id: 12514
...
Collect item detail item_id: 12460
...
Collect item detail item_id: 12944
...
Collect item detail item_id: 11906
...
Collect item detail item_id: 8501
...
Collect item detail item_id: 8351
...
Collect item detail item_id: 12912
...
Collect item detail item_id: 12770
...
Collect item detail item_id: 8409
...
Collect item detail item_id: 8294
...
Collect item detail item_id: 12940
...
Collect item detail item_id: 12883
...
Collect item detail item_id: 12909
...
Collect item detail item_id: 8405
...
Collect item detail item_id: 13144
...
Collect item detail item_id: 12771
...
Collect item detail item_id: 12444
...
Collect item detail item_id: 13139
...
Collect item detail item_id: 12820
...
Collect item detail item_id: 12684
...
Collect item detail item_id: 8225
...
Collect item detail item_id: 8089
...
Collect item detail item_id: 13096
...
Collect item detail item_id: 12737
...
Collect item detail item_id: 125

Collect item detail item_id: 11024
...
Collect item detail item_id: 10954
...
Collect item detail item_id: 11241
...
Collect item detail item_id: 10504
...
Collect item detail item_id: 10230
...
Collect item detail item_id: 10727
...
Collect item detail item_id: 10612
...
Collect item detail item_id: 10314
...
Collect item detail item_id: 4579
...
Collect item detail item_id: 10767
...
Collect item detail item_id: 10722
...
Collect item detail item_id: 10718
...
Collect item detail item_id: 10119
...
Collect item detail item_id: 10657
...
Collect item detail item_id: 10457
...
Collect item detail item_id: 10641
...
Collect item detail item_id: 10571
...
Collect item detail item_id: 10181
...
Collect item detail item_id: 10051
...
Collect item detail item_id: 10383
...
Collect item detail item_id: 10652
...
Collect item detail item_id: 10291
...
Collect item detail item_id: 10227
...
Collect item detail item_id: 10415
...
Collect item detail item_id: 10724
...
Collect item detail item_i

Collect item detail item_id: 9189
...
Collect item detail item_id: 9503
...
Collect item detail item_id: 9362
...
Collect item detail item_id: 9017
...
Collect item detail item_id: 8879
...
Collect item detail item_id: 9314
...
Collect item detail item_id: 9260
...
Collect item detail item_id: 8979
...
Collect item detail item_id: 9312
...
Collect item detail item_id: 9417
...
Collect item detail item_id: 8738
...
Collect item detail item_id: 8606
...
Collect item detail item_id: 9349
...
Collect item detail item_id: 9353
...
Collect item detail item_id: 8987
...
Collect item detail item_id: 8511
...
Collect item detail item_id: 9357
...
Collect item detail item_id: 9239
...
Collect item detail item_id: 8925
...
Collect item detail item_id: 8841
...
Collect item detail item_id: 9332
...
Collect item detail item_id: 9272
...
Collect item detail item_id: 8874
...
Collect item detail item_id: 8751
...
Collect item detail item_id: 9331
...
Collect item detail item_id: 9304
...
Collect item

Collect item detail item_id: 8167
...
Collect item detail item_id: 8169
...
Collect item detail item_id: 8059
...
Collect item detail item_id: 7971
...
Collect item detail item_id: 8029
...
Collect item detail item_id: 8145
...
Collect item detail item_id: 8026
...
Collect item detail item_id: 8161
...
Collect item detail item_id: 8020
...
Collect item detail item_id: 8127
...
Collect item detail item_id: 7934
...
Collect item detail item_id: 7981
...
Collect item detail item_id: 8078
...
Collect item detail item_id: 8042
...
Collect item detail item_id: 8099
...
Collect item detail item_id: 8095
...
Collect item detail item_id: 8010
...
Collect item detail item_id: 8079
...
Collect item detail item_id: 8071
...
Collect item detail item_id: 7845
...
Collect item detail item_id: 7945
...
Collect item detail item_id: 7937
...
Collect item detail item_id: 7967
...
Collect item detail item_id: 7943
...
Collect item detail item_id: 4780
...
Collect item detail item_id: 8162
...
Collect item

Collect item detail item_id: 5370
...
Collect item detail item_id: 5274
...
Collect item detail item_id: 5270
...
Collect item detail item_id: 5238
...
Collect item detail item_id: 5116
...
Collect item detail item_id: 4940
...
Collect item detail item_id: 5070
...
Collect item detail item_id: 4875
...
Collect item detail item_id: 4986
...
Collect item detail item_id: 4850
...
Collect item detail item_id: 4985
...
Collect item detail item_id: 4845
...
Collect item detail item_id: 4696
...
Collect item detail item_id: 4563
...
Collect item detail item_id: 4840
...
Collect item detail item_id: 4787
...
Collect item detail item_id: 4839
...
Collect item detail item_id: 4786
...
Collect item detail item_id: 4785
...
Collect item detail item_id: 4775
...
Collect item detail item_id: 4774
...
Collect item detail item_id: 4768
...
Collect item detail item_id: 4728
...
Collect item detail item_id: 4706
...
Collect item detail item_id: 4705
...
Collect item detail item_id: 4724
...
Collect item

Collect item detail item_id: 3535
...
Collect item detail item_id: 3532
...
Collect item detail item_id: 3500
...
Collect item detail item_id: 3506
...
Collect item detail item_id: 3524
...
Collect item detail item_id: 3521
...
Collect item detail item_id: 3520
...
Collect item detail item_id: 3518
...
Collect item detail item_id: 3517
...
Collect item detail item_id: 3316
...
Collect item detail item_id: 3487
...
Collect item detail item_id: 3513
...
Collect item detail item_id: 3509
...
Collect item detail item_id: 2633
...
Collect item detail item_id: 3508
...
Collect item detail item_id: 3501
...
Collect item detail item_id: 3472
...
Collect item detail item_id: 3485
...
Collect item detail item_id: 3474
...
Collect item detail item_id: 3495
...
Collect item detail item_id: 3494
...
Collect item detail item_id: 3490
...
Collect item detail item_id: 3488
...
Collect item detail item_id: 3486
...
Collect item detail item_id: 3409
...
Collect item detail item_id: 3370
...
Collect item

Collect item detail item_id: 3067
...
Collect item detail item_id: 3086
...
Collect item detail item_id: 3085
...
Collect item detail item_id: 3084
...
Collect item detail item_id: 3082
...
Collect item detail item_id: 3081
...
Collect item detail item_id: 3080
...
Collect item detail item_id: 3055
...
Collect item detail item_id: 3027
...
Collect item detail item_id: 3079
...
Collect item detail item_id: 3078
...
Collect item detail item_id: 3077
...
Collect item detail item_id: 3075
...
Collect item detail item_id: 3074
...
Collect item detail item_id: 2984
...
Collect item detail item_id: 3069
...
Collect item detail item_id: 3066
...
Collect item detail item_id: 3064
...
Collect item detail item_id: 3063
...
Collect item detail item_id: 3062
...
Collect item detail item_id: 3060
...
Collect item detail item_id: 3053
...
Collect item detail item_id: 3052
...
Collect item detail item_id: 3059
...
Collect item detail item_id: 3058
...
Collect item detail item_id: 3051
...
Collect item

Collect item detail item_id: 2691
...
Collect item detail item_id: 2690
...
Collect item detail item_id: 2689
...
Collect item detail item_id: 2687
...
Collect item detail item_id: 2686
...
Collect item detail item_id: 2680
...
Collect item detail item_id: 2670
...
Collect item detail item_id: 2674
...
Collect item detail item_id: 2392
...
Collect item detail item_id: 2675
...
Collect item detail item_id: 2495
...
Collect item detail item_id: 2673
...
Collect item detail item_id: 2176
...
Collect item detail item_id: 1734
...
Collect item detail item_id: 2667
...
Collect item detail item_id: 2666
...
Collect item detail item_id: 2664
...
Collect item detail item_id: 2560
...
Collect item detail item_id: 2662
...
Collect item detail item_id: 2657
...
Collect item detail item_id: 2400
...
Collect item detail item_id: 2659
...
Collect item detail item_id: 2652
...
Collect item detail item_id: 2609
...
Collect item detail item_id: 2655
...
Collect item detail item_id: 2653
...
Collect item

Collect item detail item_id: 1960
...
Collect item detail item_id: 1955
...
Collect item detail item_id: 1957
...
Collect item detail item_id: 1954
...
Collect item detail item_id: 1946
...
Collect item detail item_id: 1926
...
Collect item detail item_id: 1922
...
Collect item detail item_id: 1924
...
Collect item detail item_id: 1916
...
Collect item detail item_id: 1911
...
Collect item detail item_id: 1910
...
Collect item detail item_id: 1903
...
Collect item detail item_id: 1895
...
Collect item detail item_id: 1892
...
Collect item detail item_id: 1889
...
Collect item detail item_id: 1880
...
Collect item detail item_id: 1877
...
Collect item detail item_id: 1875
...
Collect item detail item_id: 1868
...
Collect item detail item_id: 1863
...
Collect item detail item_id: 1861
...
Collect item detail item_id: 1860
...
Collect item detail item_id: 1858
...
Collect item detail item_id: 1853
...
Collect item detail item_id: 1849
...
Collect item detail item_id: 1851
...
Collect item

Collect item detail item_id: 1363
...
Collect item detail item_id: 1362
...
Collect item detail item_id: 1357
...
Collect item detail item_id: 1351
...
Collect item detail item_id: 1348
...
Collect item detail item_id: 1345
...
Collect item detail item_id: 1344
...
Collect item detail item_id: 1343
...
Collect item detail item_id: 1334
...
Collect item detail item_id: 1328
...
Collect item detail item_id: 1333
...
Collect item detail item_id: 1330
...
Collect item detail item_id: 1325
...
Collect item detail item_id: 1324
...
Collect item detail item_id: 1323
...
Collect item detail item_id: 1322
...
Collect item detail item_id: 1321
...
Collect item detail item_id: 1309
...
Collect item detail item_id: 1305
...
Collect item detail item_id: 1302
...
Collect item detail item_id: 1299
...
Collect item detail item_id: 1297
...
Collect item detail item_id: 1295
...
Collect item detail item_id: 1292
...
Collect item detail item_id: 1286
...
Collect item detail item_id: 1284
...
Collect item

Collect item detail item_id: 747
...
Collect item detail item_id: 744
...
Collect item detail item_id: 743
...
Collect item detail item_id: 741
...
Collect item detail item_id: 740
...
Collect item detail item_id: 738
...
Collect item detail item_id: 737
...
Collect item detail item_id: 736
...
Collect item detail item_id: 730
...
Collect item detail item_id: 727
...
Collect item detail item_id: 724
...
Collect item detail item_id: 723
...
Collect item detail item_id: 722
...
Collect item detail item_id: 719
...
Collect item detail item_id: 717
...
Collect item detail item_id: 716
...
Collect item detail item_id: 715
...
Collect item detail item_id: 713
...
Collect item detail item_id: 711
...
Collect item detail item_id: 709
...
Collect item detail item_id: 707
...
Collect item detail item_id: 706
...
Collect item detail item_id: 700
...
Collect item detail item_id: 695
...
Collect item detail item_id: 692
...
Collect item detail item_id: 688
...
Collect item detail item_id: 686
...
C

In [1]:
print('Total product count:', len(house_warming_item_detail_crawler.house_warming_details))
print(house_warming_item_detail_crawler.house_warming_details[0])

NameError: name 'house_warming_item_detail_crawler' is not defined

# `input/data-house-warming.csv` 생성

In [9]:
import os
import shutil

input_dir_path = os.path.join(os.getcwd(), 'input')
if os.path.exists(input_dir_path) is False:
    # 현재 위치에 input 폴더 생성
    os.mkdir(input_dir_path)

In [10]:
# input 폴더 만들고 data.csv 에 product_n_reviews 내용 저장하기
import pandas

df = pandas.DataFrame(house_warming_item_detail_crawler.house_warming_details)
df.head(3)

Unnamed: 0,id,residence,area,region,expertise,color_list,style_list,constructions,purchased_product_ids,family_list,like_count,reply_count,scrap_count,view_count,share_count
0,40578,아파트,30평,경기도,홈스타일링,,,,"[10865, 36591, 36592, 81695, 154085, 163351, 1...",[부모님과 함께 사는 집],14,2,184,6311,0
1,39868,아파트,33평,울산광역시,리모델링,"화이트,그레이",미니멀&심플,"주방리모델링,조명시공,발코니확장","[61601, 68072, 97064, 108193, 153647, 194353, ...",[신혼부부],450,42,902,40882,211
2,39270,원룸&오피스텔,8평,경기도,홈스타일링,,미니멀&심플,,"[57577, 69119, 71718, 144799, 209658, 215597, ...",[싱글라이프],82,23,293,8092,14


In [11]:
df.to_csv("./input/data-house-warming.csv", sep='\t', na_rep='', encoding="UTF-16")

In [12]:
# 잘 만들어졌는지 확인
pandas.read_csv('./input/data-house-warming.csv', sep='\t', encoding="UTF-16")

Unnamed: 0.1,Unnamed: 0,id,residence,area,region,expertise,color_list,style_list,constructions,purchased_product_ids,family_list,like_count,reply_count,scrap_count,view_count,share_count
0,0,40578,아파트,30평,경기도,홈스타일링,,,,"[10865, 36591, 36592, 81695, 154085, 163351, 1...",['부모님과 함께 사는 집'],14,2,184,6311,0
1,1,39868,아파트,33평,울산광역시,리모델링,"화이트,그레이",미니멀&심플,"주방리모델링,조명시공,발코니확장","[61601, 68072, 97064, 108193, 153647, 194353, ...",['신혼부부'],450,42,902,40882,211
2,2,39270,원룸&오피스텔,8평,경기도,홈스타일링,,미니멀&심플,,"[57577, 69119, 71718, 144799, 209658, 215597, ...",['싱글라이프'],82,23,293,8092,14
3,3,39218,단독주택,15평,서울특별시 종로구,홈스타일링,브라운,"미니멀&심플,내추럴",,"[611, 25202, 32236, 45201, 89696, 119201, 1657...",['신혼부부'],51,8,125,6124,12
4,4,38991,아파트,30평,부산광역시,홈스타일링,"그레이,베이지,브라운","모던,미니멀&심플,내추럴,북유럽","조명시공,중문","[6752, 34365, 38044, 40513, 42263, 71714, 8525...",['신혼부부'],149,16,586,13621,43
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4150,4150,442,아파트,20평대,,홈스타일링,브라운,내추럴,,"[68, 69, 70, 71, 72, 73, 74, 75, 416, 14749, 3...",['신혼부부'],65,10,244,31543,37
4151,4151,439,아파트,20평대,,리모델링,"화이트,그레이",내추럴,,"[56, 57, 58, 59, 61, 66, 313, 409, 410, 411, 4...",['신혼부부'],79,17,320,41496,50
4152,4152,434,아파트,24평,,홈스타일링,브라운,내추럴,,"[1, 4, 5, 6, 472, 484, 488, 494, 81927]",['아기가 있는 집'],26,6,65,14829,20
4153,4153,432,아파트,20평대,,홈스타일링,"라이트 브라운,블루",내추럴,"조명시공,중문,발코니확장","[8, 9, 10, 15, 16, 17, 18, 19, 20, 22, 49, 100...",['신혼부부'],41,8,60,15149,17
