### 2. 오설록 : Scrapy

- spider.py 파일에서 아래의 절차로 상품데이터를 수집하세요.
    - 오설록 사이트의 전체상품 페이지에서 각 페이지 URL 크롤링
        - 오설록 전체 상품 페이지 URL : https://www.osulloc.com/kr/ko/shop/item/list?category=teashop
    - 각 상품 페이지 크롤링
    - 상품의 상세 페이지 크롤링
    
- 수집 데이터
    - 상품명, 상품설명, 가격정보, 별점, 이미지링크, 상품링크
    - title, desc, price, star, img, link
    
- 수집된 데이터를 파이프라인을 이용하여 mongodb에 저장하세요.

In [2]:
import scrapy
import requests
from scrapy.http import TextResponse
from fake_useragent import UserAgent

In [1]:
# 프로젝트 생성

In [1]:
!rm -rf osulloc/

In [2]:
!scrapy startproject osulloc

New Scrapy project 'osulloc', using template directory 'c:\users\samsung\anaconda\lib\site-packages\scrapy\templates\project', created in:
    C:\Users\samsung\Desktop\fastcampus\programming\programming_1\crawling exam\osulloc

You can start your first spider with:
    cd osulloc
    scrapy genspider example example.com


#### 1. 아이템 모델 코드 추가

In [4]:
%%writefile osulloc/osulloc/items.py


import scrapy


class OsullocItem(scrapy.Item):
    title = scrapy.Field()
    desc = scrapy.Field()
    price = scrapy.Field()
    star = scrapy.Field()
    img = scrapy.Field()
    link = scrapy.Field()

Overwriting osulloc/osulloc/items.py


In [None]:
# xpath 확인

In [16]:
req = requests.get("https://www.osulloc.com/kr/ko/shop/item/list?category=teashop")
response = TextResponse(req.url, body=req.text, encoding="utf-8")
response

<200 https://www.osulloc.com/kr/ko/shop/item/list?category=teashop>

In [17]:
#링크
links = response.xpath('//*[@id="contents"]/div[2]/div/div[2]/div[3]/div[1]/div/div/a/@href').extract()
links = list(map(response.urljoin, links))
links[:3]


['https://www.osulloc.com/kr/ko/shop/item/teashop/1461',
 'https://www.osulloc.com/kr/ko/shop/item/teashop/16945',
 'https://www.osulloc.com/kr/ko/shop/item/teashop/16943']

In [18]:
url = links[0]
headers = { "User-Agent": UserAgent().chrome }
req = requests.get(url, headers=headers)
response = TextResponse(req.url, body=req.text, encoding="utf-8") 
response

<200 https://www.osulloc.com/kr/ko/shop/item/teashop/1461>

In [27]:
link = response.xpath('//*[@id="pagination"]/a[1]')
link

[]

In [26]:
title = response.xpath('//*[@id="contents"]/div[1]/div[1]/div[2]/p[1]/text()')[0].extract()
desc = response.xpath('//*[@id="onePointArea"]/text()')[0].extract()
price = response.xpath('//*[@id="contents"]/div[1]/div[1]/div[2]/div[2]/div[2]/p/strong/text()')[0].extract()
star = response.xpath('//*[@id="contents"]/div[1]/div[2]/div/div/span/text()')[0].extract()
img = response.xpath('//*[@id="contents"]/div[1]/div[1]/div[1]/div/div[1]/img/@src').extract()

title, desc, price, star, img

('러블리티박스',
 '즐겁고 행복한 티타임을 선사하는 달콤하고 향긋한 오설록만의 특별한 블렌디드 티 선물세트 입니다.',
 '20,000',
 '4.6',
 ['/upload/kr/ko/adminImage/PM/FK/20180510132548266RY.png?quality=80'])

#### 2. 스파이더 코드 추가

In [None]:
https://www.osulloc.com/kr/ko/shop/item/list?category=teashop
https://www.osulloc.com/kr/ko/shop/item/list?category=bakery
    

In [69]:
!pip install scrapy-fake-useragent

Collecting scrapy-fake-useragent
  Downloading https://files.pythonhosted.org/packages/cc/8d/faa730b8d1cb5114cb8d314b078167694d17c3d394992490551c2308928d/scrapy_fake_useragent-1.2.0-py2.py3-none-any.whl
Installing collected packages: scrapy-fake-useragent
Successfully installed scrapy-fake-useragent-1.2.0


In [91]:
%%writefile osulloc/osulloc/spiders/spider.py

import scrapy
from osulloc.items import OsullocItem

class OsullocSpider(scrapy.Spider):
    name = "Osulloc"
    custom_settings = {
        "DOWNLOADER_MIDDLEWARES": {
            "scrapy.downloadmiddlewares.useragent.UserAgentMiddleware": None,
            "scrapy_fake_useragent.middleware.RandomUserAgentMiddleware": 400,
        }
    }
    
    def __init__(self, category="", **kwargs):
        self.start_urls = ["https://www.osulloc.com/kr/ko/shop/item/list?category={}".format(category)]
        super().__init__(**kwargs)
        
    def start_requests(self):
        urls = ["https://www.osulloc.com/kr/ko/shop/item/list?pack=tealeaf&sort=review&category=teashop"]
        for url in urls:
            yield scrapy.Request(url, callback=self.parse)
            
    def parse(self, response):
        links = response.xpath('//*[@id="contents"]/div[2]/div/div[2]/div[3]/div[1]/div/div/a/@href').extract()
        links = list(map(lambda data:response.urljoin(data), links))
        for link in links:
            yield scrapy.Request(link, callback=self.parse_content)
            
    def parse_content(self, response):
        item = OsullocItem()
        item["title"] = response.xpath('//*[@id="contents"]/div[1]/div[1]/div[2]/p[1]/text()')[0].extract()
        item["desc"] = response.xpath('//*[@id="onePointArea"]/text()')[0].extract()
        item["price"] = response.xpath('//*[@id="contents"]/div[1]/div[1]/div[2]/div[2]/div[2]/p/strong/text()')[0].extract()
        item["star"] = response.xpath('//*[@id="contents"]/div[1]/div[2]/div/div/span/text()')[0].extract()
        item["img"] = response.xpath('//*[@id="contents"]/div[1]/div[1]/div[1]/div/div[1]/img/@src').extract()
        item["link"] = response.url
        yield item

Overwriting osulloc/osulloc/spiders/spider.py


In [71]:
# 5. 스크래피 실행시 아규먼트 설정

In [72]:
# teashop, bakery, themashop

In [92]:
%%writefile run.sh
cd osulloc
scrapy crawl Osulloc -o osulloc1.csv -a category=teashop

Overwriting run.sh


In [66]:
# 실행권한 부여
!chmod +x run.sh

In [74]:
! ./run.sh

In [93]:
df = pd.read_csv("osulloc/osulloc1.csv")
df.tail(2)

Unnamed: 0,desc,img,link,price,star,title
22,제주 차밭의 어린 찻잎으로 만든 작설차\r\n,/upload/kr/ko/adminImage/DA/ZO/201909192046228...,https://www.osulloc.com/kr/ko/shop/item/teasho...,40000,4.8,세작 80g(잎차)
23,상쾌하고 맑은 기운을 전하는 건강차\r\n,/upload/kr/ko/adminImage/FU/NE/201910142044377...,https://www.osulloc.com/kr/ko/shop/item/teasho...,35000,4.8,도라지차 80g(잎차)


#### 3. 데이터 베이스 코드 추가

In [81]:
import pymongo

In [82]:
client = pymongo.MongoClient('mongodb://54.180.179.245:27017/')
client

MongoClient(host=['54.180.179.245:27017'], document_class=dict, tz_aware=False, connect=True)

In [94]:
db = client.osulloc
collection = db.teashop
collection

Collection(Database(MongoClient(host=['54.180.179.245:27017'], document_class=dict, tz_aware=False, connect=True), 'osulloc'), 'teashop')

In [95]:
data = {"title":"teashop"}
collection.insert(data)

ObjectId('5f0ea7207effc4ff9392dd0a')

In [96]:
%%writefile osulloc/osulloc/mongodb.py

import pymongo

client = pymongo.MongoClient('mongodb://54.180.179.245:27017/')
db = client.osulloc
collection = db.teashop

Overwriting osulloc/osulloc/mongodb.py


#### 4. 파이프라인 코드 추가

In [107]:
%%writefile osulloc/osulloc/pipelines.py

from .mongodb import collection

class OsullocPipeline(object):
    
from .mongodb import collection

class OsullocPipeline(object):

    def process_item(self, item, spider):

        # mongodb에 저장
        columns = ["title", "desc", "price", "star", "img", "link"]
        data = {column: item[column] for column in columns}
        collection.insert(data)

        return item

Overwriting osulloc/osulloc/pipelines.py


In [13]:
# 파이프 라인 설정

In [87]:
!echo "ITEM_PIPELINES = {" >> osulloc/osulloc/settings.py

In [100]:
!echo "   'osulloc.pipelines.OsullocPipeline': 300," >> osulloc/osulloc/settings.py

In [101]:
!echo "}" >> osulloc/osulloc/settings.py

In [102]:
!tail -n 3 osulloc/osulloc/settings.py

"}" 
"   'osulloc.pipelines.OsullocPipeline': 300," 
"}" 


In [18]:
# 코드의 실행

In [19]:
# teashop, bakery, themashop

In [20]:
%%writefile run.sh
cd osulloc
scrapy crawl Osulloc -o osulloc_teashop.csv

Overwriting run.sh


In [108]:
!rm osulloc/*.csv

In [22]:
!source run.sh

2020-03-16 01:41:40 [scrapy.utils.log] INFO: Scrapy 1.6.0 started (bot: osulloc)
2020-03-16 01:41:40 [scrapy.utils.log] INFO: Versions: lxml 4.4.1.0, libxml2 2.9.9, cssselect 1.0.3, parsel 1.5.1, w3lib 1.20.0, Twisted 19.2.1, Python 3.7.3 (default, Mar 27 2019, 16:54:48) - [Clang 4.0.1 (tags/RELEASE_401/final)], pyOpenSSL 19.1.0 (OpenSSL 1.1.1  11 Sep 2018), cryptography 2.8, Platform Darwin-19.3.0-x86_64-i386-64bit
2020-03-16 01:41:40 [scrapy.crawler] INFO: Overridden settings: {'BOT_NAME': 'osulloc', 'FEED_FORMAT': 'csv', 'FEED_URI': 'osulloc_teashop.csv', 'NEWSPIDER_MODULE': 'osulloc.spiders', 'ROBOTSTXT_OBEY': True, 'SPIDER_MODULES': ['osulloc.spiders']}
2020-03-16 01:41:40 [scrapy.extensions.telnet] INFO: Telnet Password: 0fa170f352e50849
2020-03-16 01:41:40 [scrapy.middleware] INFO: Enabled extensions:
['scrapy.extensions.corestats.CoreStats',
 'scrapy.extensions.telnet.TelnetConsole',
 'scrapy.extensions.memusage.MemoryUsage',
 'scrapy.extensions.feedexport.FeedExporter',
 'scra

2020-03-16 01:41:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/14123>
{'desc': '마스터의 블렌딩으로 탄생한 오설록만의 제주의 향기를 담은 대표 블렌디드 티 모음 세트 입니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/RB/SP/20170418183503104KB.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/14123',
 'price': '50,000',
 'star': '4.7',
 'title': '마스터블렌드'}
2020-03-16 01:41:43 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16344> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=0)
2020-03-16 01:41:43 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/4526> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=0)
2020-03-16 01:41:43 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/17003>
{'desc': '어린 찻잎을 정성껏 갈아 만든 깊은 풍미의 유기농 말차\r\n\r\n',
 'img': 'https://www.

2020-03-16 01:41:45 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16183>
{'desc': '해당 제품은 리뉴얼 예정입니다. 새로운 모습으로 찾아뵙겠습니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/IC/NJ/20190705173534077WG.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16183',
 'price': '22,500',
 'star': '0.0',
 'title': '달빛보틀 + WATER+ 핑크사워 30입(분말)'}
2020-03-16 01:41:45 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16185> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=5)
2020-03-16 01:41:45 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16192>
{'desc': '깔끔하고 휴대성이 좋은 오설록 텀블러와 함께 제주 햇살 받고 자란 싱그러운 제주 영귤 아이스티를 시원하게 즐기세요.\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/SS/NW/20190705173735416TR.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16192',
 'price': '20,000',


2020-03-16 01:41:46 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16350>
{'desc': '기분좋은 상쾌함을 선사하는 허브차\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/HJ/UD/20190919203145281QH.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16350',
 'price': '12,000',
 'star': '4.0',
 'title': '모로칸민트티 10입(피라미드)'}
2020-03-16 01:41:46 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16145> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=5)
2020-03-16 01:41:46 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16045> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=5)
2020-03-16 01:41:46 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16784> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=5)
2020-03-16 01:

2020-03-16 01:41:48 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16783>
{'desc': '어린 차싹의 형상을 그대로 간직한 오설록의 최고 명차입니다. 중요한 분께 가치있는 선물로 추천합니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/ET/KA/20200224163154862PM.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16783',
 'price': '170,000',
 'star': '4.9',
 'title': '일로향 60 g(잎차)'}
2020-03-16 01:41:48 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16263>
{'desc': '[온라인몰 한정판매]중후하고 구수한 반발효차',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/AN/SQ/20190808150625126WD.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16263',
 'price': '8,500',
 'star': '4.4',
 'title': '제주 한라발효차 20EA(티백)'}
2020-03-16 01:41:48 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16704>
{'desc': '예로부터 환절기에 차로 만들어 마시곤 했던 도라지차를 피라미드 티백으로 간편하게, 합리적인 가격으로 즐

2020-03-16 01:41:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16644>
{'desc': '한라산의 화산암석층이 키워낸 구수한 풍미의 반발효차',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/LU/OP/20191007151511229GH.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16644',
 'price': '15,000',
 'star': '4.9',
 'title': '제주화산암차 30 g(잎차)'}
2020-03-16 01:41:49 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/14384> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=2)
2020-03-16 01:41:49 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/15244> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=2)
2020-03-16 01:41:49 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16361>
{'desc': '조화로운 풍미와 오설록 역사를 간직한 녹차\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/

2020-03-16 01:41:50 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/17065> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=2)
2020-03-16 01:41:50 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/17065>
{'desc': ' 달큰한 배향이 달빛처럼 은은하게 감도는 후발효차 \r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/UH/LQ/20200103141020063JJ.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/17065',
 'price': '15,000',
 'star': '4.4',
 'title': '달빛걷기 35 g(잎차)'}
2020-03-16 01:41:50 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16967> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=2)
2020-03-16 01:41:50 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16765> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=2)
2020-03

2020-03-16 01:41:52 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16713>
{'desc': '달콤 쌉싸름한 아포가토향이 은은하게 퍼지는 블렌디드티를 고급스러운 지함에 담았습니다. 소중한 사람에게 선물해보세요.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/RE/IZ/20191017181033328JU.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16713',
 'price': '15,000',
 'star': '4.6',
 'title': '바닐라에스프레소 10입(피라미드)'}
2020-03-16 01:41:52 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/5662>
{'desc': '제주 삼나무의 그윽한 풍미에 싱그러운 영귤의 상큼함이 매력적인 블렌디드 티입니다. 제주 차밭의 맛과 향을 가벼운 가격으로 '
         '즐겨보세요.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/KQ/WI/20160829162627548VB.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/5662',
 'price': '8,500',
 'star': '4.5',
 'title': '제주삼다영귤티 20입(티백)'}
2020-03-16 01:41:52 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/618

2020-03-16 01:41:53 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/5664>
{'desc': '제주 동백꽃의 달큰한 향미가 아찔하고 짙은 정열적이고 매혹적인 블렌디드 티입니다.  제주 차밭의 맛과 향을 가벼운 가격으로 '
         '즐겨보세요.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/GC/HY/20160829162553549AQ.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/5664',
 'price': '8,500',
 'star': '4.6',
 'title': '제주동백꽃티 20입(티백)'}
2020-03-16 01:41:53 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/17063>
{'desc': '제주 왕벚꽃향의 화사함에 달콤새콤한 과실향이 어우러진 블렌디드티\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/CS/TM/20200103140038011DF.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/17063',
 'price': '23,000',
 'star': '4.8',
 'title': '벚꽃향가득한올레 20입(피라미드)'}
2020-03-16 01:41:53 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16969>
{'desc': '달큰한 배향이 달빛처

2020-03-16 01:41:55 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16352>
{'desc': '제주의 따뜻한 햇살로 약발효한 부드러운 향미의 우롱차\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/QF/OV/20191104184148887FE.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16352',
 'price': '15,000',
 'star': '5.0',
 'title': '청우롱 10입'}
2020-03-16 01:41:55 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/16124>
{'desc': '제주 햇살 받고 자란 싱그러운 제주 영귤 아이스티입니다. 찬물에도 잘 우러나와 간편하게 즐기실 수 있습니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/SR/HE/20190528165408391LI.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/16124',
 'price': '24,000',
 'star': '4.5',
 'title': '영귤섬 (20입)*2ea'}
2020-03-16 01:41:55 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/15228> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category

2020-03-16 01:41:56 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/15543>
{'desc': '해당 제품은 단종되었습니다. 더 좋은 제품으로 찾아뵙겠습니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/YB/DV/20180911120416298AY.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/15543',
 'price': '40,000',
 'star': '4.9',
 'title': '세작아트콜라보 80 g (잎차)'}
2020-03-16 01:41:57 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16709> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=1)
2020-03-16 01:41:57 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/14164> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=4)
2020-03-16 01:41:57 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/16643> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=teashop&p=0)
2

2020-03-16 01:41:58 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 58909,
 'downloader/request_count': 151,
 'downloader/request_method_count/GET': 151,
 'downloader/response_bytes': 8513037,
 'downloader/response_count': 151,
 'downloader/response_status_count/200': 151,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2020, 3, 15, 16, 41, 58, 180174),
 'item_scraped_count': 144,
 'log_count/DEBUG': 297,
 'log_count/INFO': 10,
 'memusage/max': 59346944,
 'memusage/startup': 59342848,
 'request_depth_max': 1,
 'response_received_count': 151,
 'robotstxt/request_count': 1,
 'robotstxt/response_count': 1,
 'robotstxt/response_status_count/200': 1,
 'scheduler/dequeued': 150,
 'scheduler/dequeued/memory': 150,
 'scheduler/enqueued': 150,
 'scheduler/enqueued/memory': 150,
 'start_time': datetime.datetime(2020, 3, 15, 16, 41, 40, 966755)}
2020-03-16 01:41:58 [scrapy.core.engine] INFO: Spider closed (finished)
2020-03-16 01:41:59 [scrapy.uti

2020-03-16 01:42:02 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/bakery/16103>
{'desc': '정성스레 겹겹이 쌓은 크레이프 사이에 진한 녹차와 마스카포네 치즈를 넣어 달콤하고 깊은 풍미의 크레이프 케이크로 소중한 '
         '사람과 달콤한 시간을 보내보세요.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/YY/EO/20200122161702161VW.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/bakery/16103',
 'price': '42,500',
 'star': '4.4',
 'title': '제주 녹차 크레이프 케이크'}
2020-03-16 01:42:02 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/bakery/16363>
{'desc': '진한 녹차 크림의 풍미를 느낄 수 있는 프리미엄 웨하스\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/QT/IG/20190919205017968RP.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/bakery/16363',
 'price': '5,000',
 'star': '4.7',
 'title': '그린티웨하스 100g'}
2020-03-16 01:42:03 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/bakery/6402> (referer: https://www.osull

2020-03-16 01:42:07 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/17407> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=3)
2020-03-16 01:42:08 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/15903> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=3)
2020-03-16 01:42:08 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/13703> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=3)
2020-03-16 01:42:08 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/17404> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=3)
2020-03-16 01:42:08 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/16164>
{'desc': '차의 시원함을 느끼며 깔끔하고 휴대성이 좋은 오설록 텀블러\r\n',
 'img': 'https:

2020-03-16 01:42:09 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/1461> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=0)
2020-03-16 01:42:09 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/14563> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=2)
2020-03-16 01:42:09 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/teashop/1461>
{'desc': '즐겁고 행복한 티타임을 선사하는 달콤하고 향긋한 오설록만의 특별한 블렌디드 티 선물세트 입니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/PM/FK/20180510132548266RY.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/teashop/1461',
 'price': '20,000',
 'star': '4.6',
 'title': '러블리티박스'}
2020-03-16 01:42:09 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/14563>
{'desc': '몸속부터 아름답게 이너뷰티의 시작은 오설록 티라이프로부터 입니다. 아름다워지고 건강해지는 생활습관을

2020-03-16 01:42:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/14770>
{'desc': '새로 리뉴얼된 패키지로 배송될 예정입니다. 상세페이지 이미지는 변경 작업 중이오니 양해 부탁드립니다.\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/NB/ZE/20200211204357439RJ.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/themashop/14770',
 'price': '23,000',
 'star': '4.8',
 'title': '오피스Tea-프리미엄(레드파파야)'}
2020-03-16 01:42:10 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/15763> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=1)
2020-03-16 01:42:10 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/15103>
{'desc': '녹차스프레드와 별도 포장이 필요 없는 고급 패키지로 구성된 쨈 2종 세트입니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/WY/UK/20180319145258513JZ.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/themashop/15103',
 'price': '19,0

2020-03-16 01:42:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/14304>
{'desc': '어린 녹차로 만든 명차수와 고급 시트마스크가 피부에 보습감을 빈틈없이 채워 줍니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/EJ/RS/20170717123419850QU.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/themashop/14304',
 'price': '12,000',
 'star': '4.8',
 'title': '그린티 시트 마스크'}
2020-03-16 01:42:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/15823>
{'desc': '제주만의 특별함을 담아내어 블렌딩한 프리미엄 티백 라인입니다. 그윽한 후발효차의 은은한 풍미를 다양하게 즐길 수 있는 '
         '세트입니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/UL/IX/20181126162854315JE.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/themashop/15823',
 'price': '27,500',
 'star': '4.6',
 'title': '은은 박스'}
2020-03-16 01:42:12 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/13863>
{'desc': '해당 제품은 단종되었

2020-03-16 01:42:13 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/14774> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=2)
2020-03-16 01:42:13 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/16743> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=2)
2020-03-16 01:42:13 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/themashop/15143> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=2)
2020-03-16 01:42:13 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/14774>
{'desc': '새로 리뉴얼된 패키지로 배송될 예정입니다. 상세페이지 이미지는 변경 작업 중이오니 양해 부탁드립니다.',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/QI/HF/20200211204639828PZ.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/themashop/14774',
 'price': '23,000',
 'star': '

2020-03-16 01:42:15 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/15524> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=0)
2020-03-16 01:42:15 [scrapy.core.engine] DEBUG: Crawled (200) <GET https://www.osulloc.com/kr/ko/shop/item/teashop/2684> (referer: https://www.osulloc.com/kr/ko/shop/item/list?category=themashop&p=0)
2020-03-16 01:42:15 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/bakery/16363>
{'desc': '진한 녹차 크림의 풍미를 느낄 수 있는 프리미엄 웨하스\r\n',
 'img': 'https://www.osulloc.com/upload/kr/ko/adminImage/QT/IG/20190919205017968RP.png?quality=80',
 'link': 'https://www.osulloc.com/kr/ko/shop/item/bakery/16363',
 'price': '5,000',
 'star': '4.7',
 'title': '그린티웨하스 100g'}
2020-03-16 01:42:15 [scrapy.core.scraper] DEBUG: Scraped from <200 https://www.osulloc.com/kr/ko/shop/item/themashop/14765>
{'desc': 'office공간에서, 업무 중에 언제든 오설록 티하우스에 있는 것 처럼~ 차 한잔으로 업무로 지친 당신에게 여유를 '
      

2020-03-16 01:42:16 [scrapy.core.engine] INFO: Closing spider (finished)
2020-03-16 01:42:16 [scrapy.extensions.feedexport] INFO: Stored csv feed (83 items) in: osulloc_themashop.csv
2020-03-16 01:42:16 [scrapy.statscollectors] INFO: Dumping Scrapy stats:
{'downloader/request_bytes': 34748,
 'downloader/request_count': 89,
 'downloader/request_method_count/GET': 89,
 'downloader/response_bytes': 4872619,
 'downloader/response_count': 89,
 'downloader/response_status_count/200': 89,
 'finish_reason': 'finished',
 'finish_time': datetime.datetime(2020, 3, 15, 16, 42, 16, 540339),
 'item_scraped_count': 83,
 'log_count/DEBUG': 174,
 'log_count/INFO': 10,
 'memusage/max': 58982400,
 'memusage/startup': 58978304,
 'request_depth_max': 1,
 'response_received_count': 89,
 'robotstxt/request_count': 1,
 'robotstxt/response_count': 1,
 'robotstxt/response_status_count/200': 1,
 'scheduler/dequeued': 88,
 'scheduler/dequeued/memory': 88,
 'scheduler/enqueued': 88,
 'sched

In [23]:
# 크롤링 결과 확인

In [24]:
df = pd.read_csv("osulloc/osulloc_teashop.csv")

In [29]:
df.tail(2)

Unnamed: 0,title,desc,price,star,img,link
239,메모리인제주세트 20입,제주 꽃길에서 느꼈던 설렘의 향기와 달콤했던 사랑의 추억을 떠 올려줄 제주 꽃을 모...,38000,4.8,https://www.osulloc.com/upload/kr/ko/adminImag...,https://www.osulloc.com/kr/ko/shop/item/teasho...


In [30]:
# 데이터 베이스 저장 확인

In [104]:
import pymongo

client = pymongo.MongoClient('mongodb://54.180.179.245:27017/')
db = client.osulloc
collection = db.items

In [105]:
datas = collection.find({}, {"_id": False})

In [106]:
pd.DataFrame(datas).head(2)