# Lohbs 상품 정보 스크래핑

In [1]:
# import packages
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import pandas as pd
import time

# Keyword to search for on the shop
search_word = "립밤"

# Today's date in local time, formatted as YY-MM-DD
# (strftime uses the current local time when no time tuple is given)
today = time.strftime("%y-%m-%d")

In [2]:
# Base URL of the Lohbs search page; the search keyword is appended to it
# as a `keyword` query parameter when the page is requested.
url_lohbs = "https://www.lohbs.co.kr/searchEngine/search"

In [3]:
# Load the search result page for the keyword in a Selenium-driven Chrome.
driver = webdriver.Chrome("../driver/chromedriver")
try:
    driver.get(f"{url_lohbs}?keyword={search_word}")

    # Work out how many scrolls are needed to load the whole product list.
    # The product count is read from the page; the first character and the
    # last two characters around the number are stripped before parsing.
    r = driver.page_source
    soup = BeautifulSoup(r, 'html.parser')
    prd_num = soup.select_one(".search-page-num").string[1:-2]
    # NOTE(review): presumably 12 products are loaded per scroll and the
    # extra 2 scrolls are a safety margin -- confirm against the site.
    scroll_num = int(prd_num) // 12 + 2

    # Scroll to the bottom that many times, pausing after each scroll.
    for _ in range(scroll_num):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

    # Read the page source once scrolling has finished.
    r = driver.page_source
finally:
    # Always shut the browser down, even when loading or parsing fails,
    # so no Chrome/chromedriver process is left running.
    driver.quit()

In [4]:
# Extract and return the information of a single product
def extract_prd(html):
    """Extract the fields of one product from its parsed HTML element.

    Args:
        html: A parsed element supporting ``select_one`` (for example a
            BeautifulSoup tag) that contains one product entry.

    Returns:
        A dict with the keys ``img_link``, ``brand``, ``name``,
        ``org_price``, ``cur_price`` and ``date``. ``date`` is the
        module-level ``today`` value.
    """
    img_link = html.select_one("a .prd-image img[src]")["src"]

    # The title is expected to look like "[brand] product name".
    title = html.select_one(".prd-content .prd-name strong").string
    bracketed, closing, remainder = title.partition("]")
    if closing:
        brand = bracketed[1:]  # drop the leading "["
        name = remainder.strip()
    else:
        # No "[brand]" prefix: keep the whole title as the name instead of
        # failing, so one unusual product does not abort the whole scrape.
        brand = ""
        name = title.strip()

    # When no discount element is present, the original price is the same
    # as the current price.
    cur_price = html.select_one(".prd-info .price-group .price strong").string
    discount = html.select_one(".prd-info .price-group .discount em")
    org_price = cur_price if discount is None else discount.string

    return {
        "img_link": img_link,
        "brand": brand,
        "name": name,
        "org_price": org_price,
        "cur_price": cur_price,
        "date": today,
    }

In [5]:
# Scrape the information of every product, from the first to the last
def extract_lohbs_prds():
    """Parse the module-level page source ``r`` and return all products.

    Returns:
        A list with one ``extract_prd`` result per ``li`` element found
        inside the product list container.
    """
    page = BeautifulSoup(r, "html.parser")
    container = page.select_one(".layout-category-list .lohbs-prd-list")
    return [extract_prd(item) for item in container.select("li")]

In [6]:
# Convert the scraping result into a DataFrame with the columns in a fixed order
prd_df = pd.DataFrame(
    extract_lohbs_prds(),
    columns=["date", "brand", "name", "org_price", "cur_price", "img_link"],
)

# Show the result
prd_df

Unnamed: 0,date,brand,name,org_price,cur_price,img_link
0,19-12-08,히말라야,코코아버터 립밤 10g,3900,3900,https://www.lohbs.co.kr/lohbsUpload/productIma...
1,19-12-08,아벤느,(1+1 기획) 시칼파트 립밤 10ml,12600,12600,https://www.lohbs.co.kr/lohbsUpload/productIma...
2,19-12-08,멘소래담,메디케이티드 립밤 3.5g,2700,2700,https://www.lohbs.co.kr/lohbsUpload/productIma...
3,19-12-08,아토팜,키즈 컬러 립밤 3.3g 중 택1,10000,10000,https://www.lohbs.co.kr/lohbsUpload/productIma...
4,19-12-08,온더바디,리틀카카오 비타민E 립밤 4.5g,6400,6400,https://www.lohbs.co.kr/lohbsUpload/productIma...
5,19-12-08,눅스,레브드미엘 립밤 15g 기획,17000,15300,https://www.lohbs.co.kr/lohbsUpload/productIma...
6,19-12-08,허바신,우타카밀 립밤 4.8g,7900,7900,https://www.lohbs.co.kr/lohbsUpload/productIma...
7,19-12-08,온더바디,리틀어피치 키링 틴티드 립밤 6g,9900,9900,https://www.lohbs.co.kr/lohbsUpload/productIma...
8,19-12-08,비레디,비레디 웨이크업 생기 립밤,10000,10000,https://www.lohbs.co.kr/lohbsUpload/productIma...
9,19-12-08,온더바디,리틀무지 키링 틴티드 립밤 6g,9900,9900,https://www.lohbs.co.kr/lohbsUpload/productIma...
