## Import Libraries

In [1]:
import os
import time

import pandas as pd
import pyperclip
from bs4 import BeautifulSoup as bs
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

### Clipboard copy

In [2]:
def clipboard_input(driver, user_input):
    """Enter ``user_input`` in the browser by pasting it from the system clipboard.

    The text is copied to the clipboard with pyperclip and pasted with a
    Ctrl+V key chord sent through ``ActionChains``. The chord is not aimed at a
    specific element, so the caller is expected to click the target field
    first (as ``login_naver`` does). The clipboard content that was present
    before the call is put back afterwards.

    Args:
        driver: Selenium WebDriver that receives the key chord.
        user_input: Text to paste.
    """
    # save existing clipboard data
    existing_clipboard = pyperclip.paste()

    try:
        # copy user_input to clipboard, then paste it
        pyperclip.copy(user_input)
        ActionChains(driver).key_down(Keys.CONTROL).send_keys('v').key_up(Keys.CONTROL).perform()
    finally:
        # Restore even when the paste raises: user_input may be a password and
        # must not be left behind on the system clipboard.
        pyperclip.copy(existing_clipboard)
    time.sleep(1)

### Load user info

In [3]:
def load_user_info():
    """Read the login id and password from ``user_info.txt``.

    The file is opened relative to the current working directory. Its first
    line is returned as the id and its second line as the password.

    Returns:
        tuple: ``(user_id, user_pw)`` with the trailing newline of each line
        removed.

    Raises:
        OSError: If ``user_info.txt`` cannot be opened.
        IndexError: If the file contains fewer than two lines.
    """
    # `with` closes the file even if reading fails.
    with open('user_info.txt', 'r') as f:
        user_info = f.readlines()

    # rstrip('\n') instead of [:-1]: when the last line has no trailing
    # newline, slicing would cut off the final character of the password.
    return user_info[0].rstrip('\n'), user_info[1].rstrip('\n')

### Naver Login

In [4]:
def login_naver(driver, user_id, user_pw):
    """Sign in to Naver by filling and submitting the login form.

    The id and password are pasted through ``clipboard_input`` after clicking
    the matching input field, then the form's submit input is clicked.

    Args:
        driver: Selenium WebDriver to drive.
        user_id: Naver account id.
        user_pw: Naver account password.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If one of the
            expected page elements is not found.
    """
    # naver login page
    driver.get('https://nid.naver.com/nidlogin.login?svctype=262144&url=http://m.naver.com/aside/')

    # find_element(By.XPATH, ...) is used throughout: the find_element_by_*
    # shortcuts no longer exist in current Selenium releases.

    # input id (click to focus the field, then paste)
    driver.find_element(By.XPATH, '//*[@id="id"]').click()
    clipboard_input(driver, user_id)

    # input pw
    driver.find_element(By.XPATH, '//*[@id="pw"]').click()
    clipboard_input(driver, user_pw)

    # click login btn
    driver.find_element(By.XPATH, '//*[@id="frmNIDLogin"]/fieldset/input').click()
    time.sleep(1)

    # Click the "btn_cancel" span shown after submitting.
    # NOTE(review): presumably this dismisses a post-login prompt - confirm
    # which dialog it is and whether it always appears.
    driver.find_element(By.XPATH, '//span[@class="btn_cancel"]').click()

### Enter Details to search in Joonggonara

In [5]:
def get_idx_of_selected_option(option_list, input_text):
    """Return the position of the first option whose ``text`` equals ``input_text``.

    Args:
        option_list: Iterable of objects exposing a ``text`` attribute.
        input_text: Text to look for (exact match).

    Returns:
        int: Zero-based index of the first match, or ``-1`` when nothing matches.
    """
    matching_positions = (
        position
        for position, candidate in enumerate(option_list)
        if candidate.text == input_text
    )
    # The generator is lazy, so the scan stops at the first hit.
    return next(matching_positions, -1)

In [6]:
def _select_dropdown_option(driver, dropdown_xpath, option_text, pause=0):
    """Open the dropdown at ``dropdown_xpath`` and click the entry whose text is ``option_text``."""
    driver.find_element(By.XPATH, dropdown_xpath).click()
    options = driver.find_elements(By.XPATH, dropdown_xpath + '/ul/li')
    idx = get_idx_of_selected_option(options, option_text)
    if idx < 0:
        # Indexing with -1 would silently click the LAST entry and run the
        # search with a filter the caller never asked for.
        raise ValueError(
            "option {!r} not found in dropdown {}; available options: {}".format(
                option_text, dropdown_xpath, [option.text for option in options]
            )
        )
    options[idx].click()
    if pause:
        time.sleep(pause)


def enter_details(driver, search_option, item, detail_option):
    """Fill in the article search form and submit it.

    The driver must already be inside the frame that contains the form; the
    function itself does not switch frames.

    Args:
        driver: Selenium WebDriver to drive.
        search_option: Dict with the visible text of the dropdown entries to
            pick, under the keys ``'board'``, ``'period'``, ``'menu'`` and ``'by'``.
        item: Search term typed into the main search box.
        detail_option: Dict with the keys ``'keywords_and'``, ``'keywords_not'``,
            ``'keywords_or'`` and ``'sentence'``; each value is typed into the
            corresponding detail-search input.

    Raises:
        ValueError: If a value in ``search_option`` matches no dropdown entry.
        selenium.common.exceptions.NoSuchElementException: If an expected form
            element is not found.
    """
    # Pick the last entry of the list-size dropdown
    # (per the original author's note this shows 50 items per page).
    driver.find_element(By.XPATH, '//div[@id="listSizeSelectDiv"]').click()
    time.sleep(1)
    list_sizes = driver.find_elements(By.XPATH, '//div[@id="listSizeSelectDiv"]/ul/li')
    list_sizes[-1].click()
    time.sleep(1)

    # set search options
    _select_dropdown_option(driver, '//div[@id="searchOptionSelectDiv"]', search_option['board'], pause=1)
    _select_dropdown_option(driver, '//div[@id="divSearchDateTop"]', search_option['period'])
    _select_dropdown_option(driver, '//div[@id="divSearchMenuTop"]', search_option['menu'])
    _select_dropdown_option(driver, '//div[@id="divSearchByTop"]', search_option['by'])

    # enter item
    driver.find_element(By.XPATH, '//input[@placeholder="검색어를 입력해주세요"]').send_keys(item)

    # click detail search btn
    driver.find_element(By.XPATH, '//*[@id="detailSearchBtn"]').click()

    # enter detail options (input placeholder -> detail_option key)
    detail_inputs = (
        ('//input[@placeholder="다음 단어 모두 포함"]', 'keywords_and'),
        ('//input[@placeholder="다음 단어 제외"]', 'keywords_not'),
        ('//input[@placeholder="다음 단어 중 1개 이상 포함"]', 'keywords_or'),
        ('//input[@placeholder="다음 어절, 어구 정확히 일치"]', 'sentence'),
    )
    for input_xpath, option_key in detail_inputs:
        driver.find_element(By.XPATH, input_xpath).send_keys(detail_option[option_key])

    # search
    driver.find_element(By.XPATH, '//form[@name="frmSearchTop"]/div[@class="input_search_area"]/button[@class="btn-search-green"]').click()

    time.sleep(1)

### Get Product Info

In [7]:
def get_post_info(driver, href):
    """Open a post and extract its title, listed price and body text.

    Args:
        driver: Selenium WebDriver to drive.
        href: URL of the post to open.

    Returns:
        dict: ``{'cost': ..., 'title': ..., 'content': ...}`` where ``cost`` is
        the text of the first ``span.cost`` element, or the integer ``0`` when
        the page has none; ``title`` is the text of the first
        ``div.tit-box span.b`` element; ``content`` is the text of every ``<p>``
        under ``#tbody`` joined with single spaces.

    Raises:
        IndexError: If the title element or ``#tbody`` is missing from the page.
    """
    driver.get(href)
    time.sleep(1)
    # The post is read from the page source after switching into "cafe_main".
    driver.switch_to.frame('cafe_main')
    soup = bs(driver.page_source, 'html.parser')

    # post_title
    title = soup.select('div.tit-box span.b')[0].get_text()

    # posted_cost: an explicit emptiness check instead of a bare `except:`,
    # which would also swallow KeyboardInterrupt and unrelated errors.
    cost_tags = soup.select('span.cost')
    cost = cost_tags[0].get_text() if cost_tags else 0

    # merge contents to single text
    content_tags = soup.select('#tbody')[0].select('p')
    content = ' '.join(tag.get_text() for tag in content_tags)

    time.sleep(1)

    return {'cost': cost, 'title': title, 'content': content}

#### Run ChromeDriver

In [8]:
# Start a Chrome session; `driver` is the shared WebDriver used by every cell below.
driver = webdriver.Chrome()
# Implicit wait of 2 (seconds, per the Selenium API) applied to element lookups.
driver.implicitly_wait(2)

#### Login Naver

In [9]:
# Read the credentials from user_info.txt and sign in with the shared driver.
user_id, user_pw = load_user_info()
login_naver(driver, user_id, user_pw)

#### Search items from Joonggonara and get boards

In [10]:
# log onto Joonggonara and enter details
driver.get('https://cafe.naver.com/joonggonara?iframe_url=/ArticleSearchList.nhn%3Fsearch.clubid=10050146%26search.searchBy=0')
time.sleep(1)
# Switch into the "cafe_main" frame before touching the search form.
# find_element(By.NAME, ...) replaces find_element_by_name, which no longer
# exists in current Selenium releases.
driver.switch_to.frame(driver.find_element(By.NAME, "cafe_main"))

# Visible text of the dropdown entries to select in the search form.
search_option = {
    'board': "게시글 전체",
    'period': "전체기간",
    'menu': "주변기기/악세사리",
    'by': "제목만",
}

# Search term; also used later as part of the output CSV file name.
item = "애플워치"

# Values typed into the detail-search inputs.
detail_option = {
    'keywords_and': "미개봉 5",
    'keywords_not': "40 40mm 40MM",
    'keywords_or': "스페이스그레이 스그",
    'sentence': "",
}
# Alternative exclusion list kept for reference:
# 'keywords_not' : "LTE 중고폰 삽니다 사요 구매 구함 구해요 구합니다"

# The link collection loop stops once more than this many posts are gathered.
num_of_items = 500

enter_details(driver, search_option, item, detail_option)

In [11]:
# Links inside the "prev-next" block of the search result.
# find_element(By..., ...) replaces the find_element(s)_by_* shortcuts, which
# no longer exist in current Selenium releases.
board_navigator = driver.find_element(By.XPATH, '//div[@class="prev-next"]')
board_list = board_navigator.find_elements(By.TAG_NAME, 'a')
# Drop the last character of the first link's href; the collection loop
# appends the page number to this prefix.
# NOTE(review): assumes the first link ends in a single-digit page number - confirm.
board_href = board_list[0].get_attribute('href')[:-1]
board_idx = 0

#### Get Posts Links

In [12]:
# Collect one post link per author, walking the result pages in order.
post_key_list = []
author_list = set()
# Start from the first page on every run, so re-executing this cell does not
# continue from wherever a previous run stopped.
board_idx = 0
while True:
    # change board
    board_idx += 1
    driver.get(board_href + str(board_idx))
    time.sleep(1)
    driver.switch_to.frame(driver.find_element(By.NAME, "cafe_main"))

    # get posts in current board
    posts = driver.find_elements(By.CSS_SELECTOR, 'div.article-board > table > tbody > tr')
    num_of_new_posts = len(posts)

    for post in posts:

        # get valid posts: rows without an author cell or an article link are skipped.
        # Only Selenium errors are caught, so a kernel interrupt still stops the loop.
        try:
            author = post.find_element(By.CLASS_NAME, 'td_name').text.strip()
            href = post.find_element(By.CLASS_NAME, 'article').get_attribute('href')
        except WebDriverException:
            num_of_new_posts -= 1
            continue

        # filter duplicated postings by author
        if author in author_list:
            num_of_new_posts -= 1
            continue
        post_key_list.append({"author": author, "href": href})
        author_list.add(author)

    # check
    print("read {} pages {} posts. and {} new posts".format(board_idx, len(post_key_list), num_of_new_posts))

    # terminate condition: a page contributed nothing new, or enough posts were collected
    if num_of_new_posts == 0 or len(post_key_list) > num_of_items:
        break

read 1 pages 30 posts. and 30 new posts
read 2 pages 30 posts. and 0 new posts


#### Get Price Info

In [13]:
# Visit every collected post and gather its price information.
price_info = []
for post_key in post_key_list:
    # Best effort: a post that cannot be opened or parsed is reported and skipped.
    # `except Exception` (not a bare except) lets a kernel interrupt stop the run.
    try:
        post_info = get_post_info(driver, post_key["href"])
        print(post_info['cost'], post_key["author"], post_info['title'])
        price_info.append(post_info)
    except Exception:
        print("cannot attach post")

price_info_pd = pd.DataFrame(price_info)
# to_csv does not create missing directories; make sure the target exists so
# the scrape result is not lost to an OSError at the very last step.
os.makedirs("output", exist_ok=True)
price_info_pd.to_csv("output/20200313_"+item+"_price_info.csv")

510,000원 바다와 달 미개봉 새 제품 애플워치 5 스페이스 그레이 알루미늄 케이스 블랙스포츠 밴드 44MM GPS
10,000원 지져스아미 애플워치3 나이키 플러스 GPS, 42mm 스페이스그레이 미개봉 새제품 / 애플워치5 나이키 플러스 GPS, 44mm 
520,000원 쩡 (미개봉) 애플워치 5 44mm 스페이스그레이 52만원에 팔아요!  
530,000원 산다라방방 애플워치5 GPS 44mm 스페이스그레이 미개봉 팝니다.
480,000원 TeslaX 애플워치5 44mm 스그, 실버 GPS 미개봉 - 48만
560,000원 귀족주의 [미개봉 북미판 애플워치5 44mm] 스페이스 그레이 심전도 활성화 가능
570,000원 네이버맞나요 애플워치5 40m gps+lte 셀룰러 스페이스그레이 미개봉 
600,000원 딸귀 애플워치5 미개봉 셀룰러 40.44 스그 팝니다
500,000원 테이크로이스 애플워치5 44mm 스페이스그레이 알루미늄 미개봉 삽니다. (나이키에디션 가능)
520,000원 llnozz 미개봉 애플워치5 스페이스그레이44mm gps팝니다
520,000원 신쨩 [미개봉] 애플워치5 스페이스그레이 알루미늄 케이스/블랙스포츠밴드/44mm/애플코리아정품/팝니다.
500,000원 Penoid 애플워치5 알루미늄 스페이스그레이 44mm 미개봉 새상품 팝니다
500,000원 s10234 애플워치5 나이키 스페이스그레이 44mm gps (미개봉) 구합니다
490,000원 jps01 애플워치 5 [미개봉] 44mm 스페이스 그레이 블랙 스포츠 밴드
500,000원 iti128 애플워치5 미개봉 스페이스그레이 44mm GPS 500,000원 
500,000원 두봉이 애플워치5 44mm 스그 gps 미개봉 블랙스포츠밴드
480,000원 hs121148 (미개봉)애플워치5 스페이스그레이 44mm gps
490,000원 사랑서버 애플워치5 44mm 스페이스그레이 미개봉 제품49만원(대구) 삽니다
450,000원 Tknya 애플워치5 44mm 미개봉 스그 45만삽