In [190]:

from selenium import webdriver
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver import Keys, ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.service import Service
from bs4 import BeautifulSoup as bs
from selenium.webdriver.chrome.options import Options

from collections import deque
import re
import random
import time
import concurrent.futures
from datetime import datetime

# Listing-page URLs of the hot-deal boards; each site class stores its own
# URL in `self.hot_deal_page` and opens it when collecting post links.
ARCA_LIVE_LINK = "https://arca.live/b/hotdeal"
RULI_WEB_LINK = "https://bbs.ruliweb.com/market/board/1020?view=gallery"
PPOM_PPU_LINK = "https://www.ppomppu.co.kr/zboard/zboard.php?id=ppomppu"
QUASAR_ZONE_LINK = "https://quasarzone.com/bbs/qb_saleinfo"
FM_KOREA_LINK = "https://www.fmkorea.com/hotdeal"

class PAGES:
    """Base class holding the state and browser setup shared by the site crawlers.

    Attributes:
        refresh_delay: Seconds a caller is expected to wait between polling rounds.
        item_link_queue: FIFO of ``(item_link, retry_attempt)`` tuples still to crawl.
        previous_items_queue: Links already queued, newest on the left; subclasses
            consult it to avoid queueing the same post twice.
    """

    def __init__(self):
        self.refresh_delay = 60 # sec
        self.item_link_queue = deque()
        self.previous_items_queue = deque()

    def set_driver(self, site_name):
        """Start a Chrome WebDriver and navigate it to ``site_name``.

        Args:
            site_name: URL to open right after the browser starts.

        Returns:
            The live ``webdriver.Chrome`` instance. The caller owns it and is
            responsible for calling ``quit()`` when done.

        Raises:
            Exception: Whatever Selenium raises while navigating; the browser is
                shut down first so a failed start does not leave Chrome running.
        """
        chrome_options = Options()
        service = Service(executable_path=ChromeDriverManager().install())
        # chrome_options.add_argument("--headless")
        driver = webdriver.Chrome(options = chrome_options, service = service)
        try:
            driver.implicitly_wait(10)
            driver.get(site_name)
        except Exception:
            # The driver would otherwise be unreachable (never returned) and leak.
            driver.quit()
            raise

        return driver

    def set_drvier(self, site_name):
        """Backward-compatible alias (original misspelling) of :meth:`set_driver`."""
        return self.set_driver(site_name)
    
class ARCA_LIVE(PAGES): # shopping_mall_link, shopping_mall, item_name, price, delivery, content, comment
    """Crawler for the arca.live hot-deal board.

    Per post it extracts: shopping_mall_link, shopping_mall, item_name, price,
    delivery, content, comment.
    """

    def __init__(self):
        super().__init__()
        self.hot_deal_page = ARCA_LIVE_LINK

    def get_item_links(self):
        """Collect unseen post links from the listing page into ``item_link_queue``.

        Rows at ``nth-child`` positions 4..48 are inspected (presumably the earlier
        rows are notices -- TODO confirm). Each new link is queued with a retry
        count of 0 and remembered in ``previous_items_queue`` (capped at 100).
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            for i in range(4, 49):
                try:
                    item = driver.find_element(By.CSS_SELECTOR, f"body > div.root-container > div.content-wrapper.clearfix > article > div > div.article-list > div.list-table.hybrid > div:nth-child({i}) > div > div > span.vcol.col-title > a")
                    item_link = item.get_attribute("href")
                except Exception as e:
                    print(e)
                    # Skip this row: without this, `item_link` would be unbound
                    # (first row) or still hold the previous row's link.
                    continue

                if not item_link or item_link in self.previous_items_queue:
                    continue
                self.item_link_queue.append((item_link, 0))
                self.previous_items_queue.appendleft(item_link)
                if len(self.previous_items_queue) > 100:
                    self.previous_items_queue.pop()
        finally:
            # Always close the browser; every call opens a fresh one.
            driver.quit()

    def crawling(self):
        """Visit every queued post, print its fields, and retry failures.

        A post whose parsing fails is re-queued until its retry count reaches 3;
        after that it is appended to the notebook-level list ``false`` (which must
        already exist) as ``(item_link, retry_attempt + 1)``.
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            while self.item_link_queue:
                item_link, retry_attempt = self.item_link_queue.popleft()
                print(item_link, retry_attempt)
                try: # post may be reported/removed, behind a security check, etc.
                    driver.get(item_link)
                    time.sleep(5)
                    table = driver.find_element(By.TAG_NAME, "table")
                    rows = table.find_elements(By.TAG_NAME, "tr")
                    # Drop the first whitespace-separated token (the row label)
                    # and glue the rest together.
                    shopping_mall_link, shopping_mall, item_name, price, delivery = [
                        "".join(row.text.split()[1:]) for row in rows
                    ]
                    content = driver.find_element(By.CSS_SELECTOR, "body > div.root-container > div.content-wrapper.clearfix > article > div > div.article-wrapper > div.article-body > div.fr-view.article-content").text
                    comment_box = driver.find_element(By.CSS_SELECTOR, "#comment > div.list-area")
                    comment = [element.text for element in comment_box.find_elements(By.CLASS_NAME, "text")]
                except Exception as e:
                    if retry_attempt >= 3:
                        print(e)
                        false.append((item_link, retry_attempt + 1))
                    else:
                        self.item_link_queue.append((item_link, retry_attempt + 1))
                    continue

                print(shopping_mall_link, shopping_mall, price, item_name, delivery, content, comment)
            print("Empty Queue")
        finally:
            driver.quit()

# Note: some posts on this board are published without a shopping_mall_link.
class RULI_WEB(PAGES): # shopping_mall_link, item_name, content, comment
    """Crawler for the ruliweb hot-deal board (gallery view).

    Per post it extracts: shopping_mall_link, item_name, content, comment.
    """

    def __init__(self):
        super().__init__()
        self.hot_deal_page = RULI_WEB_LINK

    def get_item_links(self):
        """Collect unseen post links from the listing page into ``item_link_queue``.

        Gallery cells at ``nth-child`` positions 1..28 are inspected. Each new link
        is queued with a retry count of 0 and remembered in
        ``previous_items_queue`` (capped at 100).
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            for i in range(1, 29):
                try:
                    item = driver.find_element(By.CSS_SELECTOR, f"#board_list > div > div.board_main.theme_default.theme_white.theme_white.theme_gallery > table > tbody > tr:nth-child(7) > td > div > div:nth-child({i}) > div > div.article_info > div > div > a.deco")
                    item_link = item.get_attribute("href")
                except Exception as e:
                    print(e)
                    # Skip this cell: without this, `item_link` would be unbound
                    # (first cell) or still hold the previous cell's link.
                    continue

                if not item_link or item_link in self.previous_items_queue:
                    continue
                self.item_link_queue.append((item_link, 0))
                self.previous_items_queue.appendleft(item_link)
                if len(self.previous_items_queue) > 100:
                    self.previous_items_queue.pop()
        finally:
            # Always close the browser; every call opens a fresh one.
            driver.quit()

    def crawling(self):
        """Visit every queued post, print its fields, and retry failures.

        A post whose parsing fails is re-queued until its retry count reaches 3;
        after that it is appended to the notebook-level list ``false`` (which must
        already exist) as ``(item_link, retry_attempt + 1)``. A missing source
        link is not treated as a failure; ``shopping_mall_link`` is then ``""``.
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            while self.item_link_queue:
                item_link, retry_attempt = self.item_link_queue.popleft()
                print(item_link, retry_attempt)
                try: # post may be reported/removed, behind a security check, etc.
                    driver.get(item_link)
                    time.sleep(5)
                    item_name = driver.find_element(By.CSS_SELECTOR, "#board_read > div > div.board_main > div.board_main_top > div.user_view > div:nth-child(1) > h4 > span > span.subject_inner_text").text
                    # Posts may omit the source link, so look it up with
                    # find_elements (returns [] instead of raising).
                    source_links = driver.find_elements(By.CSS_SELECTOR, "#board_read > div > div.board_main > div.board_main_view > div.source_url > a")
                    shopping_mall_link = source_links[0].text if source_links else ""
                    content = driver.find_element(By.TAG_NAME, "article").text
                    comment = [element.text for element in driver.find_elements(By.CLASS_NAME, "comment")]
                except Exception as e:
                    if retry_attempt >= 3:
                        print(e)
                        false.append((item_link, retry_attempt + 1))
                    else:
                        self.item_link_queue.append((item_link, retry_attempt + 1))
                    continue

                print(shopping_mall_link, item_name, content, comment)
            print("Empty Queue")
        finally:
            driver.quit()
        
class FM_KOREA(PAGES): # shopping_mall_link, shopping_mall, item_name, price, delivery, content, comment
    """Crawler for the fmkorea hot-deal board.

    Per post it extracts: shopping_mall_link, shopping_mall, item_name, price,
    delivery, content, comment.
    """

    def __init__(self):
        super().__init__()
        self.hot_deal_page = FM_KOREA_LINK

    def get_item_links(self):
        """Collect unseen post links from the listing page into ``item_link_queue``.

        List items at ``nth-child`` positions 1..20 are inspected. Each new link is
        queued with a retry count of 0 and remembered in ``previous_items_queue``
        (capped at 100).
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            for i in range(1, 21):
                try:
                    item = driver.find_element(By.CSS_SELECTOR, f"#bd_1196365581_0 > div > div.fm_best_widget._bd_pc > ul > li:nth-child({i}) > div > h3 > a")
                    item_link = item.get_attribute("href")
                except Exception as e:
                    print(e)
                    # Skip this item: without this, `item_link` would be unbound
                    # (first item) or still hold the previous item's link.
                    continue

                if not item_link or item_link in self.previous_items_queue:
                    continue
                self.item_link_queue.append((item_link, 0))
                self.previous_items_queue.appendleft(item_link)
                if len(self.previous_items_queue) > 100:
                    self.previous_items_queue.pop()
        finally:
            # Always close the browser; every call opens a fresh one.
            driver.quit()

    def crawling(self):
        """Visit every queued post, print its fields, and retry failures.

        A post whose parsing fails is re-queued until its retry count reaches 3;
        after that it is appended to the notebook-level list ``false`` (which must
        already exist) as ``(item_link, retry_attempt + 1)``.
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            while self.item_link_queue:
                item_link, retry_attempt = self.item_link_queue.popleft()
                print(item_link, retry_attempt)
                try: # post may be reported/removed, behind a security check, etc.
                    driver.get(item_link)
                    time.sleep(5)
                    # Read the text of every "xe_content" element up front so that
                    # all fields are strings, not WebElement objects.
                    details = [element.text for element in driver.find_elements(By.CLASS_NAME, "xe_content")]
                    # The first six entries are the post fields; anything after
                    # them is treated as comments.
                    shopping_mall_link, shopping_mall, item_name, price, delivery, content, *comment = details
                except Exception as e:
                    if retry_attempt >= 3:
                        print(e)
                        false.append((item_link, retry_attempt + 1))
                    else:
                        self.item_link_queue.append((item_link, retry_attempt + 1))
                    continue

                print(shopping_mall_link, shopping_mall, item_name, price, delivery, content, comment)
            print("Empty Queue")
        finally:
            driver.quit()
            
            
            
class QUASAR_ZONE(PAGES): # shopping_mall_link, shopping_mall, item_name, price, delivery, content, comment
    """Crawler for the quasarzone sale-info board.

    Per post it extracts: shopping_mall_link, shopping_mall, item_name, price,
    delivery, content, comment.
    """

    def __init__(self):
        super().__init__()
        self.hot_deal_page = QUASAR_ZONE_LINK

    def get_item_links(self):
        """Collect unseen post links from the listing page into ``item_link_queue``.

        Table rows at ``nth-child`` positions 1..30 are inspected. Each new link is
        queued with a retry count of 0 and remembered in ``previous_items_queue``
        (capped at 100).
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            for i in range(1, 31):
                try:
                    item = driver.find_element(By.CSS_SELECTOR, f"#frmSearch > div > div.list-board-wrap > div.market-type-list.market-info-type-list.relative > table > tbody > tr:nth-child({i}) > td:nth-child(2) > div > div.market-info-list-cont > p > a")
                    item_link = item.get_attribute("href")
                except Exception as e:
                    print(e)
                    # Skip this row: without this, `item_link` would be unbound
                    # (first row) or still hold the previous row's link.
                    continue

                if not item_link or item_link in self.previous_items_queue:
                    continue
                self.item_link_queue.append((item_link, 0))
                self.previous_items_queue.appendleft(item_link)
                if len(self.previous_items_queue) > 100:
                    self.previous_items_queue.pop()
        finally:
            # Always close the browser; every call opens a fresh one.
            driver.quit()

    def crawling(self):
        """Visit every queued post, print its fields, and retry failures.

        A post whose parsing fails is re-queued until its retry count reaches 3;
        after that it is appended to the notebook-level list ``false`` (which must
        already exist) as ``(item_link, retry_attempt + 1)``.
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            while self.item_link_queue:
                item_link, retry_attempt = self.item_link_queue.popleft()
                print(item_link, retry_attempt)
                try: # post may be reported/removed, behind a security check, etc.
                    driver.get(item_link)
                    time.sleep(5)
                    title = driver.find_element(By.CSS_SELECTOR, "#content > div.sub-content-wrap > div.left-con-wrap > div.common-view-wrap.market-info-view-wrap > div > dl > dt > div:nth-child(1) > h1").text
                    # The first two whitespace-separated tokens of the heading are
                    # dropped (presumably status/category labels -- TODO confirm).
                    item_name = " ".join(title.split()[2:])
                    table = driver.find_element(By.TAG_NAME, "table")
                    rows = table.find_elements(By.TAG_NAME, "tr")
                    content = driver.find_element(By.CSS_SELECTOR, "#new_contents").text
                    comment = [element.text for element in driver.find_elements(By.CSS_SELECTOR, "#content > div.sub-content-wrap > div.left-con-wrap > div.reply-wrap > div.reply-area > div.reply-list")]
                    # Parsed inside the try so a short table or a stale element
                    # triggers the retry path instead of aborting the crawl.
                    # Each row loses its first token (the row label); rows beyond
                    # the fourth are ignored.
                    shopping_mall_link, shopping_mall, price, delivery, *_ = [
                        "".join(row.text.split()[1:]) for row in rows
                    ]
                except Exception as e:
                    if retry_attempt >= 3:
                        print(e)
                        false.append((item_link, retry_attempt + 1))
                    else:
                        self.item_link_queue.append((item_link, retry_attempt + 1))
                    continue

                print(shopping_mall_link, shopping_mall, item_name, price, delivery, content, comment)
            print("Empty Queue")
        finally:
            driver.quit()

# Note: some posts on this board are published without the shopping_mall tag.
class PPOM_PPU(PAGES):
    """Crawler for the ppomppu hot-deal board.

    Per post it extracts: item_name, content, comments, shopping_mall,
    shopping_mall_link.
    """

    def __init__(self):
        super().__init__()
        self.hot_deal_page = PPOM_PPU_LINK

    def get_item_links(self):
        """Collect unseen post links from the listing page into ``item_link_queue``.

        Every second table row from ``nth-child`` 10 up to 48 is inspected. Each
        new link is queued with a retry count of 0 and remembered in
        ``previous_items_queue`` (capped at 100).
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            for i in range(10, 49, 2):
                try:
                    item = driver.find_element(By.CSS_SELECTOR, f"#revolution_main_table > tbody > tr:nth-child({i}) > td:nth-child(3) > table > tbody > tr > td:nth-child(2) > div > a")
                    item_link = item.get_attribute("href")
                except Exception as e:
                    print(e)
                    # Skip this row: without this, `item_link` would be unbound
                    # (first row) or still hold the previous row's link.
                    continue

                if not item_link or item_link in self.previous_items_queue:
                    continue
                self.item_link_queue.append((item_link, 0))
                self.previous_items_queue.appendleft(item_link)
                if len(self.previous_items_queue) > 100:
                    self.previous_items_queue.pop()
        finally:
            # Always close the browser; every call opens a fresh one.
            driver.quit()

    def crawling(self):
        """Visit every queued post, print its fields, and retry failures.

        A post whose parsing fails is re-queued until its retry count reaches 3;
        after that it is appended to the notebook-level list ``false`` (which must
        already exist) as ``(item_link, retry_attempt + 1)``. A missing shop tag
        is not treated as a failure; ``shopping_mall`` is then ``""``.
        """
        driver = self.set_drvier(self.hot_deal_page)
        try:
            while self.item_link_queue:
                item_link, retry_attempt = self.item_link_queue.popleft()
                print(item_link, retry_attempt)
                try: # post may be reported/removed, behind a security check, etc.
                    driver.get(item_link)
                    time.sleep(5)
                    item_name = driver.find_element(By.CSS_SELECTOR, "body > div.wrapper > div.contents > div.container > div > table:nth-child(9) > tbody > tr:nth-child(3) > td > table > tbody > tr > td:nth-child(5) > div > div.sub-top-text-box > font.view_title2").text
                    content = driver.find_element(By.CSS_SELECTOR, "body > div.wrapper > div.contents > div.container > div > table:nth-child(15) > tbody > tr:nth-child(1) > td > table > tbody > tr > td").text
                    comments = driver.find_element(By.ID, "quote").text
                    shopping_mall_link = driver.find_element(By.CSS_SELECTOR, "body > div.wrapper > div.contents > div.container > div > table:nth-child(9) > tbody > tr:nth-child(3) > td > table > tbody > tr > td:nth-child(5) > div > div.sub-top-text-box > div > a").get_attribute("href")
                    # Posts may lack the shop tag, so look it up with
                    # find_elements (returns [] instead of raising).
                    shop_tags = driver.find_elements(By.CSS_SELECTOR, "body > div.wrapper > div.contents > div.container > div > table:nth-child(9) > tbody > tr:nth-child(3) > td > table > tbody > tr > td:nth-child(5) > div > div.sub-top-text-box > font.view_title2 > span")
                    shopping_mall = shop_tags[0].text if shop_tags else ""
                except Exception as e:
                    if retry_attempt >= 3:
                        print(e)
                        false.append((item_link, retry_attempt + 1))
                    else:
                        self.item_link_queue.append((item_link, retry_attempt + 1))
                    continue

                print(item_name, content, comments, shopping_mall, shopping_mall_link)
            print("Empty Queue")
        finally:
            driver.quit()


In [189]:
# Shared list of links that exhausted their retries: every crawler's crawling()
# appends (item_link, retry_attempt + 1) tuples here, so this cell must run
# before any crawling() call. (Lower-case name; unrelated to the builtin False.)
false = []

In [191]:
# One crawler object per board, created in the same order as before.
quasar_zone, ppom_ppu, fm_korea, ruli_web, arca_live = (
    QUASAR_ZONE(),
    PPOM_PPU(),
    FM_KOREA(),
    RULI_WEB(),
    ARCA_LIVE(),
)


In [192]:
# Fill each crawler's queue with the links currently on its listing page.
for site_crawler in (quasar_zone, ppom_ppu, fm_korea, ruli_web, arca_live):
    site_crawler.get_item_links()

In [193]:
# Drain each crawler's queue in turn, printing the fields of every post.
for site_crawler in (quasar_zone, ppom_ppu, fm_korea, ruli_web, arca_live):
    site_crawler.crawling()

https://quasarzone.com/bbs/qb_saleinfo/views/1557354 0
https://smartstore.naver.com/steelseries-seller/products/10011647843?NaPm=ct%3Dludxt7x4%7Cci%3D83620057513cd63410f1c22e0f533bda8a9f08e2%7Ctr%3Dsbfu%7Csn%3D8919284%7Chk%3D4e1750d69e4a5a4497f7bf971ffc11b8d6b0d532 네이버쇼핑 아크티스 7P+ 화이트 무선 (169,100원/무배) ￦169,100(KRW) 무배 무선 헤드셋 플스5랑 완전 호환되는거 화이트라서 이쁩니담

아크티스노바보다 그냥 아크티스가 대두호환은 더 잘되는거같구용

닌텐도 아이패드 폰 피씨 다 연결되구요

완충하면 30시간에 C타입 급속충전되서 15분에 3시간 사용 가능하다네용

한동안 해외직구로만 판매되다가 최근에 다시 정발로 나온것 같은데

정발제품이 16.9만이면 진짜 괜찮은 것 같아서 공유드려요오오 ['용2\n2024.03.30 19:22:51\n스틸시리즈라 성능은 좋을꺼같고,,\n집에 jbl 퀀텀원 있는데 선때문에그런지 가죽이라 여름땀나서 그런지콘덴서 들이고나선 잘 안쓰게되더라고요. 얘는 머리통이 커도 낄때 불편함없을까요?육군 베레모 58이였는데\n서라\n2024.03.30 19:28:18\n어우 이거 추천목록에서 봤는데 예전에 ㅋㅋ \n\n이미 노바7을 사서 아쉽 \n루이비\n2024.03.30 19:41:29\n 이거 후속기기가 노바7p 맞죠?\nanshu\n2024.03.30 19:46:17\nG733사려다 이걸 골랐네요 ㅎㅎ']
https://quasarzone.com/bbs/qb_saleinfo/views/1557342 0
https://campaign2-api.naver.com/click-point/?eventId=cr_shoppinglive_240330_1037 기타 적립 10원 이니스프리 ￦10(KRW) 0 네이버페이 

In [194]:
# Show the links that were given up on, as (item_link, attempts) tuples.
false

[('https://www.ppomppu.co.kr/zboard/view.php?id=ppomppu&page=1&divpage=89&no=530099',
  4),
 ('https://bbs.ruliweb.com/market/board/1020/read/84440?view=gallery', 4),
 ('https://bbs.ruliweb.com/market/board/1020/read/84435?view=gallery', 4),
 ('https://bbs.ruliweb.com/market/board/1020/read/84433?view=gallery', 4),
 ('https://bbs.ruliweb.com/market/board/1020/read/84427?view=gallery', 4),
 ('https://bbs.ruliweb.com/market/board/1020/read/84418?view=gallery', 4),
 ('https://bbs.ruliweb.com/market/board/1020/read/84417?view=gallery', 4),
 ('https://bbs.ruliweb.com/market/board/1020/read/84413?view=gallery', 4)]

In [None]:
# Poll the Quasarzone board ten times: collect new links, crawl them, then
# wait `refresh_delay` seconds before the next round.
polling_rounds = 10
for _ in range(polling_rounds):
    quasar_zone.get_item_links()
    quasar_zone.crawling()
    pause_seconds = quasar_zone.refresh_delay
    time.sleep(pause_seconds)

['링크\nhttps://www.lotteon.com/p/product/bundle/LO2285077420?ch_no=100870&ch_dtl_no=1035921', '판매처 롯데온', '가격 ￦ 4,960 (KRW)', '배송비/직배 무료']
['링크\nhttps://campaign2-api.naver.com/click-point/?eventId=cr_shoppinglive_240330_1202', '판매처 기타', '가격 ￦ 10 (KRW)', '배송비/직배 0']
['링크\nhttps://www.enuri.com/move/Redirect.jsp?cmd=move_link&vcode=6641&modelno=101608311&pl_no=17434305358&cate=07080442&urltype=0&coupon=0&from=detail&showPrice=405930&buycnt=1&referrer=srp&keyword=x670', '판매처 티몬', '가격 ￦ 365,040 (KRW)', '배송비/직배 3,000']
['링크\nhttps://m.redbitmall.com/goods/goods_view.php?goodsNo=1000003016', '판매처 기타', '가격 ￦ 50,000 (KRW)', '배송비/직배 3,000']
['링크\nhttps://www.coupang.com/vp/products/5258006001?itemId=7463595015&vendorItemId=88375570336&q=hdmi+2.1&itemsCount=27&searchId=cb10ad9387e342d497e7a2997f913806&rank=6&isAddedCart=', '판매처 쿠팡', '가격 ￦ 4,900 (KRW)', '배송비/직배 와우무료']
['링크\nhttps://www.compuzone.co.kr/product/product_detail.htm?ProductNo=1117410', '판매처 기타', '가격 ￦ 1,179,000 (KRW)', '배송비/직배 없음', '기타