In [12]:
import requests
import hmac
import hashlib
import base64
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from openpyxl import Workbook
import os
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor
import logging
import subprocess
from tqdm import tqdm

# Logging setup: emit INFO and above, formatted as "time - level - message".
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def generate_signature(timestamp, method, uri, secret_key):
    """Return the base64-encoded HMAC-SHA256 signature for a request.

    The signed message is ``"{timestamp}.{method}.{uri}"`` and the HMAC key
    is ``secret_key`` encoded to bytes.

    Returns:
        The signature as text, or ``None`` if anything raises while
        building it (the error is logged).
    """
    try:
        payload = "{}.{}.{}".format(timestamp, method, uri).encode()
        mac = hmac.new(secret_key.encode(), payload, hashlib.sha256)
        return base64.b64encode(mac.digest()).decode()
    except Exception as e:
        logging.error(f"Error generating signature: {e}")
        return None

def get_naver_search_volume(api_key, secret_key, customer_id, keyword,
                            max_retries=5, retry_delay=5, timeout=10):
    """Query the Naver keyword tool for ``keyword`` and return the JSON body.

    Each attempt is signed with a fresh timestamp via ``generate_signature``.
    Only HTTP 429 responses are retried; any other non-200 status, a signing
    failure, or an exception ends the call immediately.

    Args:
        api_key: Value sent in the ``X-API-KEY`` header.
        secret_key: Key used to sign the request. It is never transmitted.
        customer_id: Value sent in the ``X-CUSTOMER`` header.
        keyword: Value sent as the ``hintKeywords`` query parameter.
        max_retries: Maximum number of attempts when the API answers 429.
        retry_delay: Seconds to wait between attempts after a 429.
        timeout: Seconds before an unresponsive request is abandoned.

    Returns:
        The decoded JSON response on HTTP 200, otherwise ``None`` (the
        reason is logged).
    """
    method = "GET"
    uri = "/keywordstool"
    url = "https://api.naver.com" + uri
    params = {
        "hintKeywords": keyword,
        "showDetail": "1",
    }

    for attempt in range(1, max_retries + 1):
        try:
            # The timestamp is part of the signed message, so both are
            # regenerated for every attempt.
            timestamp = str(int(time.time() * 1000))
            signature = generate_signature(timestamp, method, uri, secret_key)
            if not signature:
                return None

            # The secret key is deliberately NOT sent as a header: the HMAC
            # signature already proves possession of it, and transmitting
            # the raw secret would defeat the purpose of signing.
            headers = {
                "X-API-KEY": api_key,
                "X-CUSTOMER": customer_id,
                "X-Timestamp": timestamp,
                "X-Signature": signature,
                "Content-Type": "application/json",
                "Accept": "*/*",
            }

            # Without a timeout a stalled connection would block this
            # worker thread forever.
            response = requests.get(url, headers=headers, params=params, timeout=timeout)

            if response.status_code == 200:
                return response.json()
            if response.status_code != 429:
                logging.error(f"Error: {response.status_code}, {response.text}")
                return None

            # Rate limited: wait and retry, but do not sleep after the
            # final attempt since nothing follows it.
            if attempt < max_retries:
                logging.warning(f"429 Too Many Requests. Retrying in {retry_delay} seconds...")
                time.sleep(retry_delay)
        except Exception as e:
            logging.error(f"Error fetching search volume: {e}")
            return None

    logging.error("Max retries reached. Unable to fetch search volume.")
    return None

def write_to_excel(results, search_date, file_path="search_volume.xlsx"):
    """Write keyword search volumes to a new workbook at ``file_path``.

    A fresh workbook with a single "Search Volume" sheet is created: one
    header row, then one row per ``results`` entry in iteration order,
    numbered from 1 and stamped with ``search_date``.

    Args:
        results: Mapping of keyword -> total monthly search volume.
        search_date: Value written in the last column of every row.
        file_path: Destination path of the .xlsx file.

    Any exception is logged rather than raised.
    """
    try:
        workbook = Workbook()
        sheet = workbook.active
        sheet.title = "Search Volume"

        # Header row first, then the numbered data rows.
        rows = [["순위", "검색어", "월간총검색량", "검색 날짜"]]
        rows.extend(
            [position, term, volume, search_date]
            for position, (term, volume) in enumerate(results.items(), start=1)
        )
        for row in rows:
            sheet.append(row)

        workbook.save(file_path)
        logging.info(f"Data saved to {file_path}")
    except Exception as e:
        logging.error(f"Error writing to Excel: {e}")

# XPath of the category dropdown container at 1-based ``position``.
_CATEGORY_DROPDOWN_XPATH = '//*[@id="content"]/div[2]/div/div[1]/div/div/div[1]/div/div[{position}]'
# XPath of the ``index``-th entry in the result list of the current page.
_RESULT_ITEM_XPATH = '//*[@id="content"]/div[2]/div/div[2]/div[2]/div/div/div[1]/ul/li[{index}]/a'
# XPath of the "next page" button under the result list.
_NEXT_PAGE_XPATH = '//*[@id="content"]/div[2]/div/div[2]/div[2]/div/div/div[2]/div/a[2]'


def _select_optional_category(wait, position, raw_option, ordinal):
    """Select entry ``raw_option`` in the optional dropdown at ``position``.

    ``raw_option`` is raw user input; '0' or blank input means "skip".
    Failures are logged and swallowed so the remaining steps still run.
    """
    choice = raw_option.strip()
    if choice in ('0', ''):
        return
    try:
        option_index = int(choice)
        base = _CATEGORY_DROPDOWN_XPATH.format(position=position)
        wait.until(EC.element_to_be_clickable((By.XPATH, f'{base}/span'))).click()
        option_xpath = f'{base}/ul/li[{option_index}]/a'
        wait.until(EC.element_to_be_clickable((By.XPATH, option_xpath))).click()
        time.sleep(1)  # Give the page time to react to the selection.
    except Exception as e:
        logging.error(f"Error occurred while selecting the {ordinal} option: {e}")


def _scrape_ranked_keywords(driver, ws, num_pages, search_date):
    """Scrape up to ``num_pages`` result pages into ``ws``; return the keywords."""
    keywords = []
    for _ in range(num_pages):
        time.sleep(1)  # Wait for the page to load.

        for index in range(1, 21):  # At most 20 items per page.
            try:
                path = _RESULT_ITEM_XPATH.format(index=index)
                text = driver.find_element(By.XPATH, path).text
                row = text.split('\n')
                if len(row) < 2:
                    # Without this check the date appended below would be
                    # collected as the keyword.
                    raise ValueError(f"unexpected result text: {text!r}")
                row[0] = int(row[0])  # Store the rank as a number.
                row.append(search_date)
                logging.info(row)
                ws.append(row)
                keywords.append(row[1])
            except Exception as e:
                logging.error(f"Error occurred while scraping data: {e}")

        # Move to the next page; stop paging if the button cannot be clicked.
        try:
            driver.find_element(By.XPATH, _NEXT_PAGE_XPATH).click()
            time.sleep(1)  # Wait for the page to load.
        except Exception as e:
            logging.error(f"Error occurred while clicking next button: {e}")
            break
    return keywords


def _parse_monthly_count(value):
    """Convert a monthly query count from the API response into an int.

    Counts reported as a string containing '<' (e.g. '< 10') are mapped to
    the arbitrary placeholder value 5.
    """
    if isinstance(value, str) and '<' in value:
        return 5
    return int(value)


def _collect_search_volumes(keywords, api_key, secret_key, customer_id):
    """Fetch PC + mobile monthly search volume per keyword; return a dict."""
    results = {}
    with ThreadPoolExecutor(max_workers=5) as executor:
        futures = {
            executor.submit(get_naver_search_volume, api_key, secret_key, customer_id, keyword): keyword
            for keyword in keywords
        }

        # Futures are consumed in submission order so ``results`` keeps the
        # scraped ranking order. No sleep is used here: every request is
        # already queued on the pool, so pausing this loop would not space
        # the requests out, only delay reading their results.
        for future in tqdm(futures, desc="Fetching search volumes", unit="keyword", ncols=100):
            keyword = futures[future]
            try:
                data = future.result()
                if not data or "keywordList" not in data or not data["keywordList"]:
                    continue
                # Only the first entry is used; presumably it is the one
                # matching the hint keyword. TODO confirm against the API.
                item = data["keywordList"][0]
                pc_count = _parse_monthly_count(item.get("monthlyPcQcCnt", 0))
                mobile_count = _parse_monthly_count(item.get("monthlyMobileQcCnt", 0))
                results[keyword] = pc_count + mobile_count
            except Exception as e:
                logging.error(f"Error occurred while fetching search volume for {keyword}: {e}")
    return results


def _open_with_default_app(path):
    """Open ``path`` with the operating system's default application."""
    import sys

    if hasattr(os, "startfile"):
        # Windows: no shell involved, so spaces or shell metacharacters in
        # the user-chosen file name cannot be misinterpreted.
        os.startfile(path)
    else:
        opener = "open" if sys.platform == "darwin" else "xdg-open"
        subprocess.Popen([opener, path])


def main():
    """Scrape popular shopping keywords from Naver DataLab and look up volumes.

    Steps:
        1. Prompt for the output file name, page count and category options.
        2. Drive Chrome through the DataLab shopping-insight page, scrape the
           ranked keywords and save them to an Excel file in the current
           working directory.
        3. Fetch the monthly search volume of every keyword and write the
           totals with ``write_to_excel``.
        4. Open the resulting file.
    """
    # SECURITY NOTE(review): these credentials are committed in source. They
    # can be overridden through environment variables; the embedded fallback
    # values should be rotated and removed.
    api_key = os.environ.get(
        "NAVER_API_KEY",
        "0100000000f7219646b1189747046631aa4d35e62954247e750a216e37f7072b5174cbe5c2",
    )
    secret_key = os.environ.get(
        "NAVER_SECRET_KEY",
        "AQAAAAD3IZZGsRiXRwRmMapNNeYpncLjvqyOrRpn2cZDTV49Ug==",
    )
    customer_id = os.environ.get("NAVER_CUSTOMER_ID", "373124")

    # Take one timestamp so the file-name date and the date column agree
    # even if the script starts right before midnight.
    now = datetime.now()
    current_date = now.strftime("%Y%m%d")
    search_date = now.strftime("%Y-%m-%d")

    # Build "<base name>_<yyyymmdd>.xlsx" in the current working directory.
    base_file_name = input("저장할 Excel 파일의 기본 이름을 입력하세요 (확장자 제외): ")
    excel_file_name = f"{base_file_name}_{current_date}.xlsx"
    file_path = os.path.join(os.getcwd(), excel_file_name)

    num_pages = int(input("검색할 페이지 수를 입력하세요: "))

    try:
        driver = webdriver.Chrome()
    except Exception as e:
        logging.error(f"Error initializing WebDriver: {e}")
        return

    # Workbook for the scraped ranking; the default sheet is replaced.
    wb = Workbook()
    ws = wb.create_sheet('검색어순위')
    wb.remove(wb['Sheet'])
    ws.append(['순위', '인기검색어', '검색 날짜'])

    # Bound before the try block so a failure during setup or scraping
    # cannot leave the name undefined for the search-volume step below.
    keywords = []

    try:
        driver.get("https://datalab.naver.com/shoppingInsight/sCategory.naver")
        wait = WebDriverWait(driver, 20)

        first_option = int(input("첫 번째 드롭다운 옵션을 입력하세요 (필수): "))
        second_option = input("두 번째 드롭다운 옵션을 입력하세요 (선택, 없으면 0 또는 공백): ")
        third_option = input("세 번째 드롭다운 옵션을 입력하세요 (선택, 없으면 0 또는 공백): ")
        fourth_option = input("네 번째 드롭다운 옵션을 입력하세요 (선택, 없으면 0 또는 공백): ")

        # The first category is mandatory, so a failure here aborts the
        # scrape (handled by the outer except).
        first_base = _CATEGORY_DROPDOWN_XPATH.format(position=1)
        wait.until(EC.element_to_be_clickable((By.XPATH, f'{first_base}/span'))).click()
        first_option_xpath = f'{first_base}/ul/li[{first_option}]'
        wait.until(EC.element_to_be_clickable((By.XPATH, first_option_xpath))).click()
        time.sleep(1)  # Give the page time to react to the selection.

        _select_optional_category(wait, 2, second_option, "second")
        _select_optional_category(wait, 3, third_option, "third")
        _select_optional_category(wait, 4, fourth_option, "fourth")

        # Tick the device and gender checkboxes.
        driver.find_element(By.XPATH, '//*[@id="18_device_0"]').click()
        driver.find_element(By.XPATH, '//*[@id="19_gender_0"]').click()

        # Scroll the age checkbox into view before clicking it.
        age_checkbox = driver.find_element(By.XPATH, '//*[@id="20_age_0"]')
        driver.execute_script("arguments[0].scrollIntoView(true);", age_checkbox)
        age_checkbox.click()

        time.sleep(2)  # Wait for the page to settle.

        # Run the query.
        search_button = wait.until(EC.element_to_be_clickable((By.XPATH, '//*[@id="content"]/div[2]/div/div[1]/div/a')))
        search_button.click()

        keywords = _scrape_ranked_keywords(driver, ws, num_pages, search_date)

        try:
            wb.save(file_path)
            wb.close()
            logging.info(f"'{excel_file_name}' 파일이 현재 폴더에 저장되었습니다.")
        except Exception as e:
            logging.error(f"Error occurred while saving the file: {e}")

    except Exception as e:
        logging.error(f"An error occurred during scraping: {e}")
    finally:
        driver.quit()

    logging.info("검색량 수집이 수분 이상 걸릴 수 있으니 잠시만 기다려 주세요.")
    results = _collect_search_volumes(keywords, api_key, secret_key, customer_id)

    # NOTE(review): write_to_excel saves a new workbook to the same path, so
    # it replaces the ranking workbook saved above.
    write_to_excel(results, search_date, file_path)

    try:
        _open_with_default_app(file_path)
    except Exception as e:
        logging.error(f"Error occurred while opening the file: {e}")

# Run the workflow only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()


저장할 Excel 파일의 기본 이름을 입력하세요 (확장자 제외): 키링순위
검색할 페이지 수를 입력하세요: 25
첫 번째 드롭다운 옵션을 입력하세요 (필수): 2
두 번째 드롭다운 옵션을 입력하세요 (선택, 없으면 0 또는 공백): 14
세 번째 드롭다운 옵션을 입력하세요 (선택, 없으면 0 또는 공백): 14
네 번째 드롭다운 옵션을 입력하세요 (선택, 없으면 0 또는 공백): 0


2024-06-11 16:45:16,509 - INFO - [1, '키링', '2024-06-11']
2024-06-11 16:45:16,529 - INFO - [2, '바비키링', '2024-06-11']
2024-06-11 16:45:16,541 - INFO - [3, '인형키링', '2024-06-11']
2024-06-11 16:45:16,552 - INFO - [4, '젤리캣키링', '2024-06-11']
2024-06-11 16:45:16,564 - INFO - [5, '키링인형', '2024-06-11']
2024-06-11 16:45:16,586 - INFO - [6, '자동차키링', '2024-06-11']
2024-06-11 16:45:16,600 - INFO - [7, '그로밋키링', '2024-06-11']
2024-06-11 16:45:16,613 - INFO - [8, '키홀더', '2024-06-11']
2024-06-11 16:45:16,625 - INFO - [9, '가방키링', '2024-06-11']
2024-06-11 16:45:16,637 - INFO - [10, '네잎클로버키링', '2024-06-11']
2024-06-11 16:45:16,649 - INFO - [11, '바비인형키링', '2024-06-11']
2024-06-11 16:45:16,666 - INFO - [12, '키링부자재', '2024-06-11']
2024-06-11 16:45:16,681 - INFO - [13, '레고키링', '2024-06-11']
2024-06-11 16:45:16,694 - INFO - [14, '키티키링', '2024-06-11']
2024-06-11 16:45:16,709 - INFO - [15, '포카홀더', '2024-06-11']
2024-06-11 16:45:16,723 - INFO - [16, '리본키링', '2024-06-11']
2024-06-11 16:45:16,734 - INFO - [17, '질스튜어

2024-06-11 16:45:29,977 - INFO - [135, '변우석키링', '2024-06-11']
2024-06-11 16:45:29,989 - INFO - [136, '자동차키링가격', '2024-06-11']
2024-06-11 16:45:29,998 - INFO - [137, '닥스키링', '2024-06-11']
2024-06-11 16:45:30,008 - INFO - [138, '비즈키링만들기', '2024-06-11']
2024-06-11 16:45:30,016 - INFO - [139, '손흥민키링', '2024-06-11']
2024-06-11 16:45:30,026 - INFO - [140, '해파리키링', '2024-06-11']
2024-06-11 16:45:32,060 - INFO - [141, '가방키링인형', '2024-06-11']
2024-06-11 16:45:32,069 - INFO - [142, '챠미키티', '2024-06-11']
2024-06-11 16:45:32,077 - INFO - [143, '미니가방키링', '2024-06-11']
2024-06-11 16:45:32,086 - INFO - [144, '에어팟키링', '2024-06-11']
2024-06-11 16:45:32,095 - INFO - [145, '파우치키링', '2024-06-11']
2024-06-11 16:45:32,103 - INFO - [146, '포토카드홀더', '2024-06-11']
2024-06-11 16:45:32,112 - INFO - [147, '거울키링', '2024-06-11']
2024-06-11 16:45:32,120 - INFO - [148, '레고스파이더맨키링', '2024-06-11']
2024-06-11 16:45:32,129 - INFO - [149, '가방악세사리', '2024-06-11']
2024-06-11 16:45:32,137 - INFO - [150, '에쎄모빈티지', '2024-06-11'

2024-06-11 16:45:45,335 - INFO - [268, '호텔키링', '2024-06-11']
2024-06-11 16:45:45,344 - INFO - [269, '유키오', '2024-06-11']
2024-06-11 16:45:45,352 - INFO - [270, '패딩리본키링', '2024-06-11']
2024-06-11 16:45:45,361 - INFO - [271, '금붕어키링', '2024-06-11']
2024-06-11 16:45:45,369 - INFO - [272, '명품키홀더', '2024-06-11']
2024-06-11 16:45:45,377 - INFO - [273, '리본진주키링', '2024-06-11']
2024-06-11 16:45:45,385 - INFO - [274, '가챠키링', '2024-06-11']
2024-06-11 16:45:45,394 - INFO - [275, '액자키링', '2024-06-11']
2024-06-11 16:45:45,402 - INFO - [276, '지브리키링', '2024-06-11']
2024-06-11 16:45:45,411 - INFO - [277, '아크릴키링주문제작', '2024-06-11']
2024-06-11 16:45:45,422 - INFO - [278, '젤리캣옷', '2024-06-11']
2024-06-11 16:45:45,430 - INFO - [279, '캉골키링', '2024-06-11']
2024-06-11 16:45:45,439 - INFO - [280, '월레스와그로밋키링', '2024-06-11']
2024-06-11 16:45:47,471 - INFO - [281, '실버키티키링', '2024-06-11']
2024-06-11 16:45:47,480 - INFO - [282, '헬로키티50주년키링', '2024-06-11']
2024-06-11 16:45:47,489 - INFO - [283, '키링줄', '2024-06-11']
2

2024-06-11 16:45:58,655 - INFO - [400, '산리오카메라키링', '2024-06-11']
2024-06-11 16:46:00,693 - INFO - [401, '키링주문제작', '2024-06-11']
2024-06-11 16:46:00,704 - INFO - [402, '워킹맘상점', '2024-06-11']
2024-06-11 16:46:00,717 - INFO - [403, '산리오오바케누', '2024-06-11']
2024-06-11 16:46:00,730 - INFO - [404, '연결고리', '2024-06-11']
2024-06-11 16:46:00,740 - INFO - [405, '위글위글키링', '2024-06-11']
2024-06-11 16:46:00,749 - INFO - [406, '수영장키링', '2024-06-11']
2024-06-11 16:46:00,757 - INFO - [407, '토마스키링', '2024-06-11']
2024-06-11 16:46:00,766 - INFO - [408, '짱구인형키링', '2024-06-11']
2024-06-11 16:46:00,774 - INFO - [409, '일본가챠키링', '2024-06-11']
2024-06-11 16:46:00,783 - INFO - [410, '땡땡키링', '2024-06-11']
2024-06-11 16:46:00,791 - INFO - [411, '이미스키링', '2024-06-11']
2024-06-11 16:46:00,799 - INFO - [412, '자수키링', '2024-06-11']
2024-06-11 16:46:00,807 - INFO - [413, '미니백키링', '2024-06-11']
2024-06-11 16:46:00,816 - INFO - [414, '남자키링', '2024-06-11']
2024-06-11 16:46:00,825 - INFO - [415, '마크라메키링', '2024-06-11']
20