## 키워드 검색량 도구 API test

In [7]:
import random
import urllib
import time
from datetime import datetime 
from datetime import timedelta
from dateutil.relativedelta import relativedelta
import json
import signaturehelper
import requests

In [8]:
config_secret_debug = json.loads(open("./.config_secret/settings_debug.json").read())
customer_id = config_secret_debug["NAVER_SEARCH"]["CUSTOMER_ID"]
client_id = config_secret_debug["NAVER_SEARCH"]["CLIENT_ID"]
client_secret = config_secret_debug["NAVER_SEARCH"]["CLIENT_SECRET"]

In [5]:
## 입력

keyword = input("키워드 입력: ")
encText = urllib.parse.quote(keyword)
start_date = "2016-01-01"
end_date = str(datetime.now()-timedelta(days=1))[:10]
time_unit = "date"
keyword_groups = keyword
print(start_date, end_date)

키워드 입력: 폼클렌징
2016-01-01 2021-03-07


In [9]:
def get_search_header(method, uri, api_key, secret_key, customer_id):
    timestamp = str(round(time.time() * 1000))
    signature = signaturehelper.Signature.generate(timestamp, method, uri, SECRET_KEY)
    
    return {'Content-Type': 'application/json; charset=UTF-8', 
            'X-Timestamp': timestamp, 
            'X-API-KEY': API_KEY, 
            'X-Customer': str(CUSTOMER_ID),
            'X-Signature': signature}

In [10]:
def get_datalab_header(api_key, secret_key):
    return {
        "Content-Type":"application/json",
        "X-Naver-Client-Id": api_key,
        "X-Naver-Client-Secret":secret_key        
    }

In [11]:
BASE_URL = 'https://api.naver.com'
API_KEY = client_id
SECRET_KEY = client_secret
CUSTOMER_ID = customer_id

In [12]:
uri = '/keywordstool'
method = 'GET'
search_r = requests.get(BASE_URL + uri+'?hintKeywords={}&showDetail=1'.format(keyword),
                 params = {"sort":"date"},
                 headers=get_search_header(method, uri, API_KEY, SECRET_KEY, CUSTOMER_ID))

In [13]:
search_result = search_r.json()["keywordList"][0]
search_result

{'relKeyword': '폼클렌징',
 'monthlyPcQcCnt': 5910,
 'monthlyMobileQcCnt': 45400,
 'monthlyAvePcClkCnt': 12.7,
 'monthlyAveMobileClkCnt': 229.8,
 'monthlyAvePcCtr': 0.23,
 'monthlyAveMobileCtr': 0.54,
 'plAvgDepth': 15,
 'compIdx': '높음'}

In [14]:
import os
import sys
import urllib.request

In [15]:
client_id =  config_secret_debug["NAVER"]["CLIENT_ID"]
client_secret = config_secret_debug["NAVER"]["CLIENT_SECRET"]
url = "https://openapi.naver.com/v1/datalab/search"
body = { "startDate":start_date,
         "endDate":end_date,
         "timeUnit":time_unit,
         "keywordGroups" : [{"groupName": keyword, "keywords" : [keyword]}]
       }

In [15]:
datalab_r = requests.post(url,data = json.dumps(body),
                 headers=get_datalab_header(client_id, client_secret))

In [16]:
datalab_result = json.loads(datalab_r.content)
datalab_result

{'startDate': '2016-01-01',
 'endDate': '2021-02-21',
 'timeUnit': 'date',
 'results': [{'title': '폼클렌징',
   'keywords': ['폼클렌징'],
   'data': [{'period': '2016-01-01', 'ratio': 47.66626},
    {'period': '2016-01-02', 'ratio': 46.98239},
    {'period': '2016-01-03', 'ratio': 48.16207},
    {'period': '2016-01-04', 'ratio': 44.94785},
    {'period': '2016-01-05', 'ratio': 40.29748},
    {'period': '2016-01-06', 'ratio': 45.28979},
    {'period': '2016-01-07', 'ratio': 43.34074},
    {'period': '2016-01-08', 'ratio': 43.30654},
    {'period': '2016-01-09', 'ratio': 44.75978},
    {'period': '2016-01-10', 'ratio': 43.06719},
    {'period': '2016-01-11', 'ratio': 48.53821},
    {'period': '2016-01-12', 'ratio': 49.94016},
    {'period': '2016-01-13', 'ratio': 45.85399},
    {'period': '2016-01-14', 'ratio': 48.53821},
    {'period': '2016-01-15', 'ratio': 50.2308},
    {'period': '2016-01-16', 'ratio': 44.70849},
    {'period': '2016-01-17', 'ratio': 47.56368},
    {'period': '2016-01-18', 

### 최근 30일간 검색량 

In [17]:
import pandas as pd

In [16]:
month_ago = str(datetime.now()-relativedelta(months=1))[:10]
current_date = str(datetime.now())[:10]

In [19]:
dlab_list = datalab_result["results"][0]["data"]
month_index = next((index for (index, d) in enumerate(dlab_list) if d["period"] == month_ago), None)
dlab_list = dlab_list[month_index:]
ratio_sum = sum(dlab_list[i]["ratio"] for i in range(len(dlab_list)))
ratio_sum

805.7785999999999

In [20]:
print(str(datetime.now()-relativedelta(months=1))[:10])

2021-01-22


In [21]:
df = pd.DataFrame(search_result, index = ["values"])
df.drop(["compIdx",
         "plAvgDepth",
         "monthlyAveMobileCtr",
         "monthlyAvePcCtr",
         "monthlyAveMobileClkCnt",
         "monthlyAvePcClkCnt"], axis = 1 ,inplace = True)
df.rename(columns={"relKeyword":"검색어","monthlyPcQcCnt":"PC", "monthlyMobileQcCnt":"Mobile"}, inplace = True)
df["Total"] = search_result["monthlyPcQcCnt"] + search_result["monthlyMobileQcCnt"]
df["전월 대비"] = "_"

In [22]:
df["Total"] / ratio_sum

values    61.381625
Name: Total, dtype: float64

In [23]:
display(df)

Unnamed: 0,검색어,PC,Mobile,Total,전월 대비
values,폼클렌징,5960,43500,49460,_


### 최근 컨텐츠 발행량

In [24]:
url_blog = "https://openapi.naver.com/v1/search/blog"
url_cafe = "https://openapi.naver.com/v1/search/cafearticle?query=" + encText

In [25]:
content_blog_r = requests.get(url_blog,
                              params = {"sort":"sim", "query" : encText, "display":100},
                              headers=get_datalab_header(client_id, client_secret))
content_cafe_r = requests.get(url_cafe,
                              headers=get_datalab_header(client_id, client_secret))

In [26]:
blog_result = json.loads(content_blog_r.content)
cafe_result = json.loads(content_cafe_r.content)

In [27]:
blog_result

{'lastBuildDate': 'Mon, 22 Feb 2021 10:13:44 +0900',
 'total': 244160,
 'start': 1,
 'display': 100,
 'items': [{'title': 'com/c/%EA%B8%B<b>0%</b><b>EB%A</b><b>7%</b>89<b>%ED%</b>9E%<b>8C%EC%</b><b>8C%</b>A<b>4%</b><b>EC%</b>9D%98%<b>EC</b>... ',
   'link': 'https://blog.naver.com/leeej84?Redirect=Log&logNo=222249720218',
   'description': 'https://www.youtube.com/c/%EA%B8%B<b>0%</b><b>EB%A</b><b>7%</b>89<b>%ED%</b>9E%<b>8C%EC%</b><b>8C%</b>A<b>4%</b><b>EC%</b>9D%98%<b>EC%A7%</b>84%<b>EC%A7%</b>9C%EA%B5%90%<b>EC%A7%</b><b>81%</b><b>EC%</b>8B%A<b>4%EB%</b>AC%B4/featured',
   'bloggername': '기록하고 공유하며 성장하는 나의 보물창고',
   'bloggerlink': 'https://blog.naver.com/leeej84',
   'postdate': '20210220'},
  {'title': '지성피부 얼굴 트러블 고민이라면 세안부터 관리해요',
   'link': 'https://blog.naver.com/hahaha1504?Redirect=Log&logNo=222245557883',
   'description': '화학성분 없는 세안제로 자극 없이 부드러운 세안하고 싶으신 분들께 정말로 추천  https://msearch.shopping.naver.com/search/all?query=%EC%B2%9C%EC%97%B0+%EC%96%B4%EC%84%B1%EC%B4%88+%EB%B9%84%EB

In [18]:
cafe_url = "https://cafe.naver.com/ca-fe/home/search/articles"+"?q="+encText+"&pr=3"
blog_url = "https://section.blog.naver.com/Search/Post.nhn"+"?pageNo=1"+"&rangeType=MONTH"+"&orderBy=sim"+"&startDate="+month_ago+"&endDate="+current_date+"&keyword="+encText

In [29]:
### Test url

import webbrowser
#webbrowser.open(blog_url)
#webbrowser.open(cafe_url)

Selenium + Django 어떻게 연결할 껀지 고민해보기

In [1]:
from bs4 import BeautifulSoup
from urllib.request import urlopen
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
import pathlib as p
import requests

In [31]:
## Selenium 속도 향상
options = webdriver.ChromeOptions() # 크롬 옵션 객체 생성
options.add_argument('headless') # headless 모드 설정
options.add_argument("window-size=1920x1080") # 화면크기(전체화면)
options.add_argument("disable-gpu") 
options.add_argument("disable-infobars")
options.add_argument("--disable-extensions")

# 속도 향상을 위한 옵션 해제
prefs = {'profile.default_content_setting_values': {'cookies' : 2, 'images': 2, 'plugins' : 2, 'popups': 2, 'geolocation': 2, 'notifications' : 2, 'auto_select_certificate': 2, 'fullscreen' : 2, 'mouselock' : 2, 'mixed_script': 2, 'media_stream' : 2, 'media_stream_mic' : 2, 'media_stream_camera': 2, 'protocol_handlers' : 2, 'ppapi_broker' : 2, 'automatic_downloads': 2, 'midi_sysex' : 2, 'push_messaging' : 2, 'ssl_cert_decisions': 2, 'metro_switch_to_desktop' : 2, 'protected_media_identifier': 2, 'app_banner': 2, 'site_engagement' : 2, 'durable_storage' : 2}}   
options.add_experimental_option('prefs', prefs)

In [27]:
print(str(datetime.now()))
driver = webdriver.Chrome("./chromedriver_win32/chromedriver.exe")
driver.get(blog_url)
driver.implicitly_wait(3)
html = driver.page_source
html

2021-03-08 16:37:27.943905


'<html lang="ko" data-useragent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.190 Safari/537.36"><head><style type="text/css">@charset "UTF-8";[ng\\:cloak],[ng-cloak],[data-ng-cloak],[x-ng-cloak],.ng-cloak,.x-ng-cloak,.ng-hide:not(.ng-hide-animate){display:none !important;}ng\\:form{display:block;}.ng-animate-shim{visibility:hidden;}.ng-anchor{position:absolute;}</style>\n    <meta http-equiv="X-UA-Compatible" content="IE=edge">\n    <base href="/home">\n    <meta name="robots" content="noindex,nofollow">\n    <meta name="referrer" content="always">\n    <meta name="format-detection" content="telephone=no">\n    <link rel="shortcut icon" type="image/x-icon" href="https://section.blog.naver.com/favicon.ico?3">\n    \n\n\n\n\n \n\n\t\n\t\t<meta property="og:title" content="폼클렌징 : 네이버 블로그">\n\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t<meta property="og:image" content="https://blogimgs.pstatic.net/nblog/mylog/post/og_default_image_160610.png">\n\t\t\

In [25]:
req = requests.get(blog_url)
time.sleep(1)
html = req.content
html

b'\n\n\n\n<!DOCTYPE html>\n<html lang="ko">\n<head>\n    <meta http-equiv="X-UA-Compatible" content="IE=edge">\n    <base href="/home" />\n    <meta name="robots" content="noindex,nofollow"/>\n    <meta name="referrer" content="always"/>\n    <meta name="format-detection" content="telephone=no">\n    <link rel="shortcut icon" type="image/x-icon" href="https://section.blog.naver.com/favicon.ico?3"/>\n    \n\n\n\n\n \n\n\t\n\t\t<meta property="og:title" content="\xed\x8f\xbc\xed\x81\xb4\xeb\xa0\x8c\xec\xa7\x95 : \xeb\x84\xa4\xec\x9d\xb4\xeb\xb2\x84 \xeb\xb8\x94\xeb\xa1\x9c\xea\xb7\xb8"/>\n\n\t\t\n\t\t\t\n\t\t\t\n\t\t\t<meta property="og:image" content="https://blogimgs.pstatic.net/nblog/mylog/post/og_default_image_160610.png"/>\n\t\t\t\n\t\t\n        <meta property="og:description" content="\xeb\x8b\xb9\xec\x8b\xa0\xec\x9d\x98 \xeb\xaa\xa8\xeb\x93\xa0 \xea\xb8\xb0\xeb\xa1\x9d\xec\x9d\x84 \xeb\x8b\xb4\xeb\x8a\x94 \xea\xb3\xb5\xea\xb0\x84"/>\n\n\t\t\n\t\n\t\n\n<meta property="me:feed:servi

In [28]:
soup = BeautifulSoup(html, "html.parser")
get_blog_number = soup.find("em",{"class","search_number"})
get_blog_number = int(get_blog_number.text[:-1].replace(",",""))
get_blog_number

16502

In [29]:
driver.get(cafe_url)
driver.implicitly_wait(3)

In [30]:
driver.find_element_by_id("app").click()
get_cafe_number = driver.find_element_by_class_name("total_count")
time.sleep(1)
get_cafe_number = int(get_cafe_number.text[:-1].replace(",",""))
print(get_cafe_number)

13355


In [31]:
driver.close()

In [37]:
dlab_list = datalab_result["results"][0]["data"]
month_index = next((index for (index, d) in enumerate(dlab_list) if d["period"] == month_ago), None)
dlab_list = dlab_list[month_index:]
ratio_sum = sum(dlab_list[i]["ratio"] for i in range(len(dlab_list)))
ratio_sum

805.7785999999999

In [38]:
df = pd.DataFrame(index = ["values"])
df["Blog"] = get_blog_number
df["Cafe"] = get_cafe_number
df["Total"] = get_blog_number + get_cafe_number
df["전월 대비"] = int(df["Total"] / ratio_sum)
df
print(str(datetime.now()))

2021-02-22 10:13:52.368732


### Graph

In [44]:
## 카테고리에 있는 모든 카테고리 아이디 블러오기
naver_shop = "https://shopping.naver.com/"
driver = webdriver.Chrome("./chromedriver_win32/chromedriver.exe")
driver.get(naver_shop)
time.sleep(1)

In [45]:
action = ActionChains(driver)
driver.find_element_by_class_name("co_category_menu").click()
category_list = driver.find_elements.by_tag_name("li")
print(category_list)

WebDriverException: Message: chrome not reachable
  (Session info: chrome=88.0.4324.182)


In [38]:
#연령별 옵션 가능
age = input("연령: ")
#성별 옵션도 가능한데 일단은 보류

연령: 20


In [40]:
url = "https://openapi.naver.com/v1/datalab/shopping/categories";
body = { "startDate":start_date,
         "endDate":end_date,
         "timeUnit":time_unit,
         "category" : [
             {"name": "화장품/미용", "param": [ "50000002"]}
         ],
         "ages" : [age] 
       }

In [None]:
dlab_shopr = requests.post(url,data = json.dumps(body),
                 headers=get_datalab_header(client_id, client_secret))

In [None]:
dlab_shopr = json.loads(datalab_r.content)
dlab_shopr

### 관련 키워드 현황

In [None]:
search_result

In [None]:
df = pd.DataFrame(search_result, index = ["values"])
df.drop(["compIdx",
         "plAvgDepth"])


### 카테고리 인기 검색어

In [81]:
from selenium import webdriver
from time import sleep
from selenium.webdriver import ActionChains
import pyperclip
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import NoSuchElementException
import requests
from bs4 import BeautifulSoup
import time
import win32com.client
import pandas as pd
import requests

### 예상 판매량

In [82]:
def get_category(item):
    categories = item.find_elements_by_tag_name("a")
    return [i.text for i in categories]

In [83]:
def get_link(item):
    link = item.find_element_by_tag_name("a").get_attribute("href")
    return link

In [84]:
def get_title(item):
    title = item.find_element_by_tag_name("a").text
    return title

In [85]:
def get_shipping(item):
    shipping_fee = 0
    fee = item.find_element_by_tag_name("em").text
    if not fee[4:].startswith("무료"):
        shipping_fee = fee[4:].replace("원","")
        shipping_fee = shipping_fee.replace(",","")
    return int(shipping_fee)

In [86]:
def get_store_name(item):
    store = item.find_element_by_class_name("basicList_mall__sbVax").text
    return store

In [87]:
def get_total_price(driver):
    get_buy_nums = driver.find_elements_by_class_name("Zye_YMu5OB")
    get_price = int(driver.find_element_by_class_name("_1LY7DqCnwR").text.replace(",",""))
    total = sum([int(i.text.split("건")[0]) for i in get_buy_nums])
    total_price = get_price * total
    return total_price

In [88]:
def get_sales(driver):
    get_buy_nums = driver.find_elements_by_class_name("Zye_YMu5OB")
    total = sum([int(i.text.split("건")[0]) for i in get_buy_nums])
    return total

In [89]:
def get_price(item):
    get_price = item.find_element_by_tag_name("span").text
    get_price = get_price.replace("원","")
    get_price = int(get_price.replace(",",""))
    return get_price

In [99]:
n_shop_url = "https://search.shopping.naver.com/search/all?frm=NVSHCHK&origQuery={}&pagingIndex=1&pagingSize=40&productSet=checkout&query={}&sort=rel&timestamp=&viewType=list".format(encText, encText)
driver = webdriver.Chrome("./chromedriver_win32/chromedriver.exe")
driver.get(n_shop_url)
driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")
time.sleep(1)

In [100]:
get_item_list = driver.find_element_by_class_name("list_basis")
items = get_item_list.find_elements_by_tag_name("div")
all_items_info = {}
info = {}
for item in items:
    class_name = item.get_attribute("class")
    if class_name.startswith("basicList_title"):
        title = get_title(item)
        info["link"] = get_link(item)
    elif class_name.startswith("basicList_mall_title"):
        info["store"] = get_store_name(item)
    elif class_name.startswith("basicList_depth"):
        info["category"] = get_category(item)
    elif class_name.startswith("basicList_mall_area"):
        info["shipping"] = get_shipping(item)
    elif class_name.startswith("basicList_price_area"):
        info["price"] = get_price(item)
    elif class_name.startswith("basicList_mall_grade"):
        info["price_shipping"] = info["shipping"] + info["price"]
        all_items_info[title]= info
        info = {}

In [101]:
for item in items:
    print(item.get_attribute("class"))



basicList_inner__eY_mq
basicList_img_area__a3NRA
thumbnail_thumb_wrap__1pEkS _wrapper
thumbnail_btn_box__3_V2T
thumbnail_btn__L5sLn
thumbnail_layer_info__2Bo5z
thumbnail_link_box__z5jwn
basicList_info_area__17Xyo
basicList_title__3P9Q7
basicList_price_area__1UXXR
basicList_depth__2QIie
basicList_desc__2-tko
basicList_detail_box__3ta3h
basicList_event__fLNNU
basicList_etc_box__1Jzg6
basicList_mall_area__lIA7R
basicList_mall_title__3MWFY
basicList_mall_grade__31CEX


In [102]:
for key,values in all_items_info.items():
    print(key)
    for title, value in values.items():
        print(title, value)
    print("------------------------")

전국운행 픽업 샌딩 콜벤 / 코로나19소독 방역콜밴 / 제이디 프리미엄 김포공항콜밴
link https://cr.shopping.naver.com/adcr.nhn?x=vyM8ceyywyzBTJM5%2F4roIP%2F%2F%2Fw%3D%3DsxFbVLtOgbz0SKacrKaS5o88s0hIXl9IwAzsRcPTfktfn36S%2BPozlL0oHlBkzL%2BGwWF3qRqDkUe%2B8pRmTWAdRDgukGVyLnwV1C1xZer7oGSn8mtz%2FeZHSwjJ42cOwj0Cfhwi%2Bso400G4fayQzDpG0EOTNFG8xSlnsEaHGgiisktaIU2nTQRckpgMldaETzgSPUJdIgSYnosQraYLJ%2FMuKUZXeiXbxuiD%2F8EOfb7uK9qhqZnDVwMmLkCFFKcmu4TDDFe00g%2BKOZqM0JekrlAxQDkS%2BortKzfQDWBF5GV%2FKfqEhJv3ljJo79VVIpj6%2Fm3WOkXjkJnky%2FB7WMGUsk8jI%2Bgcyz48mW%2B%2BYu%2BCClhVFVTxjI50HZy%2FcLV2dSyPQS0HhEIZJB1yuP5kkcuYnGBP9u%2B9ntzLRX7%2FckGw2yxcGq%2BP1M2d1CXZt2h8Jd2SRb4JfaXEPSTSJxSd4DjaxOs7a3NEJeUCmpPKPpNh7yzUHda8E7G6ucT0VhBDFUuE4TRg0ncDKdxLGvM44%2FNfX0QkIskj36agcF%2FJ%2BnxUGI9gn%2FMbDhGvU%2FlCgT%2BZpmbfQNx8K5ecsr6Dgwc4e%2FHGtdzEJowAClQzNqN%2BEXPeM244U5rNr8sz1Ni%2FKXUa6hhmVYKKL%2BzdVAtahD3v4497z0IU1aAkk4r4sLJIl2oHt%2FF6Bv3vQ2JFnEOKnhePRU54S2FpUbY9HZaG4rt4S8eJjMyqSRWVpY4ojQ%2FmQYfDbR2hEK3224HzMFPCAgbu1hTl8ArfHqk9k3UaW6Ri3v%2By6YPmuAm9TZjqEQTAb

In [103]:
for title,info in all_items_info.items():
    begin = time.time()
    driver.get(info["link"])
    time.sleep(1)
    try:      
        sales = get_sales(driver)
    except:
        sales = "Unknown"
        print("Time elapsed: {:.3f}s".format(time.time() - begin))
    info["sales"] = sales
    all_items_info[title] = info

In [104]:
#driver.close()
for key, values in all_items_info.items():
    print(key, values["price"])

전국운행 픽업 샌딩 콜벤 / 코로나19소독 방역콜밴 / 제이디 프리미엄 김포공항콜밴 40000


In [105]:
from collections import OrderedDict

ordered = OrderedDict(sorted(all_items_info.items(), key=lambda i: i[1]["sales"], reverse = True))

In [106]:
for key,values in ordered.items():
    print(key, values["sales"])

전국운행 픽업 샌딩 콜벤 / 코로나19소독 방역콜밴 / 제이디 프리미엄 김포공항콜밴 0


In [107]:
driver.close()