In [10]:
import json
import os

import pymongo
import requests
from bs4 import BeautifulSoup
from sqlalchemy import *
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import sessionmaker

In [11]:
# Credentials must not live in the notebook: the full connection URL is read
# from the MYSQL_URL environment variable (a KeyError here means it is unset).
# Expected shape: mysql://<user>:<password>@<host>/<database>?charset=utf8
mysql_client = create_engine(os.environ["MYSQL_URL"])
# Declarative base class that the ORM models in this notebook inherit from.
base = declarative_base()
mysql_client

Engine(mysql://root:***@52.78.139.87/world?charset=utf8)

In [12]:
# Create the MongoDB client used by the save step later in the notebook.
# The bare name on the last line makes the cell display the client object.
mongo_client = pymongo.MongoClient('mongodb://52.78.139.87:27017')
mongo_client

MongoClient('52.78.139.87', 27017)

In [13]:
class NaverKeyword(base):
    """ORM model for one crawled keyword, stored in the ``naver`` table.

    Attributes set by the constructor are ``rank`` and ``keyword``; ``id`` and
    ``rdate`` are left for the database to fill in.
    """

    __tablename__ = "naver"

    id = Column(Integer, primary_key=True)
    rank = Column(Integer, nullable=False)
    keyword = Column(String(50), nullable=False)
    # __init__ never assigns rdate, so give the column a server-side default
    # that stamps the row at insert time instead of relying on how the server
    # treats a missing value for a NOT NULL TIMESTAMP.
    # `func` is provided by the notebook's `from sqlalchemy import *`.
    rdate = Column(TIMESTAMP, nullable=False, server_default=func.now())

    def __init__(self, rank, keyword):
        """Build a row from a rank and its keyword text."""
        self.rank = rank
        self.keyword = keyword

    def __repr__(self):
        """Return a short debug representation showing rank and keyword."""
        return "<NaverKeyword {}, {}>".format(self.rank, self.keyword)

In [14]:
def crawling(timeout=10):
    """Scrape the keyword ranking list from the Naver front page.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A list of ``(rank, keyword)`` tuples in page order. Both elements are
        the text of the matched tags, so ``rank`` is a string. The list is
        empty when the selectors match nothing.

    Raises:
        requests.RequestException: On connection errors, timeouts, or a
            non-success HTTP status.
    """
    response = requests.get("https://www.naver.com/", timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page.
    response.raise_for_status()
    dom = BeautifulSoup(response.content, "html.parser")

    datas = []
    for item in dom.select(".ah_roll_area > .ah_l > .ah_item"):
        rank_tag = item.select_one(".ah_r")
        keyword_tag = item.select_one(".ah_k")
        # select_one returns None when the tag is absent; skip incomplete
        # entries rather than crashing on `.text` of None.
        if rank_tag is None or keyword_tag is None:
            continue
        datas.append((rank_tag.text, keyword_tag.text))
    return datas

In [15]:
# Run the crawler once and display the (rank, keyword) pairs it returned.
datas = crawling()
datas

[('1', '2019 스타벅스 럭키백'),
 ('2', '스타벅스 럭키백'),
 ('3', '골목식당 고로케'),
 ('4', '양예원'),
 ('5', '복면가왕 황금돼지'),
 ('6', '카피타노 치약'),
 ('7', '정유안'),
 ('8', '김기방 부인'),
 ('9', '왜그래 풍상씨'),
 ('10', '지진'),
 ('11', '경주지진'),
 ('12', '심석희 성폭행'),
 ('13', '메이플'),
 ('14', '뽀로로 아쿠아빌리지'),
 ('15', '백승호'),
 ('16', '카카오 카풀'),
 ('17', '카풀'),
 ('18', '삼성출판사'),
 ('19', '롯데리아 핫크리스피버거'),
 ('20', '텔루스')]

In [16]:
def mysql_save(datas):
    """Insert crawled keyword pairs into MySQL as ``NaverKeyword`` rows.

    Args:
        datas: Iterable of ``(rank, keyword)`` pairs.

    Returns:
        None.

    Raises:
        Exception: Any error raised while adding or committing is re-raised
            after the transaction has been rolled back.
    """
    keywords = [NaverKeyword(rank, keyword) for rank, keyword in datas]
    if not keywords:
        # Nothing to store, so do not open a session at all.
        return

    session = sessionmaker(bind=mysql_client)()
    try:
        session.add_all(keywords)
        session.commit()
    except Exception:
        # Undo the partial transaction before propagating the error.
        session.rollback()
        raise
    finally:
        # Always release the connection, even when the commit failed.
        session.close()

In [17]:
# Issue the table-creation DDL for the models registered on `base` against the MySQL engine.
base.metadata.create_all(mysql_client)

In [18]:
# Store the crawled pairs in MySQL.
mysql_save(datas)

In [19]:
def mongo_save(datas):
    """Insert crawled keyword pairs into the ``crawling.naver_keywords`` collection.

    Args:
        datas: Iterable of ``(rank, keyword)`` pairs.

    Returns:
        None.
    """
    querys = [{"rank": rank, "keyword": keyword} for rank, keyword in datas]
    if not querys:
        # insert_many rejects an empty document list, so skip the call.
        return
    # Collection.insert is deprecated in PyMongo 3 and removed in PyMongo 4;
    # insert_many is the supported bulk-insert call.
    mongo_client.crawling.naver_keywords.insert_many(querys)

In [20]:
# Store the crawled pairs in MongoDB.
mongo_save(datas)

In [21]:
def send_slack(msg, channel="#dss", username="provision_bot", webhook_url=None, timeout=10):
    """Post a message to Slack through an incoming webhook.

    Args:
        msg: Text of the message.
        channel: Channel name sent in the payload.
        username: Display name sent in the payload.
        webhook_url: Incoming-webhook URL. When omitted, the value of the
            ``SLACK_WEBHOOK_URL`` environment variable is used.
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        The response object returned by ``requests.post``.

    Raises:
        ValueError: If no webhook URL was passed and ``SLACK_WEBHOOK_URL`` is
            not set.
    """
    # The webhook URL is a secret, so it is supplied by the caller or the
    # environment rather than being written into the notebook.
    webhook_url = webhook_url or os.environ.get("SLACK_WEBHOOK_URL")
    if not webhook_url:
        raise ValueError(
            "Slack webhook URL is not configured; "
            "pass webhook_url or set SLACK_WEBHOOK_URL"
        )

    payload = {
        "channel": channel,
        "username": username,
        "icon_emoji": ":provision:",
        "text": msg,
    }
    return requests.post(
        webhook_url,
        data=json.dumps(payload),
        timeout=timeout,
    )

In [22]:
def run():
    """Run the pipeline once: create tables, crawl, store, then notify Slack."""
    # Create the tables in the database
    base.metadata.create_all(mysql_client)

    # Crawl the Naver keywords
    crawled = crawling()

    # Save to the databases, MySQL first and then MongoDB
    for save in (mysql_save, mongo_save):
        save(crawled)

    # Send a message to Slack
    send_slack("naver crawling done!")

In [23]:
# Execute the whole pipeline: create tables, crawl, save to both databases, notify Slack.
run()