In [2]:
import json
import os
import urllib.parse
import urllib.request

import requests
from bs4 import BeautifulSoup
from google.cloud import language

# configurations
# Search terms submitted to both Naver and Daum.
keywords = ['신한은행','신한금융지주','신한','shinhan','shinhan bank']
# REST endpoint used for sentiment analysis requests.
analizer_url = 'https://language.googleapis.com/v1beta2/documents:analyzeSentiment'
# SECURITY: API keys should not live in source control. The key is read from
# the GOOGLE_NL_API_KEY environment variable when it is set; the literal below
# is kept only as a backward-compatible fallback and should be rotated and
# removed once the environment variable is configured.
api_key = os.environ.get('GOOGLE_NL_API_KEY', 'AIzaSyAGmt9zzbdr1JMbHn7aBn9xSJjC5oF2Qnk')


def make_naver_next_page_url(url, page_no):
    """Append Naver paging parameters to a search URL.

    Args:
        url: Base search URL; it is used verbatim (never %-formatted).
        page_no: Zero-based page index.

    Returns:
        ``url`` followed by the paging query string, where both ``start``
        and ``prank`` are set to ``page_no * 30 + 1``.
    """
    items_per_page = 30
    first_item = page_no * items_per_page + 1
    # Only the suffix is a format string, so '%' characters that are already
    # present in ``url`` (e.g. percent-encoded keywords) are left alone.
    paging_suffix = (
        '&start=%s&mode=normal&aq=0&prank=%s'
        '&sm=tab_jum&ssc=tab.view.view&_callback=viewMoreContents'
    ) % (first_item, first_item)
    return url + paging_suffix

def make_naver_soup(keyword, page_no):
    """Download one Naver search result page and parse it.

    Args:
        keyword: Search term; it is URL-encoded with ``quote_plus``.
        page_no: Zero-based page index passed to
            ``make_naver_next_page_url``.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on network/HTTP failure.
    """
    url = 'https://search.naver.com/search.naver?where=view&sm=tab_jum&query=%s' % urllib.parse.quote_plus(keyword)
    page_url = make_naver_next_page_url(url, page_no)
    # Use the response as a context manager so the connection is closed
    # even if reading fails; the scraper opens one connection per page.
    with urllib.request.urlopen(page_url) as response:
        sourcecode = response.read()
    return BeautifulSoup(sourcecode, "html.parser")

def make_naver_search_contents(keyword, list_to_append):
    """Collect description snippets from Naver search results.

    Pages are fetched one after another, starting at page index 0, until a
    page yields no result items. For each item, the text of its first
    description ``div`` (with every ``'...'`` removed) is appended to
    ``list_to_append`` when it is non-empty.

    Args:
        keyword: Search term.
        list_to_append: List that is extended in place with the snippets.
    """
    print("searching naver contents with keyword[%s]" % keyword)

    pages_done = 0
    # keep requesting pages until one comes back without any result item
    while True:
        page = make_naver_soup(keyword, pages_done)
        entries = page.find_all("li", "bx _svp_item")
        if not entries:
            break

        pages_done += 1
        print("scrapped %s items in page %s from naver..." % (len(entries), pages_done))

        for entry in entries:
            descriptions = entry.find_all("div", "api_txt_lines dsc_txt")
            if not descriptions:
                # item without a description block; nothing to collect
                continue

            snippet = descriptions[0].text.replace('...', '')
            if snippet:
                list_to_append.append(snippet)

    print("naver search with keyword[%s] is done. total-items=%s" % (keyword, len(list_to_append)))

def make_daum_soup(keyword, page_no):
    """Download one Daum cafe search result page and parse it.

    Args:
        keyword: Search term; it is URL-encoded with ``quote_plus``.
        page_no: Page number placed in the ``p`` query parameter.

    Returns:
        A ``BeautifulSoup`` object built from the response body with the
        ``html.parser`` backend.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on network/HTTP failure.
    """
    url = 'https://search.daum.net/search?w=cafe&DA=PGD&enc=utf8&ASearchType=1&lpp=10&rlang=0&q=%s&p=%s' \
          % (urllib.parse.quote_plus(keyword), page_no)
    # The request carries a browser-like User-Agent header instead of the
    # urllib default.
    req = urllib.request.Request(
        url,
        data=None,
        headers={
            'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36'
        }
    )

    # Context manager guarantees the connection is released after reading.
    with urllib.request.urlopen(req) as response:
        sourcecode = response.read()
    return BeautifulSoup(sourcecode, "html.parser")

def _daum_item_text(item):
    """Return the description text of one Daum result item, or None.

    None is returned when the item lacks the ``div.cont_inner`` container
    or the ``p.f_eb.desc`` paragraph, instead of raising AttributeError.
    """
    inner = item.find("div", attrs={'class': 'cont_inner'})
    if inner is None:
        return None
    cont = inner.find("p", attrs={'class': 'f_eb desc'})
    if cont is None:
        return None
    return cont.text


def make_daum_search_contents(keyword, list_to_append):
    """Collect description snippets from Daum cafe search results.

    Pages are fetched starting at page 1. Scanning stops when the result
    list is missing or empty, when a page has no item with the expected
    description markup, or when the last description on a page equals the
    last description of the previous page (taken as a sign that the same
    page is being returned again).

    Args:
        keyword: Search term.
        list_to_append: List that is extended in place with every non-empty
            description (with ``'...'`` removed).
    """
    page_no = 0
    print("searching daum contents with keyword[%s]" % keyword)

    last_string = None

    while True:
        soup = make_daum_soup(keyword, page_no + 1)
        result = soup.find("ul", attrs={'id': "cafeResultUL"})

        if result is None:
            break

        items = result.find_all("li")
        if not items:
            break

        page_no = page_no + 1
        print("scrapped %s items in page %s from daum.." % (len(items), page_no))

        # Items without the expected markup are skipped rather than crashing
        # the whole scrape.
        texts = [text for text in map(_daum_item_text, items) if text is not None]
        if not texts:
            # No usable item on this page: the repeated-page check below
            # cannot work, so stop instead of risking an endless loop.
            break

        # check last page
        cur_last_string = texts[-1]
        if cur_last_string == last_string:
            break

        last_string = cur_last_string

        # cumulate contents to the list
        for text in texts:
            content = text.replace('...', '')
            if content:
                list_to_append.append(content)

    print("daum search with keyword[%s] is done. total-items=%s" % (keyword, len(list_to_append)))

def anaylze_content(content, score_list):
    """Send text to the sentiment endpoint and collect per-sentence scores.

    The request is POSTed to ``analizer_url`` with ``api_key`` as the ``key``
    query parameter. On HTTP 200 the ``sentiment.score`` of every entry in
    the response's ``sentences`` list is appended to ``score_list``; on any
    other status a failure message is printed and ``score_list`` is left
    unchanged.

    Args:
        content: Plain text to analyze.
        score_list: List that is extended in place with the scores.

    Raises:
        requests.exceptions.RequestException: on connection failure or when
            the server does not answer within the timeout.
    """
    analyzer = '%s?key=%s' % (analizer_url, api_key)
    req_body = {'document':{'type':'PLAIN_TEXT','content':content},
                'encodingType':'UTF8'}
    print('making natural language analysis request..')
    # requests has no default timeout; without one a stalled connection
    # would block the script forever.
    rsp = requests.post(analyzer, json=req_body, timeout=60)
    print('analysis completed. status-code=%d' % rsp.status_code)

    if rsp.status_code != 200:
        print('analyze content api failed - %d, content=%s' % (rsp.status_code, content))
        return

    # NOTE(review): 'sentences' is read defensively in case the field is
    # omitted when there is nothing to report - confirm against the API docs.
    for sentence in rsp.json().get('sentences', []):
        score_list.append(sentence['sentiment']['score'])

# initialize search result list
contents = []

# do search with keywords
for keyword in keywords:
    make_naver_search_contents(keyword, contents)
    make_daum_search_contents(keyword, contents)

# remove duplicates; dict.fromkeys keeps first-seen order so the merged
# document is built in the same order on every run (set() ordering is not
# stable across runs for strings)
contents = list(dict.fromkeys(contents))

# merge contents to make only a single analysis request
# (every snippet is prefixed with '.\n' so snippets are sentence-separated)
merged_contents = ''.join('.\n' + content for content in contents)

# request analysis
scores = []
anaylze_content(merged_contents, scores)

# tally sentence sentiments; neutral scores (exactly 0.0) are discarded
cnt_positives = sum(1 for score in scores if score > 0.0)
cnt_negatives = sum(1 for score in scores if score < 0.0)

print('positives : %d   negatives : %d' % (cnt_positives, cnt_negatives))

# Guard against division by zero: when the analysis failed or every sentence
# was neutral there is nothing to rate, so both ratios are treated as 0.
cnt_rated = cnt_positives + cnt_negatives
positive_ratio = cnt_positives / cnt_rated if cnt_rated else 0.0
negative_ratio = cnt_negatives / cnt_rated if cnt_rated else 0.0

# NOTE(review): the absolute-count conditions (1 and 3) are only evaluated
# when neither ratio condition matched - confirm this priority is intended.
if positive_ratio >= 0.7:
    print('우대조건2 : 우대이자율 최대 연 1%')
elif positive_ratio >= 0.5 and negative_ratio < 0.05:
    print('우대조건4 : 우대이자율 최대 연 0.5%')
elif cnt_positives > 1000000:
    print('우대조건1 : 우대이자율 최대 연 2%')
elif cnt_positives > 500000:
    print('우대조건3 : 우대이자율 최대 연 0.5%')


searching naver contents with keyword[신한은행]
scrapped 30 items in page 1 from naver...
scrapped 30 items in page 2 from naver...
scrapped 30 items in page 3 from naver...
scrapped 30 items in page 4 from naver...
scrapped 23 items in page 5 from naver...
naver search with keyword[신한은행] is done. total-items=143
searching daum contents with keyword[신한은행]
scrapped 10 items in page 1 from daum..
scrapped 10 items in page 2 from daum..
scrapped 10 items in page 3 from daum..
scrapped 10 items in page 4 from daum..
scrapped 10 items in page 5 from daum..
scrapped 10 items in page 6 from daum..
scrapped 10 items in page 7 from daum..
scrapped 10 items in page 8 from daum..
scrapped 10 items in page 9 from daum..
scrapped 10 items in page 10 from daum..
scrapped 10 items in page 11 from daum..
scrapped 10 items in page 12 from daum..
scrapped 10 items in page 13 from daum..
scrapped 10 items in page 14 from daum..
scrapped 10 items in page 15 from daum..
scrapped 10 items in page 16 from daum..