In [1]:
import pandas as pd
import numpy as np
import re
import requests
import pickle
from konlpy.tag import Okt
from bs4 import BeautifulSoup
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import load_model
from datetime import date

import warnings
warnings.filterwarnings('ignore')

In [9]:
def get_code(symbol):
    """Look up the KRX short code for a Korean stock name.

    Reads ``./src/krx_code.csv`` on every call and returns the '단축코드' value
    of the row whose '한글 종목약명' equals ``symbol``.

    Args:
        symbol: Korean abbreviated stock name, e.g. '카카오'.

    Returns:
        The short code as a string (leading zeros preserved), or the integer
        ``0`` after printing a hint when the name cannot be looked up.
    """
    # Force the code column to str: a column of purely numeric codes would
    # otherwise be inferred as integers and lose leading zeros
    # ('035720' -> 35720), producing a wrong board URL downstream.
    krx = pd.read_csv('./src/krx_code.csv', dtype={'단축코드': str})
    krx = krx.set_index('한글 종목약명')
    try:
        return krx.at[symbol, '단축코드']
    except (KeyError, TypeError):
        # Unknown name, missing code column, or an unusable key: keep the
        # 0 sentinel that callers test for instead of raising.
        print('종목명을 다시 확인해주세요.')
        return 0

def get_today():
    """Return the current local date as an ISO 8601 string (YYYY-MM-DD)."""
    return date.today().isoformat()

def _clean_comment(title):
    """Reduce a board post title to space-separated Hangul syllables."""
    text = re.sub(r'\[삭제된 게시물의 답글\]', ' ', title)
    text = re.sub('[^가-힣]', ' ', text)  # drop everything that is not a Hangul syllable
    text = re.sub(' +', ' ', text)
    return text.strip()


def get_comment(symbol, max_pages=None):
    """Collect today's post titles from the Naver Finance board of a stock.

    Pages are fetched in order starting at page 1. Crawling stops at the
    first row whose date is not today, when a page contains no post rows, or
    after ``max_pages`` pages.

    Args:
        symbol: Korean abbreviated stock name understood by ``get_code``.
        max_pages: Optional upper bound on the number of pages fetched;
            ``None`` (the default) means no explicit cap.

    Returns:
        A list of cleaned, non-empty post titles (Hangul only). Empty when
        the symbol is unknown or nothing was posted today.

    Raises:
        requests.RequestException: if a page request fails or times out.
    """
    code = get_code(symbol)
    today = get_today()
    comment_list = []
    if isinstance(code, int) and code == 0:
        # get_code already reported the unknown name; there is no board to crawl.
        return comment_list

    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.127 Safari/537.36 Edg/100.0.1185.50'}
    page = 1
    reached_older_post = False
    while not reached_older_post and (max_pages is None or page <= max_pages):
        url = f'https://finance.naver.com/item/board.naver?code={code}&page={page}'
        # A timeout keeps a stalled connection from hanging the notebook.
        res = requests.get(url, headers=headers, timeout=10)
        bs = BeautifulSoup(res.text, 'html.parser')
        board = bs.find('div', {'class': 'section inner_sub'})
        # Parse the row list once per page instead of once per row.
        rows = board.find_all('tr', {'onmouseover': 'mouseOver(this)'}) if board else []
        if not rows:
            # No board or no posts on this page: without this check the loop
            # would keep requesting pages forever.
            break
        for row in rows[:20]:
            root = row.text.split('\n')
            try:
                posted_on = root[1].split()[0].replace('.', '-')
            except IndexError:
                continue  # row without a readable date cell; skip it
            if today != posted_on:
                reached_older_post = True
                break
            # 13 fields: regular post (title in root[3]); 14 fields: reply,
            # which is ignored; any other layout is ignored as well.
            if len(root) == 13:
                comment = _clean_comment(root[3])
                if comment:
                    comment_list.append(comment)
            print(f'\r{today} 댓글{len(comment_list)}개 크롤링중..',end='')
        page += 1
    print(f'\r{today} 댓글{len(comment_list)}개 크롤링완료')
    return comment_list

def konlpy_okt(symbol):
    """Tokenize today's board comments for ``symbol`` with the Okt tagger.

    Each comment is POS-tagged with stemming enabled; only tokens tagged
    Noun, Verb, Adjective or VerbPrefix are kept, and every kept token is
    wrapped in single quotes. Comments that end up with no tokens are dropped.

    Args:
        symbol: Korean abbreviated stock name passed on to ``get_comment``.

    Returns:
        A list of token lists, one per comment that kept at least one token.
    """
    analyzer = Okt()
    allowed_tags = ('Noun', 'Verb', 'Adjective', 'VerbPrefix')
    comments = get_comment(symbol)
    print('분석 진행중..',end='')
    quoted_sentences = []
    for sentence in comments:
        kept_tokens = [
            "'" + word + "'"
            for word, tag in analyzer.pos(sentence, stem=True)
            if tag in allowed_tags
        ]
        if kept_tokens:
            quoted_sentences.append(kept_tokens)
    return quoted_sentences
    
def tokenize(symbol):
    """Convert today's comments for ``symbol`` into padded integer sequences.

    Loads the pickled tokenizer from ``./src/mytokenizer1.pickle``, maps the
    token lists produced by ``konlpy_okt`` to index sequences and pads them
    to a length of 15.

    Args:
        symbol: Korean abbreviated stock name passed on to ``konlpy_okt``.

    Returns:
        The result of ``pad_sequences`` for the comment sequences.
    """
    # NOTE: pickle.load executes arbitrary code; only use a tokenizer file
    # that comes from a trusted source.
    with open('./src/mytokenizer1.pickle', 'rb') as tokenizer_file:
        fitted_tokenizer = pickle.load(tokenizer_file)
    sequences = fitted_tokenizer.texts_to_sequences(konlpy_okt(symbol))
    return pad_sequences(sequences, maxlen=15)

def feargreed_index(symbol):
    """Print a fear & greed percentage for ``symbol`` from today's comments.

    Loads the model from ``./src/mymodel1.h5``, predicts a score for each of
    today's board comments and prints the rescaled mean score as a percent.

    Args:
        symbol: Korean abbreviated stock name, e.g. '카카오'.

    Returns:
        None. The result (or the reason nothing was computed) is printed.
    """
    if get_code(symbol) == 0:
        # Unknown stock name; get_code has already printed a hint.
        return
    model = load_model('./src/mymodel1.h5')
    test = tokenize(symbol)
    if len(test) == 0:
        # No usable comments today: predicting on an empty batch would fail
        # and the mean of no predictions is undefined.
        print(f'\r{symbol} 오늘 분석할 댓글이 없습니다.')
        return
    pred = model.predict(test)
    # Stretch the mean prediction around 0.5 by a factor of two.
    # NOTE(review): if predictions lie in [0, 1] the printed value can fall
    # anywhere in [-50, 150]; confirm whether clamping to 0-100 is wanted.
    score = int(((pred.mean()-0.5)*2+0.5)*100)
    print(f'\r{symbol} 공포탐욕지수: {score}%')

In [12]:
# Example run: crawl today's board posts for '카카오' and print its fear & greed index.
feargreed_index('카카오')

2022-05-18 댓글115개 크롤링완료.
카카오 공포탐욕지수: 36%
