#종목관련 뉴스피드 제공

In [None]:
# 필요한 라이브러리 설치
!pip install Flask requests beautifulsoup4




In [None]:
#개발모드 사용경고(운영환경에서는 비활성화, 개발 서버 경고)
from flask import Flask, render_template
import requests
from bs4 import BeautifulSoup

app = Flask(__name__)

# 네이버 뉴스 검색 URL 생성 함수
def get_news_page_url(stock_symbol):
    return f'https://finance.naver.com/item/news_news.nhn?code={stock_symbol}&page=1'

# 네이버 뉴스 정보 가져오기 함수
def get_news_info(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # 예시로 뉴스 제목과 출처를 추출
    headlines = soup.select('.title')
    sources = soup.select('.info')

    news_info = []
    for headline, source in zip(headlines, sources):
        title = headline.get_text(strip=True)
        source = source.get_text(strip=True)
        news_info.append({'title': title, 'source': source})

    return news_info

# Flask 라우트
@app.route('/stock_news/<stock_symbol>')
def stock_news(stock_symbol):
    url = get_news_page_url(stock_symbol)
    news_info = get_news_info(url)
    return render_template('stock_news.html', news_info=news_info)

if __name__ == '__main__':
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat


#TextRank로 크롤링한 주식뉴스기사 요약 모델

In [None]:
!pip uninstall konlpy



Found existing installation: konlpy 0.6.0
Uninstalling konlpy-0.6.0:
  Would remove:
    /usr/local/lib/python3.10/dist-packages/konlpy-0.6.0.dist-info/*
    /usr/local/lib/python3.10/dist-packages/konlpy/*
Proceed (Y/n)? y
  Successfully uninstalled konlpy-0.6.0


In [None]:
!pip install konlpy



In [None]:
# MeCab 설치
!apt-get install -y openjdk-8-jdk
!bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)

# mecab-python 설치
!pip install mecab-python


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
openjdk-8-jdk is already the newest version (8u382-ga-1~22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.
mecab-ko is already installed
mecab-ko-dic is already installed
mecab-python is already installed
Done.


In [None]:
from konlpy.tag import Mecab

mecab = Mecab('/usr/local/lib/mecab/dic/mecab-ko-dic')
result = mecab.pos('주식 코드를 입력하세요: 005306')
print(result)


[('주식', 'NNG'), ('코드', 'NNG'), ('를', 'JKO'), ('입력', 'NNG'), ('하', 'XSV'), ('세요', 'EP+EF'), (':', 'SC'), ('005306', 'SN')]


In [None]:
import requests
from bs4 import BeautifulSoup
from konlpy.tag import Mecab
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

In [None]:
!pip install requests beautifulsoup4 konlpy scikit-learn



In [None]:
# Install MeCab
!apt-get install -y curl
!bash <(curl -s https://raw.githubusercontent.com/konlpy/konlpy/master/scripts/mecab.sh)

# Install mecab-python
!pip install mecab-python

# Restart the runtime (kernel) after installation


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
curl is already the newest version (7.81.0-1ubuntu1.14).
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.
mecab-ko is already installed
mecab-ko-dic is already installed
mecab-python is already installed
Done.


In [None]:
!sudo apt-get install curl
!sudo apt-get install -y mecab libmecab-dev mecab-ipadic mecab-ipadic-utf8


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
curl is already the newest version (7.81.0-1ubuntu1.14).
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
libmecab-dev is already the newest version (0.996-14build9).
mecab-ipadic is already the newest version (2.7.0-20070801+main-3).
mecab-ipadic-utf8 is already the newest version (2.7.0-20070801+main-3).
mecab is already the newest version (0.996-14build9).
0 upgraded, 0 newly installed, 0 to remove and 6 not upgraded.


In [None]:
!pip install mecab-python




In [None]:


def crawl_stock_news(stock_code):
    url = f'https://finance.naver.com/item/news_news.nhn?code={stock_code}&page=1&sm=title_entity_id.basic&clusterId='

    # Send an HTTP request
    response = requests.get(url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract news elements
    news_elements = soup.select('.title')

    # Extract news text and links
    news_texts = []
    for element in news_elements:
        try:
            link = element.a['href']
            news_text = element.a.get_text(strip=True)
            news_texts.append({'text': news_text, 'link': link})
        except (KeyError, TypeError):
            # Handle cases where the 'href' attribute is not present
            pass

    return news_texts

def summarize_news(news_texts):
    # Extract text from news_texts
    documents = [news['text'] for news in news_texts]

    # Tokenize and vectorize the documents
    mecab = Mecab()
    vectorizer = TfidfVectorizer(tokenizer=mecab.morphs, stop_words=['은', '는', '이', '가', '을', '를'])
    X = vectorizer.fit_transform(documents)

    # Calculate cosine similarity matrix
    similarity_matrix = cosine_similarity(X, X)

    # TextRank algorithm
    scores = similarity_matrix.sum(axis=1)
    ranked_sentences = [(score, sentence) for score, sentence in zip(scores, documents)]
    ranked_sentences.sort(reverse=True)

    # Select top sentences for summary
    num_sentences = min(3, len(ranked_sentences))
    summary = [sentence for _, sentence in ranked_sentences[:num_sentences]]

    return summary

def main():
    # 주식 코드를 입력받아 해당 주식의 최신 뉴스를 크롤링
    stock_code = input("주식 코드를 입력하세요: ")
    news_texts = crawl_stock_news(stock_code)

    # TextRank로 요약하여 출력
    summary = summarize_news(news_texts)
    print("\n".join(summary))

if __name__ == "__main__":
    main()
#뉴스헤드라인 출력

주식 코드를 입력하세요: 005930
삼성 비스포크 가전, '2023 인간공학디자인상' 수상
삼성전자, 포터블 SSD 'T5 EVO' 출시…"업계 최대 8TB 용...
삼성 비스포크 가전, 인간공학디자인상 수상




#각 종목에 대한 주식코드 보여주는 방법

In [None]:
!pip install requests




In [None]:
#api키 : 'Y5bb740c89b964610652eccad1f7de49b6dc0d730'

In [None]:
import requests
from bs4 import BeautifulSoup

def get_stock_codes(api_key):
    url = f'https://opendart.fss.or.kr/api/corpCode.xml?crtfc_key={api_key}'

    # API 호출
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to fetch data. Status code: {response.status_code}")
        return []

    # XML 파싱
    xml_data = response.text
    soup = BeautifulSoup(xml_data, 'html.parser')

    # Extract corp_name and stock_code
    corp_data = []
    rows = soup.find_all('list')
    for row in rows:
        corp_name = row.find('corp_name').text
        stock_code = row.find('stock_code').text
        corp_data.append({corp_name: stock_code})

    return corp_data

def find_stock_code(stock_name, corp_data):
    for data in corp_data:
        for corp_name, stock_code in data.items():
            if stock_name in corp_name:
                return stock_code
    return None

# API 키를 여기에 입력
api_key = 'Y5bb740c89b964610652eccad1f7de49b6dc0d730'

# 기업 코드 목록을 출력
corp_data = get_stock_codes(api_key)
for data in corp_data:
    for corp_name, stock_code in data.items():
        print(f'{corp_name}: {stock_code}')

# 종목명을 입력받아 종목 코드 찾기
user_stock_name = input("원하는 주식 종목을 입력하세요: ")

# 종목명이 포함되어 있는 기업명이 있는지 확인
stock_code = find_stock_code(user_stock_name, corp_data)

if stock_code:
    print(f'{user_stock_name}의 종목 코드는 {stock_code}입니다.')
else:
    print(f'{user_stock_name}에 대한 종목 코드를 찾을 수 없습니다.')


원하는 주식 종목을 입력하세요: 삼성전자
삼성전자에 대한 종목 코드를 찾을 수 없습니다.


In [None]:
import requests

# API 키를 여기에 입력
api_key = '5bb740c89b964610652eccad1f7de49b6dc0d730'

url = f'https://opendart.fss.or.kr/api/corpCode.xml?crtfc_key={api_key}'

# API 호출
response = requests.get(url)
if response.status_code != 200:
    print(f"Failed to fetch data. Status code: {response.status_code}")
else:
    print(response.text)
print(corp_data);

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [1]:
import pandas as pd

# 엑셀 파일에서 주식 종목과 종목 코드 읽어오기
def read_stock_data_excel(file_path):
    try:
        df = pd.read_excel(file_path, dtype=str, engine='openpyxl')  # dtype을 문자열로 설정하여 숫자로 인식되는 것을 방지
        stock_data = df.set_index('회사명')['종목코드'].to_dict()
        return df, stock_data
    except Exception as e:
        print(f"Error reading data from Excel file: {e}")
        return None, {}

# 엑셀 파일 경로 설정
excel_file_path = r"/content/상장법인목록 .xlsx"

# 주식 종목과 종목 코드 출력
stocks_excel_df, stocks_excel = read_stock_data_excel(excel_file_path)

# DataFrame 출력
print(stocks_excel_df)

# 사용자에게 종목명 입력받기
user_input_excel = input('원하는 주식 종목을 입력하세요: ')

# 입력된 종목명에 대한 종목 코드 찾기
if user_input_excel in stocks_excel:
    print(f'{user_input_excel} 종목의 종목 코드는 {stocks_excel[user_input_excel]}입니다.')
else:
    print(f'{user_input_excel}에 대한 종목 코드를 찾을 수 없습니다.')


          회사명    종목코드                   업종  \
0      두산로보틱스  454910        특수 목적용 기계 제조업   
1    STX그린로지스  465770               해상 운송업   
2         넥스틸  092790            1차 철강 제조업   
3    NICE평가정보  030190           기타 정보 서비스업   
4        조선내화  462520     내화, 비내화 요업제품 제조업   
..        ...     ...                  ...   
832      유한양행  000100              의약품 제조업   
833    CJ대한통운  000120            도로 화물 운송업   
834        경방  000050               종합 소매업   
835     유수홀딩스  000700  회사 본부 및 경영 컨설팅 서비스업   
836  한진중공업홀딩스  003480    연료용 가스 제조 및 배관공급업   

                                                  주요제품                  상장일  \
0                                                 협동로봇  2023-10-05 00:00:00   
1                                          해상운송업 및 물류업  2023-09-15 00:00:00   
2             OCTG Pipe, Line Pipe, Standard Pipe, 일반관  2023-08-21 00:00:00   
3                                       개인 및 기업정보, 솔루션  2023-08-08 00:00:00   
4                                              내화 요업

In [None]:
# DataFrame 출력
print(stocks_excel_d

None
