In [None]:
!pip install reportlab
!pip install Django
!pip install pyRXP
!pip install "reportlab[accel]"
!pip install "reportlab[renderpm]"
!pip install "reportlab[pycairo]"
!pip install Preppy
!pip install reportlab.lib
!pip install svglib
!pip install yfinance

In [60]:
from reportlab.lib.pagesizes import A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, PageBreak, Image
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.enums import TA_CENTER
from reportlab.pdfbase.ttfonts import TTFont
from reportlab.pdfbase import pdfmetrics
from io import BytesIO
from svglib.svglib import svg2rlg
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import datetime
import yfinance as yf
import seaborn as sns
import os

In [64]:
PATH = './data/'
REPORT_PATH = './report/'

# PDF 문서 생성
doc = SimpleDocTemplate(REPORT_PATH + 'Daily_Report_test.pdf', pagesize=A4, rightMargin=72, leftMargin=72, bottomMargin=18)
Elements = []

# 한글 폰트 등록
pdfmetrics.registerFont(TTFont("HCR Bating", "HANBatang.ttf"))
pdfmetrics.registerFont(TTFont("HCR Bating Bold", "HANBatangB.ttf"))

# 스타일 정의
style_normal = ParagraphStyle(
    name='Normal',
    fontSize=10,
    leading=12,
    fontName='HCR Bating',
)
style_heading = ParagraphStyle(
    name='Heading',
    fontSize=16,
    leading=18,
    fontName='HCR Bating Bold',
)

# 제목 페이지 추가
title_text = "Nasdaq 뉴스 데이터 데일리 레포트"
date_text = "-2023.08.01-"

title_style = ParagraphStyle(
    name='Heading',
    fontSize=16,
    leading=18,
    fontName='HCR Bating Bold',
    alignment=TA_CENTER
)

date_style = ParagraphStyle(
    name='Heading',
    fontSize=16,
    leading=18,
    fontName='HCR Bating Bold',
    alignment=TA_CENTER
)

# Paragraph 객체 생성 및 스타일 적용
title_paragraph = Paragraph(title_text, title_style)
date_paragraph = Paragraph(date_text, date_style)

# Element에 추가
Elements.append(title_paragraph)
Elements.append(Spacer(1, 36))
Elements.append(date_paragraph)
Elements.append(PageBreak())


df_summary = pd.read_csv(PATH + 'news_summary.csv')
intv = datetime.timedelta(days=30)
date = str(df_summary.rgs_dt[0])
date = datetime.date.fromisoformat(date)


In [211]:
def draw_graph(ticker, date, interval = '1m', show=False, save=True):
    '''
    interval(string) : '1d', '1w', '1m', '3m'
    '''
    data = yf.Ticker(ticker)
    if interval == '1d':
        intv = datetime.timedelta(days=1)
    elif interval == '1w':
        intv = datetime.timedelta(days=7)
    elif interval == '1m':
        intv = datetime.timedelta(days=30)
    elif interval == '3m':
        intv = datetime.timedelta(days=92)
    else:
        raise ValueError

    if interval == '1d':
        history = data.history(start = date - intv, end = date, interval="1h")
        history.index = list(map(lambda x:str(x).split()[1][:5], list(history.index)))
    else:
        history = data.history(start = date - intv, end = date, interval="1h")
        history.index = list(map(lambda x:str(x).split()[0], list(history.index)))
    
    plt.figure(figsize=(10,8))
    plt.subplots_adjust(top=0.95, bottom=0.25)
    history.Open.plot()
    plt.xticks(rotation=45)
    plt.title(f"{ticker} Chart")
    image_filename = REPORT_PATH + f"{ticker}_chart.png"
    if save:
        plt.savefig(image_filename)
    if not show:
        plt.close()
    
    return image_filename


def stock_price(ticker, date):
    data = yf.Ticker(ticker)
    history = data.history(start= date-datetime.timedelta(days=1), end = date, interval="1d")
    if not history.empty:  # 데이터가 비어있지 않은지 확인
        stock = history.iloc[0].Close
        return str(round(stock,2))
    else:
        return "데이터 없음"  # 데이터가 없는 경우 메시지 반환

In [174]:
for index, row in df_summary.iloc[:10].iterrows():
    summary_text = row['summary']
    stock_codes = row['rld_ose_iem_tck_cd']
    media = row['mdi_ifo']
    art_title = row['til_ifo']
    field = row['ctgy_cfc_ifo']

    # 기사 출처 및 산업 분야
    Elements.append(Paragraph(f"기사 출처: {media} / 산업 분야: {field}", style_normal))
    Elements.append(Spacer(1, 12))

    # 기사 내용 - 키워드
    Elements.append(Paragraph(f"{index}. {art_title}", style_heading))
    Elements.append(Spacer(1, 12))

    # 기사 내용 - 본문
    Elements.append(Paragraph(f"{summary_text}", style_normal))
    Elements.append(Spacer(1, 12))

    # 기사 관련주
    stock_codes = stock_codes.split(',')  # 주식 코드를 쉼표로 분리
    
    # 중복 제거
    unique_stock_codes = list(set(stock_codes))

    for sc in unique_stock_codes:
        Elements.append(Paragraph(f"기사와 관련된 주식: {sc}", style_normal))
        Elements.append(Spacer(1, 12))

        price = stock_price(sc, date)
        Elements.append(Paragraph(f"{sc} 전일 종가: {price}", style_normal))
        Elements.append(Spacer(1, 12))
        
        chart_image = draw_graph(sc, date)  # 이미지 파일 경로를 반환
        if os.path.exists(chart_image):
            img = Image(chart_image, width=300, height=170)
            Elements.append(img)
        else:
            Elements.append(Paragraph("차트를 찾을 수 없습니다.", style_normal))
        Elements.append(Spacer(1, 12))
        
    Elements.append(PageBreak())

# 문서를 생성하고 저장
doc.build(Elements)

_: No timezone found, symbol may be delisted
_: No timezone found, symbol may be delisted


In [216]:
dfa = pd.read_csv(PATH + 'news_summary_keyword.csv')

In [220]:
t = dfa.keywords[10]

In [227]:
dfa['test']=None

In [236]:
dfa.loc[0]['summary']

'The Indonesia stock market has risen by more than 35 points in consecutive trading days, reaching a plateau of 6,930 points. The global forecast for Asian markets is cautiously optimistic, with mixed performances from European and U.S. markets. The JCI finished modestly higher on Monday, with gains from cement companies and mixed performances from financial and resource shares. Crude oil prices also climbed higher on Monday, with WTI crude futures gaining 16 percent in July. Indonesia will release June numbers for consumer prices later today, with forecasts suggesting an annual increase of 3.11 percent for overall inflation and 2.50 percent for core CPI.'

In [233]:
for i in range(10):
    dfa['test'].loc[i] = dfa.loc[i]['summary'].__len__()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dfa['test'].loc[i] = dfa.loc[i]['summary'].__len__()


In [235]:
dfa.head(5)

Unnamed: 0,rgs_dt,tck_iem_cd,til_ifo,ctgy_cfc_ifo,mdi_ifo,news_smy_ifo,rld_ose_iem_tck_cd,url_ifo,article,summary,keywords,test
0,20230801,Latest,Steady Start Seen For Indonesia Stock Market,Markets,RTTNews,(RTTNews) - The Indonesia stock market has mov...,_,https://www.nasdaq.com/articles/steady-start-s...,"July 31, 2023 — 09:34 pm EDT Written by ...",The Indonesia stock market has risen by more t...,1. Indonesia stock market\r\n2. rise\r\n3. con...,662
1,20230801,ARES,Ares Management (ARES) Q2 Earnings: Taking a L...,Technology,Zacks,"For the quarter ended June 2023, Ares Manageme...",ARES,https://www.nasdaq.com/articles/ares-managemen...,"August 01, 2023 — 09:30 am EDT Written by...",Ares Management reported revenue of $792.1 mil...,1. Ares Management\r\n2. revenue\r\n3. quarter...,568
2,20230801,EW,RBC Capital Reiterates Edwards Lifesciences (E...,Stocks,Fintel,"Fintel reports that on August 1, 2023, RBC Cap...",EW,https://www.nasdaq.com/articles/rbc-capital-re...,"August 01, 2023 — 07:30 pm EDT Written by...",RBC Capital reiterated its Outperform recommen...,1. RBC Capital\r\n2. Edwards Lifesciences\r\n3...,612
3,20230801,LMAT,"Noteworthy Tuesday Option Activity: LMAT, MSTR...","Markets,Stocks,Options",BNK Invest,Looking at options trading activity among comp...,"LMAT,LMAT,MSTR,TPC",https://www.nasdaq.com/articles/noteworthy-tue...,"August 01, 2023 — 03:19 pm EDT Written by...",BNK Invest analyzed options trading activity a...,1. BNK Invest\r\n2. Russell 3000 index\r\n3. L...,666
4,20230801,EW,Notable Two Hundred Day Moving Average Cross - EW,"Markets,Stocks",BNK Invest,"In trading on Tuesday, shares of Edwards Lifes...",EW,https://www.nasdaq.com/articles/notable-two-hu...,"August 01, 2023 — 11:07 am EDT Written by...","On August 1, 2023, at 11:07 am EDT, shares of ...",1. Edwards Lifesciences Corp\r\n2. Symbol: EW\...,500
