* 네이버증권 리서치 크롤링
* https://finance.naver.com/research/invest_list.naver

In [37]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

# Fetch the Naver Finance research list page and print the PDF link of every
# row whose date cell equals `target_date`.
url = "https://finance.naver.com/research/invest_list.naver"
# The timeout keeps the cell from hanging on a stalled connection, and
# raise_for_status() stops us from silently parsing an HTTP error page.
response = requests.get(url, timeout=10)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

target_date = '25.07.28'
target_company = '키움증권'  # not read in this cell; the filtered download cell uses it

# Walk every <tr> and keep the rows whose date cell matches target_date
for row in soup.find_all('tr'):
    date_td = row.find('td', class_='date')
    if date_td is None or date_td.get_text(strip=True) != target_date:
        continue

    file_td = row.find('td', class_='file')
    link = file_td.find('a') if file_td is not None else None  # look the anchor up once
    if link is not None:
        pdf_url = link['href']
        print(f"✔ 날짜: {target_date} → PDF 링크: {pdf_url}")

✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/66/20250728_invest_289585000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/57/20250728_invest_378226000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/57/20250728_invest_581354000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/57/20250728_invest_37676000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/57/20250728_invest_839751000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/57/20250728_invest_143603000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/39/20250728_invest_917031000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/2/20250728_invest_892827000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.net/stock-research/invest/2/20250728_invest_347864000.pdf
✔ 날짜: 25.07.28 → PDF 링크: https://stock.pstatic.ne

In [27]:
# Display the current value of pdf_url (whichever link was assigned most recently)
pdf_url

'https://stock.pstatic.net/stock-research/invest/39/20250728_invest_463167000.pdf'

In [31]:
### Download sample: save the PDF that `pdf_url` currently points to

import os

filename = pdf_url.split('/')[-1]
output = f"./reports/{filename}"

# Create the target folder first; open() raises FileNotFoundError when
# ./reports does not exist yet.
os.makedirs(os.path.dirname(output), exist_ok=True)

# Bound the wait and refuse to store an HTTP error page under a .pdf name.
response = requests.get(pdf_url, timeout=30)
response.raise_for_status()

with open(output, 'wb') as f:
    f.write(response.content)

print(f"📄 파일 저장 완료: {filename}")

📄 파일 저장 완료: 20250728_invest_463167000.pdf


In [39]:
### Download only the reports whose row mentions `target_company`

import os
import shutil

# Start from an empty output folder: drop whatever an earlier run left behind
output_dir = './reports'
if os.path.exists(output_dir):
    shutil.rmtree(output_dir)  # removes the folder and everything in it
os.makedirs(output_dir)         # recreate it empty

# Walk the <tr> rows and download the files that satisfy every condition:
# date matches + some cell contains the company name + a file link exists
for row in soup.find_all('tr'):
    date_td = row.find('td', class_='date')
    if date_td is None or date_td.get_text(strip=True) != target_date:
        continue

    cells = row.find_all('td')
    if not any(target_company in td.get_text(strip=True) for td in cells):
        continue

    file_td = row.find('td', class_='file')
    link = file_td.find('a') if file_td is not None else None  # look the anchor up once
    if link is None:
        continue

    pdf_url = link['href']
    filename = pdf_url.split('/')[-1]
    output_path = os.path.join(output_dir, filename)

    # A failed or non-2xx request is reported and skipped so that an error
    # page is never written to disk as a .pdf and the remaining rows still run.
    try:
        pdf_response = requests.get(pdf_url, timeout=30)
        pdf_response.raise_for_status()
    except requests.RequestException as exc:
        print(f"❌ 다운로드 실패: {pdf_url} ({exc})")
        continue

    with open(output_path, 'wb') as f:
        f.write(pdf_response.content)

    print(f"✅ 저장 완료: {output_path}")

✅ 저장 완료: ./reports\20250728_invest_917031000.pdf
✅ 저장 완료: ./reports\20250728_invest_764144000.pdf
✅ 저장 완료: ./reports\20250728_invest_582328000.pdf
✅ 저장 완료: ./reports\20250728_invest_463167000.pdf


In [41]:
# ### 해당 날짜 모든 문서 다운로드하기 !!!
# import os
# import shutil

# # 기존 폴더 비우기
# output_dir = './reports'
# if os.path.exists(output_dir):
#     shutil.rmtree(output_dir)  # 폴더 전체 삭제
# os.makedirs(output_dir)         # 빈 폴더 다시 생성


# for row in soup.find_all('tr'):
#     date_td = row.find('td', class_='date')
#     if date_td and date_td.get_text(strip=True) == target_date:
#         file_td = row.find('td', class_='file')
#         if file_td and file_td.find('a'):
#             pdf_url = file_td.find('a')['href']
#             filename = pdf_url.split('/')[-1]
#             response = requests.get(pdf_url)
#             output = f"./reports/{filename}"
#             with open(output, 'wb') as f:
#                 f.write(response.content)
            
#             print(f"📄 파일 저장 완료: {filename}")

In [1]:
# Install the yfinance package (pip's --quiet flag suppresses most output).
# NOTE(review): `%pip install ...` is the IPython magic that targets the running
# kernel's environment — consider it if `!pip` installs into another interpreter.
!pip install yfinance --quiet

In [1]:
# 패키지 임포트
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import yfinance as yf
from scipy.optimize import minimize
sns.set()

In [9]:
def price_etf(tickers, start='2007-05-01', end='2025-06-30', freq='W'):
    """Download price history per ticker and return one resampled table.

    Each ticker is fetched on its own with ``yf.download``. A ticker that
    returns no rows, or whose download raises, is reported on stdout and
    left out, so one bad symbol does not abort the whole request.

    Args:
        tickers: Iterable of ticker symbols. A single symbol passed as a
            plain string is treated as a one-element list.
        start: ``start`` argument forwarded to ``yf.download``.
        end: ``end`` argument forwarded to ``yf.download``.
        freq: Resampling rule; the last observation of each bin is kept.
            The default ``'W'`` produces weekly rows.

    Returns:
        DataFrame indexed by a DatetimeIndex with one price column per
        ticker that was downloaded successfully. Empty when nothing could
        be downloaded.
    """
    if isinstance(tickers, str):
        # Iterating a bare string would request every character as a ticker.
        tickers = [tickers]

    frames = []
    for ticker in tickers:
        try:
            data = yf.download(ticker, start=start, end=end, progress=False)

            # Skip tickers that came back empty
            if data.empty:
                print(f"[EMPTY] {ticker} returned no data.")
                continue

            # Prefer 'Adj Close'; fall back to 'Close' when it is absent
            col = 'Adj Close' if 'Adj Close' in data.columns else 'Close'
            # NOTE: when the download has two column levels, rename() relabels
            # every level, so the column stays two-level as (ticker, ticker);
            # callers flatten it afterwards with droplevel.
            frames.append(data[[col]].rename(columns={col: ticker}))

        except Exception as e:
            # Deliberately broad: any failure for one ticker is logged and the
            # loop moves on to the next symbol (best-effort download).
            print(f"[ERROR] Failed to download {ticker}: {e}")

    # Join all columns in one concat instead of re-copying the growing table
    # on every iteration; fall back to an empty frame when nothing succeeded.
    price_data = pd.concat(frames, axis=1) if frames else pd.DataFrame()

    price_data.index = pd.to_datetime(price_data.index)
    price_data = price_data.resample(freq).last()

    return price_data

# Ticker symbols to download
tickers = [
    'SPY', 'FEZ', 'EWJ', 'EEM',
    'IEF', 'TLT', 'TIP',
    'GLD', 'DBC', 'DBA', 'IYR',
]

# Fetch the resampled price table and print its last rows
df = price_etf(tickers)
print(df.tail())

  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)
  data = yf.download(ticker, start='2007-05-01', end='2025-06-30', progress=False)


Price              SPY        FEZ        EWJ        EEM        IEF        TLT  \
Ticker             SPY        FEZ        EWJ        EEM        IEF        TLT   
Date                                                                            
2025-06-01  587.652771  58.107380  73.524826  45.083256  93.961777  85.640511   
2025-06-08  597.374023  59.040005  73.227234  46.469822  93.214157  85.032776   
2025-06-15  595.240295  58.117199  73.009003  46.658001  93.991684  86.009140   
2025-06-22  594.280029  57.430000  71.760002  46.330002  94.300705  86.168541   
2025-06-29  614.909973  59.490002  75.279999  48.090000  95.088203  87.065193   

Price              TIP         GLD        DBC        DBA        IYR  
Ticker             TIP         GLD        DBC        DBA        IYR  
Date                                                                 
2025-06-01  108.640533  303.600006  20.879999  26.700001  94.047707  
2025-06-08  107.696800  305.179993  21.650000  27.480000  94.465523  
2

In [3]:
# ETF 가격 데이터 호출 함수 
### 예전 방식 ############
# def price_etf(tickers):
#     etf = yf.Tickers(tickers)
#     data = etf.history(start='2007-05-01', end='2025-06-30', actions=False)
#     data.drop(['Open', 'High', 'Low', 'Volume'], inplace=True, axis=1)
#     data = data.droplevel(0, axis=1).resample('W').last()

#     return data

In [15]:
# Flatten the column header by dropping its outer level. Guarded so that
# re-running the cell (or receiving single-level columns) is a no-op instead
# of an error from droplevel, which cannot remove the only remaining level.
if df.columns.nlevels > 1:
    df = df.droplevel(0, axis=1)
df

Ticker,SPY,FEZ,EWJ,EEM,IEF,TLT,TIP,GLD,DBC,DBA,IYR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2007-05-06,107.141235,32.910442,43.001335,28.377594,53.248665,51.145237,59.478397,68.190002,21.399391,22.067789,44.038940
2007-05-13,107.098648,32.699287,43.329144,28.655167,53.145996,50.908550,59.342579,66.449997,21.281860,21.826515,44.347450
2007-05-20,108.348114,33.213593,42.464943,28.859926,52.735329,50.181248,58.852467,65.519997,21.349026,21.438757,41.756016
2007-05-27,107.687836,33.051178,42.554348,28.609661,52.568462,49.857929,58.917404,64.940002,21.491743,22.403849,41.766300
2007-06-03,109.384590,33.641281,43.746346,29.606167,52.265316,49.601776,58.631321,66.440002,21.575695,22.739902,44.002975
...,...,...,...,...,...,...,...,...,...,...,...
2025-06-01,587.652771,58.107380,73.524826,45.083256,93.961777,85.640511,108.640533,303.600006,20.879999,26.700001,94.047707
2025-06-08,597.374023,59.040005,73.227234,46.469822,93.214157,85.032776,107.696800,305.179993,21.650000,27.480000,94.465523
2025-06-15,595.240295,58.117199,73.009003,46.658001,93.991684,86.009140,108.224968,316.290009,22.650000,26.959999,94.564995
2025-06-22,594.280029,57.430000,71.760002,46.330002,94.300705,86.168541,108.912590,310.130005,23.260000,26.410000,94.550003


In [17]:
# Weekly returns: percentage change between consecutive rows of df.
# fillna(0) then replaces every NaN (e.g. the first row, which has no
# predecessor) with 0.
weekly_change = df.pct_change()
rets = weekly_change.fillna(0)
rets

Ticker,SPY,FEZ,EWJ,EEM,IEF,TLT,TIP,GLD,DBC,DBA,IYR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2007-05-06,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2007-05-13,-0.000397,-0.006416,0.007623,0.009781,-0.001928,-0.004628,-0.002283,-0.025517,-0.005492,-0.010933,0.007005
2007-05-20,0.011666,0.015728,-0.019945,0.007146,-0.007727,-0.014286,-0.008259,-0.013995,0.003156,-0.017765,-0.058435
2007-05-27,-0.006094,-0.004890,0.002105,-0.008672,-0.003164,-0.006443,0.001103,-0.008852,0.006685,0.045016,0.000246
2007-06-03,0.015756,0.017854,0.028011,0.034831,-0.005767,-0.005138,-0.004856,0.023098,0.003906,0.015000,0.053552
...,...,...,...,...,...,...,...,...,...,...,...
2025-06-01,0.017751,0.006633,0.014925,-0.015358,0.008531,0.020461,0.006724,-0.019855,-0.024755,-0.016212,0.025268
2025-06-08,0.016543,0.016050,-0.004048,0.030756,-0.007957,-0.007096,-0.008687,0.005204,0.036877,0.029213,0.004443
2025-06-15,-0.003572,-0.015630,-0.002980,0.004049,0.008341,0.011482,0.004904,0.036405,0.046189,-0.018923,0.001053
2025-06-22,-0.001613,-0.011824,-0.017107,-0.007030,0.003288,0.001853,0.006354,-0.019476,0.026932,-0.020401,-0.000159


In [6]:
# Annualized return, simple (non-compounding) scaling: weekly return x 52
rets.mul(52)

Ticker,DBA,DBC,EEM,EWJ,FEZ,GLD,IEF,IYR,SPY,TIP,TLT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2007-05-06,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2007-05-13,-0.568529,-0.285601,0.508618,0.396395,-0.333607,-1.326885,-0.100267,0.364272,-0.020670,-0.118735,-0.240630
2007-05-20,-0.923807,0.164102,0.371575,-1.037139,0.817878,-0.727766,-0.401835,-3.038609,0.606653,-0.429454,-0.742943
2007-05-27,2.340838,0.347621,-0.450924,0.109473,-0.254278,-0.460313,-0.164518,0.012809,-0.316863,0.057390,-0.334981
2007-06-03,0.779999,0.203126,1.811212,1.456585,0.928419,1.201109,-0.507810,2.784684,0.819301,-0.484666,-0.469611
...,...,...,...,...,...,...,...,...,...,...,...
2023-10-08,0.412891,-2.166665,-0.164425,-0.784873,0.074270,-0.530767,-0.862974,-0.911699,0.250585,-0.802159,-2.286617
2023-10-15,0.313251,2.021740,0.041235,0.175144,-0.778703,2.797644,0.675474,0.914424,0.237278,0.773990,1.729449
2023-10-22,0.502996,0.585913,-1.469624,-1.265526,-0.953666,1.384106,-1.008766,-2.243217,-1.242458,-0.401391,-2.593769
2023-10-29,0.308392,-0.268999,-0.367494,-0.277311,-0.434612,0.725093,0.354533,-0.786085,-1.297565,0.192146,0.705914


In [19]:
# Annualized return, compounded (the more accurate form):
# (1 + weekly return) ** 52 - 1
rets.add(1).pow(52).sub(1)

Ticker,SPY,FEZ,EWJ,EEM,IEF,TLT,TIP,GLD,DBC,DBA,IYR
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2007-05-06,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
2007-05-13,-0.020461,-0.284454,0.484241,0.658911,-0.095486,-0.214316,-0.112083,-0.739228,-0.249026,-0.435414,0.437650
2007-05-20,0.827861,1.251300,-0.649230,0.448100,-0.331937,-0.526808,-0.350306,-0.519490,0.178041,-0.606277,-0.956326
2007-05-27,-0.272295,-0.225009,0.115569,-0.364216,-0.151937,-0.285463,0.059021,-0.370206,0.414055,8.871827,0.012888
2007-06-03,1.254518,1.509865,3.206177,4.932153,-0.259727,-0.234975,-0.223617,2.278700,0.224740,1.168854,14.070026
...,...,...,...,...,...,...,...,...,...,...,...
2025-06-01,1.496724,0.410238,1.160605,-0.552827,0.555436,1.866932,0.416938,-0.647545,-0.728407,-0.572560,2.660607
2025-06-08,1.347082,1.288678,-0.190143,3.831681,-0.339924,-0.309490,-0.364717,0.309853,5.573939,3.469750,0.259234
2025-06-15,-0.169783,-0.559211,-0.143757,0.233861,0.540256,0.810627,0.289682,5.419919,9.464920,-0.629691,0.056252
2025-06-22,-0.080529,-0.461265,-0.592326,-0.307082,0.186114,0.101070,0.390056,-0.640390,2.982528,-0.657608,-0.008211


In [21]:
# Capture the column labels of the returns table and display them
cols = rets.columns
cols

Index(['SPY', 'FEZ', 'EWJ', 'EEM', 'IEF', 'TLT', 'TIP', 'GLD', 'DBC', 'DBA',
       'IYR'],
      dtype='object', name='Ticker')