In [1]:
import datetime as dt
import pandas as pd
import bs4
from urllib.request import urlopen

def date_format(d):
    """Turn a year/month/day value into a ``datetime.date``.

    ``d`` is passed through ``str()`` first, so both text such as
    ``'2019-1-1'`` or ``'2019.01.01'`` and objects whose string form looks
    like that are accepted. Hyphens are treated the same as dots; the first
    three dot-separated fields are read as year, month and day.

    Raises ``IndexError`` when fewer than three fields are present and
    ``ValueError`` when a field is not an integer or the date is invalid.
    """
    # Normalise the separator once, then split a single time.
    fields = str(d).replace('-', '.').split('.')

    year, month, day = int(fields[0]), int(fields[1]), int(fields[2])
    return dt.date(year, month, day)

def historical_stock_naver(stock_cd, start_date='', end_date='', page_n=1, last_page=0,
                           historical_prices=None):
    """Collect daily closing prices for one stock from Naver Finance.

    Fetches the paginated ``sise_day`` listing page by page, starting at
    ``page_n``, and stores every close whose date lies inside
    ``[start_date, end_date]`` (both inclusive). Scanning stops as soon as a
    row dated before ``start_date`` is met, or after ``last_page`` has been
    processed. The early stop assumes the listing is ordered newest first.

    Parameters
    ----------
    stock_cd : str
        Stock code appended to the request URL, e.g. ``'005930'``.
    start_date, end_date : str or datetime.date, optional
        Bounds in any form accepted by ``date_format``. An empty value means
        "today".
    page_n : int, optional
        First listing page to fetch.
    last_page : int, optional
        Last listing page to fetch. ``0`` means "unknown": it is then read
        from the pager of the first page that is fetched.
    historical_prices : dict, optional
        Mapping ``{datetime.date: float}`` that receives the results. When
        omitted, the module-level ``historical_prices`` dict is used if one
        exists (this is how earlier callers of this function obtained their
        data); otherwise a new dict is created.

    Returns
    -------
    dict
        The mapping that was filled in.
    """
    # Normalise both bounds to datetime.date; a missing bound defaults to today.
    start_date = date_format(start_date) if start_date else dt.date.today()
    end_date = date_format(end_date) if end_date else dt.date.today()

    if historical_prices is None:
        # Backward compatibility: existing callers bind a module-level
        # `historical_prices` dict before calling and read it afterwards.
        historical_prices = globals().get('historical_prices')
        if historical_prices is None:
            historical_prices = {}

    # The price cells are indexed with a stride of six per dated row, the
    # closing price being the first cell of each group.
    cells_per_row = 6

    # Iterate over pages instead of recursing once per page, so that a long
    # date range cannot run into the interpreter's recursion limit.
    while True:
        naver_stock = ('http://finance.naver.com/item/sise_day.nhn?code=' + stock_cd
                       + '&page=' + str(page_n))

        # Close the HTTP response deterministically once it has been read.
        with urlopen(naver_stock) as response:
            source = bs4.BeautifulSoup(response.read(), 'lxml')

        dates = source.find_all('span', class_='tah p10 gray03')   # date cells
        prices = source.find_all('td', class_='num')   # numeric cells (close first)

        for n, date_tag in enumerate(dates):
            this_date = date_format(date_tag.text)

            if this_date < start_date:
                # Older than the requested range: nothing further is needed.
                return historical_prices

            if this_date <= end_date:
                # Inside [start_date, end_date]: store the closing price.
                this_close = prices[n * cells_per_row].text.replace(',', '')
                historical_prices[this_date] = float(this_close)

        # Page navigation: discover the last page number once.
        if last_page == 0:
            pager_cell = source.find_all('table')[1].find('td', class_='pgRR')
            pager_link = pager_cell.find('a') if pager_cell is not None else None
            if pager_link is None:
                # No "last page" link in the pager (presumably the listing
                # ends on this page), so treat the current page as the last.
                last_page = page_n
            else:
                # href looks like '...?code=<code>&page=<n>'; take <n>.
                href = pager_link['href']
                last_page = int(href.split('&')[1].split('=')[1])

        if page_n >= last_page:
            return historical_prices

        page_n += 1

In [2]:
'''
한국거래소 시가총액 상위 10종목 (2019년 5월 18일 현재)
005930	삼성전자
000660	SK하이닉스
005935	삼성전자우
005380	현대차
068270	셀트리온
051910	LG화학
055550	신한지주
017670	SK텔레콤
005490	POSCO
012330	현대모비스
'''
# Stock codes of the ten constituents listed above, in the same order.
k10_component = [
    '005930',   # Samsung Electronics
    '000660',   # SK Hynix
    '005935',   # Samsung Electronics (preferred)
    '005380',   # Hyundai Motor
    '068270',   # Celltrion
    '051910',   # LG Chem
    '055550',   # Shinhan Financial Group
    '017670',   # SK Telecom
    '005490',   # POSCO
    '012330',   # Hyundai Mobis
]

In [16]:
# Closing prices per stock code: {stock_cd: {date: close}}.
k10_historical_prices = {}

for stock_cd in k10_component:
    # historical_stock_naver fills the module-level `historical_prices` dict
    # when no dict is handed to it, so bind a fresh one for every stock to
    # keep the series from mixing.
    historical_prices = {}
    start_date, end_date = '2019-1-1', '2019-5-1'

    # The function returns the dict it filled; store that directly.
    k10_historical_prices[stock_cd] = historical_stock_naver(stock_cd, start_date, end_date)

In [17]:
# Build the price table (rows: dates, columns: stock codes). Both axes are
# sorted explicitly so the chronological row order does not depend on how the
# DataFrame constructor orders the keys of the nested dicts, and no frame is
# mutated in place after creation.
k10_historical_price = (
    pd.DataFrame(k10_historical_prices)
    .sort_index(axis=0)
    .sort_index(axis=1)
)

In [18]:
# Show the assembled table: one row per date, one column per stock code.
k10_historical_price

Unnamed: 0,000660,005380,005490,005930,005935,012330,017670,051910,055550,068270
2019-01-02,60600.0,114000.0,237000.0,38750.0,31600.0,185000.0,272500.0,337000.0,39400.0,214500.0
2019-01-03,57700.0,116500.0,237000.0,37600.0,30800.0,192000.0,275500.0,328000.0,39100.0,210500.0
2019-01-04,58300.0,119500.0,239000.0,37450.0,30450.0,199500.0,279500.0,330500.0,39000.0,220000.0
2019-01-07,58700.0,120500.0,246000.0,38750.0,32100.0,198500.0,276500.0,343000.0,38750.0,216000.0
2019-01-08,59200.0,119500.0,245500.0,38100.0,30900.0,193000.0,276000.0,349500.0,38500.0,213500.0
2019-01-09,63600.0,123000.0,255500.0,39600.0,32500.0,202500.0,269500.0,352000.0,39350.0,214000.0
2019-01-10,65300.0,123000.0,258500.0,39800.0,32450.0,196000.0,271000.0,347000.0,38350.0,216500.0
2019-01-11,65100.0,123000.0,255500.0,40500.0,33100.0,196500.0,263500.0,349000.0,38450.0,216500.0
2019-01-14,62100.0,121500.0,250000.0,40050.0,33050.0,195000.0,267000.0,350500.0,38350.0,211000.0
2019-01-15,64000.0,127500.0,256500.0,41100.0,33850.0,202000.0,268500.0,356000.0,39700.0,207500.0


In [19]:
# Save the price table as an Excel workbook; the relative path resolves
# against the current working directory.
k10_historical_price.to_excel('k10_historical_price.xlsx')