# 네이버 금융 개별종목 수집

## 📍미리보기

In [35]:
import pandas as pd
import numpy as np
import requests
import time

def get_finance_information(code, name):
    """Collect the daily price history of one stock from Naver Finance.

    Pages of ``sise_day.naver`` are requested one after another, starting at
    page 1. Collection stops when a page ends on the same date as the page
    before it, or when a page contains no complete rows.

    Args:
        code: Stock code inserted into the URL query string, e.g. ``'129920'``.
        name: Stock name written into the ``종목명`` column of every row.

    Returns:
        A DataFrame with the columns ``종목코드``, ``종목명``, ``날짜``, ``종가``,
        ``전일비``, ``시가``, ``고가``, ``저가``, ``거래량`` and duplicate rows removed.
        The per-page row index is kept as-is. The frame is empty when no page
        yields a complete row.

    Raises:
        requests.RequestException: If a request times out or the server
            answers with an error status.
    """
    # Function-scope import so this cell stays self-contained.
    from io import StringIO

    cols = ['종목코드', '종목명', '날짜', '종가', '전일비', '시가', '고가', '저가', '거래량']
    headers = {'User-agent': 'Mozilla/5.0'}

    page_no = 1
    item_list = []
    prev_day = None

    while True:
        # URL of the page to collect
        url = f"https://finance.naver.com/item/sise_day.naver?code={code}&page={page_no}"

        # A timeout keeps a stalled connection from hanging the loop forever.
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        # Newer pandas deprecates passing literal HTML; a file-like object
        # works on old and new versions alike.
        table = pd.read_html(StringIO(response.text))
        df = table[0].dropna()

        # A page without any complete row has nothing to collect, and
        # df.iloc[-1] below would raise IndexError on it.
        if df.empty:
            break

        # Stop when this page ends on the same date as the previous one
        # (presumably the site repeats the final page past the end - confirm).
        last_day = df.iloc[-1]["날짜"]
        if last_day == prev_day:
            break

        prev_day = last_day
        item_list.append(df)
        page_no += 1
        time.sleep(0.1)

    # pd.concat raises on an empty list, so return an empty frame instead.
    if not item_list:
        return pd.DataFrame(columns=cols)

    # Add the identifying columns
    df_tot = pd.concat(item_list)
    df_tot["종목코드"] = code
    df_tot["종목명"] = name

    # Reorder the columns; drop_duplicates returns a new frame, so its result
    # must be assigned for the de-duplication to take effect.
    df_tot = df_tot[cols].drop_duplicates()

    return df_tot

In [36]:
get_finance_information('129920', '대성하이텍')

Unnamed: 0,종목코드,종목명,날짜,종가,전일비,시가,고가,저가,거래량
1,129920,대성하이텍,2022.09.30,9120.0,880.0,9800.0,9800.0,9070.0,805620.0
2,129920,대성하이텍,2022.09.29,10000.0,400.0,10850.0,11300.0,10000.0,2220094.0
3,129920,대성하이텍,2022.09.28,10400.0,700.0,10900.0,11000.0,10250.0,790762.0
4,129920,대성하이텍,2022.09.27,11100.0,150.0,11200.0,11500.0,10500.0,584862.0
5,129920,대성하이텍,2022.09.26,11250.0,600.0,11700.0,12050.0,11200.0,646782.0
9,129920,대성하이텍,2022.09.23,11850.0,150.0,12000.0,12350.0,11700.0,654339.0
10,129920,대성하이텍,2022.09.22,12000.0,50.0,11550.0,12200.0,11500.0,808001.0
11,129920,대성하이텍,2022.09.21,11950.0,150.0,12400.0,13100.0,11800.0,1616168.0
12,129920,대성하이텍,2022.09.20,12100.0,450.0,11750.0,12900.0,11750.0,2060076.0
13,129920,대성하이텍,2022.09.19,11650.0,2100.0,13750.0,14000.0,11500.0,1821469.0


---

## 1. 라이브러리 로드

In [20]:
import pandas as pd
import numpy as np
import requests
import time

## 2. 데이터 로드

In [12]:
# Stock code and page number that are substituted into the request URL
code, page_no = '005930', 1

# Daily price page for the chosen code and page
url = f"https://finance.naver.com/item/sise_day.naver?code={code}&page={page_no}"
print(url)

https://finance.naver.com/item/sise_day.naver?code=005930&page=1


In [13]:
# pd.read_html(url, encoding="cp949") fails with "ValueError: No tables found", so the page is fetched with requests and a User-agent header instead.

## 3. DataFrame으로 변환

In [14]:
# Download the page with a User-agent header, then parse the tables found in
# the returned HTML into a list of DataFrames.
response = requests.get(url, headers={'User-agent': 'Mozilla/5.0'})
table = pd.read_html(response.text)

In [15]:
# Display the list returned by pd.read_html
table

[            날짜       종가     전일비       시가       고가       저가         거래량
 0          NaN      NaN     NaN      NaN      NaN      NaN         NaN
 1   2022.09.30  53100.0   500.0  52300.0  53600.0  51800.0  21823113.0
 2   2022.09.29  52600.0   300.0  53300.0  53700.0  52600.0  13882080.0
 3   2022.09.28  52900.0  1300.0  53900.0  54400.0  52500.0  19991129.0
 4   2022.09.27  54200.0   300.0  53800.0  54200.0  53500.0  16631289.0
 5   2022.09.26  53900.0   600.0  53700.0  54200.0  53600.0  15008449.0
 6          NaN      NaN     NaN      NaN      NaN      NaN         NaN
 7          NaN      NaN     NaN      NaN      NaN      NaN         NaN
 8          NaN      NaN     NaN      NaN      NaN      NaN         NaN
 9   2022.09.23  54500.0   100.0  54400.0  54900.0  54200.0  10555964.0
 10  2022.09.22  54400.0   900.0  54600.0  54700.0  54300.0  12786510.0
 11  2022.09.21  55300.0   500.0  55400.0  55500.0  55000.0  11863700.0
 12  2022.09.20  55800.0   600.0  56400.0  57000.0  55800.0  14041465.0

In [21]:
# Display the first DataFrame (index 0) in the list
table[0]

Unnamed: 0,날짜,종가,전일비,시가,고가,저가,거래량
0,,,,,,,
1,2022.09.30,53100.0,500.0,52300.0,53600.0,51800.0,21823113.0
2,2022.09.29,52600.0,300.0,53300.0,53700.0,52600.0,13882080.0
3,2022.09.28,52900.0,1300.0,53900.0,54400.0,52500.0,19991129.0
4,2022.09.27,54200.0,300.0,53800.0,54200.0,53500.0,16631289.0
5,2022.09.26,53900.0,600.0,53700.0,54200.0,53600.0,15008449.0
6,,,,,,,
7,,,,,,,
8,,,,,,,
9,2022.09.23,54500.0,100.0,54400.0,54900.0,54200.0,10555964.0


## 4. 결측치 제거

In [22]:
# Remove missing values: dropna() discards every row that contains a NaN
df = table[0].dropna()

In [23]:
# Display the DataFrame after the rows with missing values were dropped
df

Unnamed: 0,날짜,종가,전일비,시가,고가,저가,거래량
1,2022.09.30,53100.0,500.0,52300.0,53600.0,51800.0,21823113.0
2,2022.09.29,52600.0,300.0,53300.0,53700.0,52600.0,13882080.0
3,2022.09.28,52900.0,1300.0,53900.0,54400.0,52500.0,19991129.0
4,2022.09.27,54200.0,300.0,53800.0,54200.0,53500.0,16631289.0
5,2022.09.26,53900.0,600.0,53700.0,54200.0,53600.0,15008449.0
9,2022.09.23,54500.0,100.0,54400.0,54900.0,54200.0,10555964.0
10,2022.09.22,54400.0,900.0,54600.0,54700.0,54300.0,12786510.0
11,2022.09.21,55300.0,500.0,55400.0,55500.0,55000.0,11863700.0
12,2022.09.20,55800.0,600.0,56400.0,57000.0,55800.0,14041465.0
13,2022.09.19,56400.0,200.0,56300.0,57000.0,56000.0,12278653.0


## 5. 페이지별 데이터 합치기

In [25]:
# Read the 날짜 (date) value of the last row
df.iloc[-1]["날짜"]

'2022.09.19'

In [26]:
# Collect pages 1 through 10 of the daily price table for one stock code
code = '005930'
headers = {'User-agent': 'Mozilla/5.0'}
item_list = []

for page in range(1, 11):
    # URL of the page to collect
    url = f"https://finance.naver.com/item/sise_day.naver?code={code}&page={page}"

    # Download the HTML and parse its tables into DataFrames
    response = requests.get(url, headers=headers)
    table = pd.read_html(response.text)

    # Keep only the rows of the first table that have no missing values
    df_item = table[0].dropna()

    # Accumulate the per-page result
    item_list.append(df_item)
    

In [27]:
# Display the list of per-page DataFrames
item_list

[            날짜       종가     전일비       시가       고가       저가         거래량
 1   2022.09.30  53100.0   500.0  52300.0  53600.0  51800.0  21823113.0
 2   2022.09.29  52600.0   300.0  53300.0  53700.0  52600.0  13882080.0
 3   2022.09.28  52900.0  1300.0  53900.0  54400.0  52500.0  19991129.0
 4   2022.09.27  54200.0   300.0  53800.0  54200.0  53500.0  16631289.0
 5   2022.09.26  53900.0   600.0  53700.0  54200.0  53600.0  15008449.0
 9   2022.09.23  54500.0   100.0  54400.0  54900.0  54200.0  10555964.0
 10  2022.09.22  54400.0   900.0  54600.0  54700.0  54300.0  12786510.0
 11  2022.09.21  55300.0   500.0  55400.0  55500.0  55000.0  11863700.0
 12  2022.09.20  55800.0   600.0  56400.0  57000.0  55800.0  14041465.0
 13  2022.09.19  56400.0   200.0  56300.0  57000.0  56000.0  12278653.0,
             날짜       종가     전일비       시가       고가       저가         거래량
 1   2022.09.16  56200.0   200.0  55600.0  56400.0  55500.0  13456503.0
 2   2022.09.15  56000.0   800.0  57000.0  57100.0  56000.0  11

In [29]:
# Combine the list of per-page DataFrames into a single DataFrame
df = pd.concat(item_list)

In [30]:
# Display the combined DataFrame
df

Unnamed: 0,날짜,종가,전일비,시가,고가,저가,거래량
1,2022.09.30,53100.0,500.0,52300.0,53600.0,51800.0,21823113.0
2,2022.09.29,52600.0,300.0,53300.0,53700.0,52600.0,13882080.0
3,2022.09.28,52900.0,1300.0,53900.0,54400.0,52500.0,19991129.0
4,2022.09.27,54200.0,300.0,53800.0,54200.0,53500.0,16631289.0
5,2022.09.26,53900.0,600.0,53700.0,54200.0,53600.0,15008449.0
...,...,...,...,...,...,...,...
9,2022.05.13,66500.0,1600.0,65300.0,66700.0,65200.0,14551536.0
10,2022.05.12,64900.0,800.0,65200.0,65500.0,64900.0,16414188.0
11,2022.05.11,65700.0,0.0,65500.0,66300.0,65200.0,12330920.0
12,2022.05.10,65700.0,400.0,65900.0,66300.0,65300.0,17235605.0
