## yahoo, google finance api 가 중지 되었으니 다른 방법으로 크롤링을 해보자

In [37]:
import pandas as pd

code_df = pd.read_html('http://kind.krx.co.kr/corpgeneral/corpList.do?method=download&searchType=13', header=0)[0] 

code_df.종목코드 = code_df.종목코드.map('{:06d}'.format)

code_df = code_df[['회사명','종목코드']]

code_df = code_df.rename(columns={'회사명': 'name', '종목코드': 'code'}) 
code_df.head()


Unnamed: 0,name,code
0,GS글로벌,1250
1,HSD엔진,82740
2,KG케미칼,1390
3,LG이노텍,11070
4,OCI,10060


In [39]:
# 종목 이름을 입력하면 종목에 해당하는 코드를 불러와
# 네이버 금융("http://finance.naver.com") 에 넣어줌

def get_url(item_name, code_df):

    code = code_df.query("name=='{}'".format(item_name))['code'].to_string(index=False) 
    url = 'http://finance.naver.com/item/sise_day.nhn?code={code}'.format(code=code) 
    print("요청 URL = {}".format(url)) 
    return url


item_name='신라젠'
url = get_url(item_name, code_df)

# 일자 데이터를 담은 df라는 DataFrame 정의
df = pd.DataFrame()

for page in range(1,21) :
    pg_url = '{url}&page={page}'.format(url=url, page=page) 
    df = df.append(pd.read_html(pg_url, header=0)[0], ignore_index=True) 

    
df = df.dropna()

df.head()

요청 URL = http://finance.naver.com/item/sise_day.nhn?code=215600


Unnamed: 0,날짜,종가,전일비,시가,고가,저가,거래량
1,2019.06.20,57700.0,1700.0,56100.0,57900.0,56100.0,481103.0
2,2019.06.19,56000.0,600.0,55900.0,56400.0,55200.0,331231.0
3,2019.06.18,55400.0,1300.0,57000.0,57400.0,55000.0,495082.0
4,2019.06.17,56700.0,100.0,57500.0,58500.0,56600.0,421203.0
5,2019.06.14,56800.0,900.0,56100.0,59400.0,55800.0,985343.0


In [41]:
# 한글로 된 컬럼명을 영어로 바꿔줌 
df = df.rename(columns= {'날짜': 'date', '종가': 'close', '전일비': 'diff', '시가': 'open', '고가': 'high', '저가': 'low', '거래량': 'volume'}) 
# 데이터의 타입을 int형으로 바꿔줌 
df[['close', 'diff', 'open', 'high', 'low', 'volume']] \
= df[['close', 'diff', 'open', 'high', 'low', 'volume']].astype(int) 
# 컬럼명 'date'의 타입을 date로 바꿔줌 
df['date'] = pd.to_datetime(df['date']) 
# 일자(date)를 기준으로 오름차순 정렬 
df = df.sort_values(by=['date'], ascending=True) 
# 상위 5개 데이터 확인 
df.head()


Unnamed: 0,date,close,diff,open,high,low,volume
298,2018-08-24,65000,2200,64600,66600,63700,2110154
297,2018-08-27,66300,1300,66200,67500,64700,1302620
296,2018-08-28,67400,1100,67300,69100,66500,1677504
295,2018-08-29,66100,1300,67900,68100,65300,1044419
294,2018-08-30,68500,2400,65800,68600,65400,1002923


In [44]:
import plotly.offline as offline
import plotly.graph_objs as go

In [48]:
offline.init_notebook_mode(connected=True)
trace = go.Scatter(x=df.date,y=df.close,name=item_name)
data = [trace]

In [50]:
layout = dict(
    title='{}의 종가(close) Time Series'.format(item_name),
    xaxis = dict(
        rangeselector=dict(
            buttons=list([
                dict(count=1,
                    label='1m',
                    step='month',
                    steopmode='backward'),
                dict(count=3,
                    label='3m',
                    step='month',
                    stepmode='backward'),
                dict(count=6,
                    label='6m',
                    step='month',
                    stepmode='backward'),
                dict(step='all')
            ])
        ),
        rangeslider=dict(),
        type='date'
    
    )
    
)


In [51]:
print(layout)

{'xaxis': {'type': 'date', 'rangeselector': {'buttons': [{'label': '1m', 'count': 1, 'step': 'month', 'steopmode': 'backward'}, {'stepmode': 'backward', 'label': '3m', 'count': 3, 'step': 'month'}, {'stepmode': 'backward', 'label': '6m', 'count': 6, 'step': 'month'}, {'step': 'all'}]}, 'rangeslider': {}}, 'title': '신라젠의 종가(close) Time Series'}


In [53]:
layout = dict(
    title='{}의 종가(close) Time Series'.format(item_name), 
    xaxis=dict( 
        rangeselector=dict( 
            buttons=list([ 
                dict(count=1, 
                     label='1m', 
                     step='month', 
                     stepmode='backward'), 
                dict(count=3, 
                     label='3m', 
                     step='month', 
                     stepmode='backward'),
                dict(count=6, 
                     label='6m', 
                     step='month', 
                     stepmode='backward'), 
                dict(step='all') 
            ]) 
        ), 
        rangeslider=dict(), 
        type='date' 
                                                                              ) 
             )


In [55]:
fig = go.Figure(data=data,layout=layout)

In [56]:
offline.iplot(fig)