## 종목별 주가 데이터 크롤링

In [41]:
import requests    # browser 와 server 간 통신
import pandas as pd    # 데이터 분석 도구

In [42]:
# Display formatting: show floats with thousands separators and two decimals
pd.set_option('display.float_format', '{:,.2f}'.format)
# Take no action on chained assignment (pandas warns by default)
pd.options.mode.chained_assignment = None

In [43]:
# url: server address that the POST requests in this notebook are sent to
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

## 종목 마스터 크롤링

In [44]:
# Issue master: download the list of listed issues and load it into df_master
data = {
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT01901',
    'locale': 'ko_KR',
    'mktId': 'ALL',
    'share': '1',
    'csvxls_isNo': 'false',
}
raw = requests.post(url, data=data, timeout=30)    # talk to the server; do not hang forever
raw.raise_for_status()    # fail loudly on an HTTP error instead of in JSON parsing
rst = raw.json()['OutBlock_1']    # parse the JSON body and keep only the 'OutBlock_1' records
# Each record is a dict, so the DataFrame constructor derives the columns from
# the keys directly; this also works when the server returns an empty list.
df_master = pd.DataFrame(rst)
df_master.tail()

Unnamed: 0,ISU_CD,ISU_SRT_CD,ISU_NM,ISU_ABBRV,ISU_ENG_NM,LIST_DD,MKT_TP_NM,SECUGRP_NM,SECT_TP_NM,KIND_STKCERT_TP_NM,PARVAL,LIST_SHRS
2760,KR7000541003,545,흥국화재해상보험1우선주,흥국화재우,HeungkukFire&MarineIns(1P),1990/03/20,KOSPI,주권,,구형우선주,5000,768000
2761,KR7000540005,540,흥국화재해상보험보통주,흥국화재,HeungkukFire&MarineInsurance,1974/12/05,KOSPI,주권,,보통주,5000,64242645
2762,KR7003280005,3280,흥아해운보통주,흥아해운,HeungaShipping,1976/06/29,KOSPI,주권,,보통주,500,240424899
2763,KR7037440005,37440,희림종합건축사사무소,희림,Heerim Architects & Planners,2000/02/03,KOSDAQ,주권,우량기업부,보통주,500,13922475
2764,KR7238490007,238490,힘스,힘스,"HIMS CO., LTD.",2017/07/20,KOSDAQ,주권,벤처기업부,보통주,500,11312236


In [45]:
# Boolean mask: True for rows whose abbreviated name (ISU_ABBRV) equals '삼성전자'
df_master['ISU_ABBRV']=='삼성전자'

0       False
1       False
2       False
3       False
4       False
        ...  
2760    False
2761    False
2762    False
2763    False
2764    False
Name: ISU_ABBRV, Length: 2765, dtype: bool

In [46]:
# Index df_master with the boolean mask to keep only the matching row(s)
df_master[df_master['ISU_ABBRV']=='삼성전자']

Unnamed: 0,ISU_CD,ISU_SRT_CD,ISU_NM,ISU_ABBRV,ISU_ENG_NM,LIST_DD,MKT_TP_NM,SECUGRP_NM,SECT_TP_NM,KIND_STKCERT_TP_NM,PARVAL,LIST_SHRS
889,KR7005930003,5930,삼성전자보통주,삼성전자,SamsungElectronics,1975/06/11,KOSPI,주권,,보통주,100,5969782550


In [47]:
# From the matching rows, take the ISU_CD value of the first one
df_master[df_master['ISU_ABBRV']=='삼성전자']['ISU_CD'].iloc[0]

'KR7005930003'

In [48]:
# Look up the ISU_CD of the first row whose abbreviated name matches
# NOTE(review): this expression is identical to the one in the previous cell
df_master[df_master['ISU_ABBRV']=='삼성전자']['ISU_CD'].iloc[0]

'KR7005930003'

In [49]:
# Reverse lookup: abbreviated name of the first row whose short code
# (ISU_SRT_CD, compared as a zero-padded string) is '005930'
df_master[df_master['ISU_SRT_CD']=='005930']['ISU_ABBRV'].iloc[0]

'삼성전자'

## 모든 종목에 쓸 수 있도록 함수화

In [50]:
def historical_price(symbol, start_date, end_date):
    """Fetch the price history of one issue from the server at ``url``.

    The issue is resolved against the module-level ``df_master``: ``symbol``
    is first compared (upper-cased) with the ``ISU_ABBRV`` column; only when
    that yields no row is it compared as-is with ``ISU_SRT_CD``. The matched
    code/name pair is printed before the request is sent.

    Args:
        symbol: Abbreviated issue name or short issue code (string).
        start_date: Start of the period, sent as ``strtDd``
            (the notebook passes 'YYYYMMDD' strings).
        end_date: End of the period, sent as ``endDd`` (same format).

    Returns:
        A DataFrame built from the ``'output'`` list of the JSON response,
        or the string ``'No matched result'`` when ``symbol`` matches no row
        of ``df_master``.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # Select the issue: evaluate the name filter once and fall back to the
    # short-code filter only when the name filter matched nothing.
    by_name = df_master[df_master['ISU_ABBRV'] == symbol.upper()]
    if len(by_name) > 0:
        stock = by_name
    else:
        stock = df_master[df_master['ISU_SRT_CD'] == symbol]
    print(stock[['ISU_SRT_CD', 'ISU_ABBRV']])

    if len(stock) == 0:
        return 'No matched result'

    # Request parameters; the first matching row supplies the full issue code
    payload = {
        'bld': 'dbms/MDC/STAT/standard/MDCSTAT01701',
        'isuCd': str(stock['ISU_CD'].iloc[0]),
        'strtDd': start_date,
        'endDd': end_date,
    }
    raw = requests.post(url, data=payload, timeout=30)    # talk to the server; do not hang forever
    raw.raise_for_status()    # fail loudly on an HTTP error instead of in JSON parsing
    rst = raw.json()['output']    # parse the JSON body and keep only the 'output' records
    return pd.DataFrame(rst)    # list of record dicts -> DataFrame

In [51]:
# Example: query by short code '000660' with 'YYYYMMDD' start and end dates, then display the result
df = historical_price(symbol='000660', start_date='20230901', end_date='20230915')
df

     ISU_SRT_CD ISU_ABBRV
1399     000660    SK하이닉스


Unnamed: 0,TRD_DD,TDD_CLSPRC,FLUC_TP_CD,CMPPREVDD_PRC,FLUC_RT,TDD_OPNPRC,TDD_HGPRC,TDD_LWPRC,ACC_TRDVOL,ACC_TRDVAL,MKTCAP,LIST_SHRS
0,2023/09/15,122400,1,200,0.16,122900,124400,121600,3579662,440233706194,89107489476000,728002365
1,2023/09/14,122200,1,3700,3.12,118600,122200,118600,3948784,477795104066,88961889003000,728002365
2,2023/09/13,118500,1,1800,1.54,117900,120700,116900,3860589,459528455530,86268280252500,728002365
3,2023/09/12,116700,1,800,0.69,115400,117500,113900,2749688,318540598500,84957875995500,728002365
4,2023/09/11,115900,1,2200,1.93,113700,116000,112900,2542590,291666361832,84375474103500,728002365
5,2023/09/08,113700,2,-4800,-4.05,117000,117100,112800,4943255,564691269150,82773868900500,728002365
6,2023/09/07,118500,1,300,0.25,118200,120900,117100,2995088,354989251824,86268280252500,728002365
7,2023/09/06,118200,2,-1200,-1.01,118600,120000,117800,1615519,191335016200,86049879543000,728002365
8,2023/09/05,119400,1,200,0.17,118100,121000,117700,1749591,209186512876,86923482381000,728002365
9,2023/09/04,119200,2,-800,-0.67,118300,119700,116200,3491280,410802524712,86777881908000,728002365


In [52]:
# Replace the server's field codes with readable column names, in place
df.rename(
    columns={
        'TRD_DD': 'Date',
        'TDD_OPNPRC': 'Open',
        'TDD_HGPRC': 'High',
        'TDD_LWPRC': 'Low',
        'TDD_CLSPRC': 'Close',
        'ACC_TRDVOL': 'Volume',
        'ACC_TRDVAL': 'Value',
        'MKTCAP': 'MarketCap',
        'LIST_SHRS': 'Shares',
    },
    inplace=True,
)
df

Unnamed: 0,Date,Close,FLUC_TP_CD,CMPPREVDD_PRC,FLUC_RT,Open,High,Low,Volume,Value,MarketCap,Shares
0,2023/09/15,122400,1,200,0.16,122900,124400,121600,3579662,440233706194,89107489476000,728002365
1,2023/09/14,122200,1,3700,3.12,118600,122200,118600,3948784,477795104066,88961889003000,728002365
2,2023/09/13,118500,1,1800,1.54,117900,120700,116900,3860589,459528455530,86268280252500,728002365
3,2023/09/12,116700,1,800,0.69,115400,117500,113900,2749688,318540598500,84957875995500,728002365
4,2023/09/11,115900,1,2200,1.93,113700,116000,112900,2542590,291666361832,84375474103500,728002365
5,2023/09/08,113700,2,-4800,-4.05,117000,117100,112800,4943255,564691269150,82773868900500,728002365
6,2023/09/07,118500,1,300,0.25,118200,120900,117100,2995088,354989251824,86268280252500,728002365
7,2023/09/06,118200,2,-1200,-1.01,118600,120000,117800,1615519,191335016200,86049879543000,728002365
8,2023/09/05,119400,1,200,0.17,118100,121000,117700,1749591,209186512876,86923482381000,728002365
9,2023/09/04,119200,2,-800,-0.67,118300,119700,116200,3491280,410802524712,86777881908000,728002365


In [53]:
# Remove, in place, the three columns that still carry raw field codes
df.drop(
    columns=[
        'FLUC_TP_CD',
        'CMPPREVDD_PRC',
        'FLUC_RT',
    ],
    inplace=True,
)
df

Unnamed: 0,Date,Close,Open,High,Low,Volume,Value,MarketCap,Shares
0,2023/09/15,122400,122900,124400,121600,3579662,440233706194,89107489476000,728002365
1,2023/09/14,122200,118600,122200,118600,3948784,477795104066,88961889003000,728002365
2,2023/09/13,118500,117900,120700,116900,3860589,459528455530,86268280252500,728002365
3,2023/09/12,116700,115400,117500,113900,2749688,318540598500,84957875995500,728002365
4,2023/09/11,115900,113700,116000,112900,2542590,291666361832,84375474103500,728002365
5,2023/09/08,113700,117000,117100,112800,4943255,564691269150,82773868900500,728002365
6,2023/09/07,118500,118200,120900,117100,2995088,354989251824,86268280252500,728002365
7,2023/09/06,118200,118600,120000,117800,1615519,191335016200,86049879543000,728002365
8,2023/09/05,119400,118100,121000,117700,1749591,209186512876,86923482381000,728002365
9,2023/09/04,119200,118300,119700,116200,3491280,410802524712,86777881908000,728002365


In [54]:
# Inspect the Date column: the values are still strings (dtype object)
df['Date']

0     2023/09/15
1     2023/09/14
2     2023/09/13
3     2023/09/12
4     2023/09/11
5     2023/09/08
6     2023/09/07
7     2023/09/06
8     2023/09/05
9     2023/09/04
10    2023/09/01
Name: Date, dtype: object

In [55]:
# Preview the conversion to datetime64; df itself is not modified here
pd.to_datetime(df['Date'])

0    2023-09-15
1    2023-09-14
2    2023-09-13
3    2023-09-12
4    2023-09-11
5    2023-09-08
6    2023-09-07
7    2023-09-06
8    2023-09-05
9    2023-09-04
10   2023-09-01
Name: Date, dtype: datetime64[ns]

In [56]:
# Store the converted datetime values back into the Date column and display it
df['Date'] = pd.to_datetime(df['Date'])
df['Date']

0    2023-09-15
1    2023-09-14
2    2023-09-13
3    2023-09-12
4    2023-09-11
5    2023-09-08
6    2023-09-07
7    2023-09-06
8    2023-09-05
9    2023-09-04
10   2023-09-01
Name: Date, dtype: datetime64[ns]

In [57]:
# Inspect the Close column: strings with thousands separators (dtype object)
df['Close']

0     122,400
1     122,200
2     118,500
3     116,700
4     115,900
5     113,700
6     118,500
7     118,200
8     119,400
9     119,200
10    120,000
Name: Close, dtype: object

In [58]:
# Because the values are strings, "+" concatenates them instead of summing
df['Close'][0] + df['Close'][1]

'122,400122,200'

In [59]:
# Preview: strip the thousands separators (the result is still strings)
df['Close'].str.replace(',','')

0     122400
1     122200
2     118500
3     116700
4     115900
5     113700
6     118500
7     118200
8     119400
9     119200
10    120000
Name: Close, dtype: object

In [60]:
# Preview: strip the separators, then cast to float (df is not modified here)
df['Close'].str.replace(',','').astype(float)

0    122,400.00
1    122,200.00
2    118,500.00
3    116,700.00
4    115,900.00
5    113,700.00
6    118,500.00
7    118,200.00
8    119,400.00
9    119,200.00
10   120,000.00
Name: Close, dtype: float64

In [61]:
# Store the numeric (float) values back into the Close column
df['Close'] = df['Close'].str.replace(',','').astype(float)

In [62]:
# Confirm that Close now holds float64 values
df['Close']

0    122,400.00
1    122,200.00
2    118,500.00
3    116,700.00
4    115,900.00
5    113,700.00
6    118,500.00
7    118,200.00
8    119,400.00
9    119,200.00
10   120,000.00
Name: Close, dtype: float64

In [63]:
# With float values, "+" now performs numeric addition
df['Close'][0] + df['Close'][1]

244600.0

In [64]:
# Display the frame; the cells above converted only the Date and Close columns
df

Unnamed: 0,Date,Close,Open,High,Low,Volume,Value,MarketCap,Shares
0,2023-09-15,122400.0,122900,124400,121600,3579662,440233706194,89107489476000,728002365
1,2023-09-14,122200.0,118600,122200,118600,3948784,477795104066,88961889003000,728002365
2,2023-09-13,118500.0,117900,120700,116900,3860589,459528455530,86268280252500,728002365
3,2023-09-12,116700.0,115400,117500,113900,2749688,318540598500,84957875995500,728002365
4,2023-09-11,115900.0,113700,116000,112900,2542590,291666361832,84375474103500,728002365
5,2023-09-08,113700.0,117000,117100,112800,4943255,564691269150,82773868900500,728002365
6,2023-09-07,118500.0,118200,120900,117100,2995088,354989251824,86268280252500,728002365
7,2023-09-06,118200.0,118600,120000,117800,1615519,191335016200,86049879543000,728002365
8,2023-09-05,119400.0,118100,121000,117700,1749591,209186512876,86923482381000,728002365
9,2023-09-04,119200.0,118300,119700,116200,3491280,410802524712,86777881908000,728002365


In [65]:
# Make Date the index (in place) so rows can be selected by date label
df.set_index('Date', inplace=True)
df

Unnamed: 0_level_0,Close,Open,High,Low,Volume,Value,MarketCap,Shares
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2023-09-15,122400.0,122900,124400,121600,3579662,440233706194,89107489476000,728002365
2023-09-14,122200.0,118600,122200,118600,3948784,477795104066,88961889003000,728002365
2023-09-13,118500.0,117900,120700,116900,3860589,459528455530,86268280252500,728002365
2023-09-12,116700.0,115400,117500,113900,2749688,318540598500,84957875995500,728002365
2023-09-11,115900.0,113700,116000,112900,2542590,291666361832,84375474103500,728002365
2023-09-08,113700.0,117000,117100,112800,4943255,564691269150,82773868900500,728002365
2023-09-07,118500.0,118200,120900,117100,2995088,354989251824,86268280252500,728002365
2023-09-06,118200.0,118600,120000,117800,1615519,191335016200,86049879543000,728002365
2023-09-05,119400.0,118100,121000,117700,1749591,209186512876,86923482381000,728002365
2023-09-04,119200.0,118300,119700,116200,3491280,410802524712,86777881908000,728002365


In [66]:
# Select the row for 2023-09-05 by its label in the Date index
df.loc['2023-09-05']

Close                119,400.00
Open                    118,100
High                    121,000
Low                     117,700
Volume                1,749,591
Value           209,186,512,876
MarketCap    86,923,482,381,000
Shares              728,002,365
Name: 2023-09-05 00:00:00, dtype: object