# **데이터 수집 : 코스피 개별 종목의 종목 코드로 재무정보 가져오기**
- 종목코드 -> 고유번호 -> 법인등록번호 -> 재무정보
  1. '금융감독원_고유번호'를 통해 '종목 코드'로 '고유번호' 가져오기
      - [**금융감독원_고유번호**](https://opendart.fss.or.kr/guide/detail.do?apiGrpCd=DS001&apiId=2019018)
  2. '금융감독원_공시정보_기업개황'에서 '고유번호'로 '법인등록번호' 가져오기
      - [**금융감독원_공시정보_기업개황**](https://opendart.fss.or.kr/guide/detail.do?apiGrpCd=DS001&apiId=2019002)
      - KOSPI200 기업 목록 활용
  3. 가져온 '법인등록번호'로 재무정보 가져오기
      - [**금융위원회_기업 재무정보**](https://www.data.go.kr/tcs/dss/selectApiDataDetailView.do?publicDataPk=15043459)
---
- [DART](https://dart.fss.or.kr/main.do) : 금융감독원에서 운영하는 기업정보전자공시시스템
- FSC : 금융위원회

In [28]:
import json
import os
import sys
from glob import glob
from io import BytesIO
from zipfile import ZipFile
import time

import numpy as np
import pandas as pd
import requests
import xmltodict
from bs4 import BeautifulSoup

sys.path.append("../import")

from gitig_auth import authKey

data_path = "../data/"

# 1. 종목코드 -> 고유번호
- '금융감독원_고유번호'를 통해 '종목 코드'로 '고유번호' 가져오기
    - [**금융감독원_고유번호**](https://opendart.fss.or.kr/guide/detail.do?apiGrpCd=DS001&apiId=2019018)

In [4]:
# Fetch the DART corp-code archive and load it into a pandas.DataFrame => df_cc (cc: corp_code)

auth_key = authKey["dart"]
url = f"https://opendart.fss.or.kr/api/corpCode.xml?crtfc_key={auth_key}"
response = requests.get(url, timeout=30)
# Stop here on an HTTP error instead of handing an error body to ZipFile.
response.raise_for_status()

# The response body is opened as a ZIP archive; CORPCODE.xml is the member read from it.
# A separate name is used for the archive so df_cc only ever refers to the DataFrame.
with ZipFile(BytesIO(response.content)) as corp_zip:
    corp_xml = corp_zip.read("CORPCODE.xml")

# Wrap the bytes in a buffer: passing literal XML data to read_xml is
# deprecated in pandas >= 2.1.
df_cc = pd.read_xml(BytesIO(corp_xml))

df_cc.head()

Unnamed: 0,corp_code,corp_name,stock_code,modify_date
0,434003,다코,,20170630
1,434456,일산약품,,20170630
2,430964,굿앤엘에스,,20170630
3,432403,한라판지,,20170630
4,388953,크레디피아제이십오차유동화전문회사,,20170630


## 전처리

In [5]:
# Drop rows that have no stock_code.
# .copy() makes the result an independent frame, so the column assignments
# below cannot trigger SettingWithCopyWarning.
df_cc = df_cc.dropna(subset=["stock_code"]).copy()
# Restore fixed-width codes: corp_code is padded to 8 characters and
# stock_code to 6 (the numeric values shown above have lost their leading zeros).
df_cc["corp_code"] = df_cc["corp_code"].astype(int).astype(str).str.zfill(8)
df_cc["stock_code"] = df_cc["stock_code"].astype(int).astype(str).str.zfill(6)
# Create the empty "jurir_no" column.
# np.nan is used because the np.NaN alias was removed in NumPy 2.0.
df_cc["jurir_no"] = np.nan
# Set the column order.
df_cc = df_cc[["stock_code", "corp_code", "jurir_no", "corp_name"]]
df_cc.head()

Unnamed: 0,stock_code,corp_code,jurir_no,corp_name
2009,36720,260985,,한빛네트
2021,40130,264529,,엔플렉스
2022,55000,358545,,동서정보기술
2784,32600,231567,,애드모바일
3889,37600,247939,,씨모스


# 2. 고유번호 -> 법인등록번호
- '금융감독원_공시정보_기업개황'에서 '고유번호'로 '법인등록번호' 가져오기
    - [**금융감독원_공시정보_기업개황**](https://opendart.fss.or.kr/guide/detail.do?apiGrpCd=DS001&apiId=2019002)
    - KOSPI200 기업 목록 활용

In [6]:
# Stock code -> DART corp code
def stockCode_to_corpCode(df, stock_code):
    """Return the corp_code of the first row of df whose stock_code matches.

    Raises IndexError when no row matches.
    """
    is_match = df["stock_code"] == stock_code
    matching_codes = df.loc[is_match, "corp_code"]
    return matching_codes.values[0]


# DART corp code -> corporate registration number (jurir_no)
def corpCode_to_jurirNo(
    corp_code,
    auth_key,
    url="https://opendart.fss.or.kr/api/company.json",
    timeout=10,
):
    """Look up the "jurir_no" field for one DART corp code.

    Args:
        corp_code: value sent as the ``corp_code`` query parameter.
        auth_key: value sent as the ``crtfc_key`` query parameter.
        url: endpoint to query.
        timeout: seconds passed to ``requests.get`` as its timeout.

    Returns:
        The ``"jurir_no"`` value of the JSON response, or None when the HTTP
        status is not 200 (the status code is printed in that case).

    Raises:
        KeyError: the status was 200 but the JSON body has no ``"jurir_no"``
            key (presumably an API-level error payload -- TODO confirm
            against the DART documentation).
    """
    params = {"crtfc_key": auth_key, "corp_code": corp_code}

    response = requests.get(url, params=params, timeout=timeout)
    # Pause after every request. Previously the sleep came after the
    # `return`, so it was skipped on every successful call.
    time.sleep(0.01)

    if response.status_code != 200:
        print(response.status_code)
        return None

    return response.json()["jurir_no"]

In [7]:
# Read 종목코드 as text so the codes are never coerced to numbers, then
# left-pad to 6 characters in case the file stored them without leading zeros.
df_ksp = pd.read_csv("./KOSPI200.csv", index_col=0, dtype={"종목코드": str})
df_ksp["종목코드"] = df_ksp["종목코드"].str.zfill(6)
df_ksp

Unnamed: 0,날짜,시가,고가,저가,종가,거래량,등락률,종목코드,종목명,연도월
0,2021-11-29,82000,85000,73900,76000,8764092,,402340,SK스퀘어,2021-11
1,2021-11-30,77900,80500,68000,68000,6863151,-10.526316,402340,SK스퀘어,2021-11
2,2021-12-01,68400,69400,61900,62700,4481654,-7.794118,402340,SK스퀘어,2021-12
3,2021-12-02,61900,72100,61500,68700,7312303,9.569378,402340,SK스퀘어,2021-12
4,2021-12-03,67400,70200,65500,66000,3301896,-3.930131,402340,SK스퀘어,2021-12
...,...,...,...,...,...,...,...,...,...,...
807824,2022-10-17,10750,10800,10550,10650,6487,-1.388889,000050,경방,2022-10
807825,2022-10-18,10800,11100,10650,11100,4479,4.225352,000050,경방,2022-10
807826,2022-10-19,11500,11500,10850,11200,4008,0.900901,000050,경방,2022-10
807827,2022-10-20,11150,11150,10800,10950,4600,-2.232143,000050,경방,2022-10


In [14]:
# Fill in jurir_no for every stock code that appears in the KOSPI200 data.

df_cc2 = df_cc.copy()
# jurir_no starts out as an all-NaN column; store it as object so string
# values can be written into it without a dtype-mismatch warning.
df_cc2["jurir_no"] = df_cc2["jurir_no"].astype(object)

# Read the DART key directly so this cell does not depend on what the
# module-level auth_key currently holds.
dart_key = authKey["dart"]
kospi_stock_codes = set(df_ksp["종목코드"])
for stock_code in kospi_stock_codes:
    try:
        # Look the corp code up once and reuse it for the request and the update.
        corp_code = stockCode_to_corpCode(df_cc2, stock_code)
        # auth_key is a required argument. The previous call omitted it, and
        # the resulting TypeError was swallowed by a bare `except: pass`.
        jurir_no = corpCode_to_jurirNo(corp_code, dart_key)
    except (IndexError, KeyError, ValueError, requests.RequestException) as err:
        # IndexError: stock code not present in df_cc2.
        # KeyError / ValueError: response without a usable "jurir_no".
        # RequestException: network-level failure.
        print(f"{stock_code}: skipped ({err!r})")
        continue
    if jurir_no is None:
        # Non-200 response; leave the cell as missing.
        continue
    df_cc2.loc[df_cc2["corp_code"] == corp_code, "jurir_no"] = jurir_no
df_cc2

Unnamed: 0,stock_code,corp_code,jurir_no,corp_name
2009,036720,00260985,,한빛네트
2021,040130,00264529,,엔플렉스
2022,055000,00358545,,동서정보기술
2784,032600,00231567,,애드모바일
3889,037600,00247939,,씨모스
...,...,...,...,...
97084,052670,00185505,,제일바이오
97093,228340,00993931,,동양파일
97097,004140,00115287,,동방
97100,088790,00557933,,진도


In [17]:
# Work on a copy so the jurir_no normalization below leaves df_cc2 unchanged.
df_cc3 = df_cc2.copy()

def temp(x):
    """Normalize one stringified jurir_no value.

    Returns np.nan for the strings "nan" and "None" (the text forms of
    missing values after ``astype(str)``); any other string is left-padded
    with zeros to 13 characters.
    """
    # np.nan rather than np.NaN: the capitalized alias was removed in NumPy 2.0.
    if x in ("nan", "None"):
        return np.nan
    return x.zfill(13)

# Convert the column to text, then normalize each value with temp().
df_cc3["jurir_no"] = df_cc3["jurir_no"].astype(str).map(temp)
# Disabled: drop the rows whose jurir_no contains thirteen consecutive zeros.
# spac = df_cc3[df_cc3['jurir_no'].str.contains('0000000000000')].index
# df_cc3.drop(spac, inplace=True)
df_cc3

Unnamed: 0,stock_code,corp_code,jurir_no,corp_name
2009,036720,00260985,,한빛네트
2021,040130,00264529,,엔플렉스
2022,055000,00358545,,동서정보기술
2784,032600,00231567,,애드모바일
3889,037600,00247939,,씨모스
...,...,...,...,...
97084,052670,00185505,,제일바이오
97093,228340,00993931,,동양파일
97097,004140,00115287,,동방
97100,088790,00557933,,진도


In [21]:
# Keep only the rows that have a jurir_no value.
has_jurir_no = df_cc3["jurir_no"].notna()
df_cc3 = df_cc3[has_jurir_no]
df_cc3

Unnamed: 0,stock_code,corp_code,jurir_no,corp_name
53482,192820,01009789,1348110294273,코스맥스
53606,003570,00134477,1101110042880,SNT중공업
53703,005180,00124726,1152110000287,빙그레
54172,008060,00109189,1301110007608,대덕
54367,009680,00151128,1101110161317,모토닉
...,...,...,...,...
96984,079980,00362238,1101112102070,휴비스
96988,010120,00105855,1101110520076,엘에스일렉트릭
96994,005930,00126380,1301110006246,삼성전자
97057,096760,00632304,1101113710468,JW홀딩스


# 3. 법인등록번호 -> 재무정보
금융위원회_기업 재무정보
- 요청 URL : http://apis.data.go.kr/1160100/service/GetFinaStatInfoService/getSummFinaStat

In [31]:
# Helper function
def Get_FinaStatInfo(crno, authKey, bizYear="", numOfRows="", pageNo="",
                     url='http://apis.data.go.kr/1160100/service/GetFinaStatInfoService/getSummFinaStat',
                     timeout=30):
    """Request summary financial statements for one registration number.

    Args:
        crno: value sent as the ``crno`` query parameter.
        authKey: value sent as the ``serviceKey`` query parameter.
        bizYear: value sent as ``bizYear``; empty string by default.
        numOfRows: value sent as ``numOfRows``; empty string by default.
        pageNo: value sent as ``pageNo``; empty string by default.
        url: endpoint to query.
        timeout: seconds passed to ``requests.get`` as its timeout.

    Returns:
        The decoded JSON response, or None when the HTTP status is not 200
        (the status code is printed in that case).
    """
    params = {
        'serviceKey': authKey,
        'numOfRows': numOfRows,
        # Previously this sent numOfRows, so the caller's pageNo was ignored.
        'pageNo': pageNo,
        'resultType': 'json',
        'crno': crno,
        'bizYear': bizYear,
    }

    response = requests.get(url, params=params, timeout=timeout)
    # Pause after every request (presumably to stay under the API rate limit).
    time.sleep(0.2)

    if response.status_code != 200:
        print(response.status_code)
        return None

    return response.json()

In [49]:
df_dart = df_cc3.copy()
auth_key = authKey["fsc_finaStatInfo"]

# Collect one frame per company and concatenate once at the end; calling
# pd.concat inside the loop re-copies every accumulated row on each pass.
frames = []
for jurirNo in df_dart["jurir_no"]:
    payload = Get_FinaStatInfo(jurirNo, authKey=auth_key)
    try:
        data_json = payload["response"]["body"]["items"]["item"]
    except (KeyError, TypeError):
        # TypeError: payload is None (non-200 response) or a level is not a dict.
        # KeyError: the response lacks one of the expected keys.
        print(f"{jurirNo}: no financial data in response")
        continue
    if not data_json:
        # Nothing to add for this company.
        continue
    frames.append(pd.json_normalize(data_json))

# pd.concat raises ValueError on an empty list, so fall back to an empty frame.
# Each frame keeps its own index, as before.
df_finaStatInfo = pd.concat(frames, axis=0, sort=False) if frames else pd.DataFrame()

df_finaStatInfo

Unnamed: 0,basDt,crno,bizYear,fnclDcd,fnclDcdNm,enpSaleAmt,enpBzopPft,iclsPalClcAmt,enpCrtmNpf,enpTastAmt,enpTdbtAmt,enpTcptAmt,enpCptlAmt,fnclDebtRto
0,20111231,1348110294273,2011,120,요약별도재무정보,176879983,10740303,10035262,7597714,138479511,98464605,40014906,4499755,246.0698145835
1,20121231,1348110294273,2012,120,요약별도재무정보,215611295,14748657,14820724,13123151,146226729,102470549,43756180,4499755,234.1853173654
2,20131231,1348110294273,2013,120,요약별도재무정보,260700561000,18005908000,18600176000,15651286000,166245866000,104848655000,61397211000,4499754000,170.7710387691
3,20151231,1348110294273,2015,ifrs_ConsolidatedMember,연결요약재무제표,533342717781,35934351724,29490776255,18885577415,438932111547,342412375965,96519735582,0,354.7589245871
4,20151231,1348110294273,2015,ifrs_SeparateMember,별도요약재무제표,372482450750,28201633567,28784194040,22215275889,266152320313,181524386805,84627933508,0,214.4970097702
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5,20171231,1101110003733,2017,ifrs_SeparateMember,별도요약재무제표,286386535237,19514857445,19981045677,13814407827,689594455044,66157192173,623437262871,8450000000,10.6116839838
6,20181231,1101110003733,2018,ifrs_ConsolidatedMember,연결요약재무제표,864585835647,32798360071,74914386097,51472655804,919690198948,170684939463,749005259485,8450000000,22.7882164112
7,20181231,1101110003733,2018,ifrs_SeparateMember,별도요약재무제표,305132945611,19773999199,55139663239,34676568703,707393585361,61768795388,645624789973,8450000000,9.5672899101
8,20191231,1101110003733,2019,ifrs_ConsolidatedMember,연결요약재무제표,933866093002,23483614049,25514184242,16993288734,1005108274460,245259980306,759848294154,8450000000,32.277493046


In [61]:
# data_path already ends with "/", so the old f"{data_path}/..." produced a
# doubled separator; os.path.join inserts one only when it is needed.
df_finaStatInfo.to_csv(os.path.join(data_path, "FSC_finaStatInfo.csv"), index=False)