https://coding-moomin.tistory.com/5

In [61]:
import requests 
import pandas as pd 
import io 
import zipfile
import xml.etree.ElementTree as et 
import json

In [62]:
crtfc_key = 'api키'

In [63]:
def get_corpcode(crtfc_key): 
    """ 
    OpenDART 기업 고유번호 받아오기 
    return 값: 주식코드를 가진 업체의 DataFrame 
    """ 
    params = {'crtfc_key':crtfc_key} 
    items = ["corp_code","corp_name","stock_code","modify_date"] 
    item_names = ["고유번호","회사명","종목코드","수정일"] 
    url = "https://opendart.fss.or.kr/api/corpCode.xml" #요청 url
    res = requests.get(url,params=params) #url 불러오기
    zfile = zipfile.ZipFile(io.BytesIO(res.content))  #zip file 받기
    fin = zfile.open(zfile.namelist()[0])  #zip file 열고
    root = et.fromstring(fin.read().decode('utf-8'))  #utf-8 디코딩
    data = [] 
    for child in root: 
        if len(child.find('stock_code').text.strip()) > 1: # 종목코드가 있는 경우 
            data.append([]) #data에 append하라 
            for item in items: 
                data[-1].append(child.find(item).text) 
    df = pd.DataFrame(data, columns=item_names) 
    return df

In [4]:
def Frame(url, items, item_names, params): 
    """
    url : json형태로 요청하는 주소
    items : 반환되는 데이터들의 key를 가진 리스트
    item_names :  데이터프레임을 만들때 컬럼명 리스트
    params : url 요청시 필수값으로 들어가는 인자들을 가진 딕셔너리
    """
    res = requests.get(url, params)
    json_data = res.json()
    json_dict = json.loads(res.text) 
    data = [] 
    if json_dict['status'] == "000":  # 오류 없이 정상적으로 데이터가 있다면 
        for line in json_dict['list']: 
            data.append([])
            for itm in items: 
                if itm in line.keys(): 
                    data[-1].append(line[itm]) 
                else: 
                    data[-1].append('')
    df = pd.DataFrame(data, columns=item_names)
    return df

In [64]:
def get_fnlttSinglAcntAll(crtfc_key, corp_code, bsns_year, reprt_code, fs_div = "CFS"): 
    items = ["rcept_no","reprt_code","bsns_year","corp_code","sj_div","sj_nm", 
             "account_id","account_nm","account_detail","thstrm_nm", "thstrm_amount",
             "thstrm_add_amount","frmtrm_nm","frmtrm_amount", "frmtrm_q_nm","frmtrm_q_amount",
             "frmtrm_add_amount","bfefrmtrm_nm", "bfefrmtrm_amount","ord"] 
    item_names = ["접수번호","보고서코드","사업연도","고유번호","재무제표구분", "재무제표명",
                  "계정ID","계정명","계정상세","당기명","당기금액", "당기누적금액","전기명","전기금액","전기명(분/반기)", 
                  "전기금액(분/반기)","전기누적금액","전전기명","전전기금액", "계정과목정렬순서"] 
    params = {'crtfc_key':crtfc_key, 'corp_code':corp_code, 'bsns_year':bsns_year, 'reprt_code':reprt_code, 'fs_div':fs_div} 
    url = "https://opendart.fss.or.kr/api/fnlttSinglAcntAll.json?" 
    df = Frame(url, items, item_names, params)
    return df

In [65]:
stock_comp = get_corpcode(crtfc_key)
stock_comp[stock_comp['고유번호']=='00375302']

Unnamed: 0,고유번호,회사명,종목코드,수정일
313,375302,우리금융지주,53000,20170630


In [66]:
company_list=pd.read_csv('업종별_시총_상위_10위_기업.csv',encoding='utf-8')

In [67]:
company_list.columns

Index(['회사명', '종목코드', '업종', '주요제품', '상장일', '결산월', '대표자명', '홈페이지', '지역', '시가총액',
       '업종명', '대분류'],
      dtype='object')

In [68]:
top_comp=stock_comp.loc[stock_comp['회사명'].isin(company_list['회사명']),:]

In [69]:
top_comp=top_comp.reset_index()
top_comp=top_comp.drop('index',axis=1)

In [51]:
#top_comp.to_csv("우리가_관심있는_기업들.csv",encoding='cp949')

In [12]:
corp_code = top_comp['고유번호']
final_reprt_df = get_fnlttSinglAcntAll(crtfc_key, corp_code[0], 2021, '11014', fs_div = "CFS") 
from tqdm import tqdm
for k in tqdm(range(1, len(corp_code))):
    reprt_df = get_fnlttSinglAcntAll(crtfc_key, corp_code[k], 2021, '11014', fs_div = "CFS") 
    final_reprt_df = pd.concat([final_reprt_df, reprt_df], axis = 0)

100%|████████████████████████████████████████████████████████████████████████████████| 102/102 [00:18<00:00,  5.47it/s]


In [70]:
import OpenDartReader

In [71]:
api_key = 'api_key'
dart = OpenDartReader(api_key)

In [78]:
df = dart.report(top_comp['고유번호'][5],'직원',2021)

In [79]:
df

Unnamed: 0,rcept_no,corp_cls,corp_code,corp_name,rm,sexdstn,fo_bbm,reform_bfe_emp_co_rgllbr,reform_bfe_emp_co_cnttk,reform_bfe_emp_co_etc,rgllbr_co,rgllbr_abacpt_labrr_co,cnttk_co,cnttk_abacpt_labrr_co,sm,avrg_cnwk_sdytrn,fyer_salary_totamt,jan_salary_am
0,20220512000853,Y,413046,셀트리온,-,남,관리사무직,-,-,-,222,-,5,-,227,4.6,20583000000,93000000
1,20220512000853,Y,413046,셀트리온,-,여,관리사무직,-,-,-,123,-,12,10,145,5.0,9913000000,73000000
2,20220512000853,Y,413046,셀트리온,-,남,연구개발직,-,-,-,282,-,17,-,299,5.8,29115000000,99000000
3,20220512000853,Y,413046,셀트리온,-,여,연구개발직,-,-,-,397,-,5,-,402,4.8,31971000000,85000000
4,20220512000853,Y,413046,셀트리온,-,남,생산직,-,-,-,726,-,34,-,760,5.7,54568000000,72000000
5,20220512000853,Y,413046,셀트리온,-,여,생산직,-,-,-,324,-,50,-,374,5.4,23945000000,63000000


In [55]:
def tqreport(year):
    corp_code = top_comp['고유번호']
    final_reprt_df = dart.finstate(corp_code[0], year) 
    from tqdm import tqdm
    for k in tqdm(range(1, len(corp_code))):
        reprt_df = dart.finstate(corp_code[k], year) 
        final_reprt_df = pd.concat([final_reprt_df, reprt_df], axis = 0)
        
    final=final_reprt_df[final_reprt_df['sj_div']=='IS']
    final=final.astype({'corp_code':'str'})

    return final

In [56]:
data=tqreport(year=2021)

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



  1%|▊                                                                                 | 1/102 [00:00<00:15,  6.67it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}




  2%|█▌                                                                                | 2/102 [00:00<00:14,  6.90it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



  8%|██████▍                                                                           | 8/102 [00:01<00:12,  7.27it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



 12%|█████████▌                                                                       | 12/102 [00:01<00:11,  7.66it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



 38%|██████████████████████████████▉                                                  | 39/102 [00:05<00:08,  7.37it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



 62%|██████████████████████████████████████████████████                               | 63/102 [00:08<00:10,  3.89it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



 75%|█████████████████████████████████████████████████████████████▏                   | 77/102 [00:11<00:03,  7.07it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



 88%|███████████████████████████████████████████████████████████████████████▍         | 90/102 [00:13<00:01,  7.56it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



 90%|█████████████████████████████████████████████████████████████████████████        | 92/102 [00:13<00:01,  7.22it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



100%|████████████████████████████████████████████████████████████████████████████████| 102/102 [00:14<00:00,  6.95it/s]


In [49]:
data.to_csv("2021재무제표.csv",encoding='cp949')

In [25]:
data['account_nm'].unique()

array(['매출액', '영업이익', '법인세차감전 순이익', '당기순이익'], dtype=object)

In [57]:
print(data.shape)
print(data.columns)

(712, 21)
Index(['rcept_no', 'reprt_code', 'bsns_year', 'corp_code', 'stock_code',
       'fs_div', 'fs_nm', 'sj_div', 'sj_nm', 'account_nm', 'thstrm_nm',
       'thstrm_dt', 'thstrm_amount', 'frmtrm_nm', 'frmtrm_dt', 'frmtrm_amount',
       'bfefrmtrm_nm', 'bfefrmtrm_dt', 'bfefrmtrm_amount', 'ord', 'currency'],
      dtype='object')


In [39]:
data['fs_nm'].unique

<bound method Series.unique of 9     연결재무제표
10    연결재무제표
11    연결재무제표
12    연결재무제표
22      재무제표
       ...  
10    연결재무제표
11    연결재무제표
21      재무제표
22      재무제표
23      재무제표
Name: fs_nm, Length: 677, dtype: object>

In [41]:
top_comp

Unnamed: 0,고유번호,회사명,종목코드,수정일
0,00375302,우리금융지주,053000,20170630
1,00126229,삼성물산,000830,20170630
2,00144155,SK,003600,20170630
3,00126478,삼성중공업,010140,20211210
4,00121941,대상,001680,20211202
...,...,...,...,...
98,00162461,한화솔루션,009830,20220901
99,00136378,신세계,004170,20220908
100,00828497,한미약품,128940,20220407
101,00261285,한국가스공사,036460,20220407


In [48]:
data

Unnamed: 0,rcept_no,reprt_code,bsns_year,corp_code,stock_code,fs_div,fs_nm,sj_div,sj_nm,account_nm,...,thstrm_dt,thstrm_amount,frmtrm_nm,frmtrm_dt,frmtrm_amount,bfefrmtrm_nm,bfefrmtrm_dt,bfefrmtrm_amount,ord,currency
9,20220310001057,11011,2021,00126478,010140,CFS,연결재무제표,IS,손익계산서,매출액,...,2021.01.01 ~ 2021.12.31,6622001487326,제 47 기,2020.01.01 ~ 2020.12.31,6860317642496,제 46 기,2019.01.01 ~ 2019.12.31,7349656035614,23,KRW
10,20220310001057,11011,2021,00126478,010140,CFS,연결재무제표,IS,손익계산서,영업이익,...,2021.01.01 ~ 2021.12.31,-1311956174022,제 47 기,2020.01.01 ~ 2020.12.31,-1054144023100,제 46 기,2019.01.01 ~ 2019.12.31,-616585523278,25,KRW
11,20220310001057,11011,2021,00126478,010140,CFS,연결재무제표,IS,손익계산서,법인세차감전 순이익,...,2021.01.01 ~ 2021.12.31,-1349816814471,제 47 기,2020.01.01 ~ 2020.12.31,-1475030182281,제 46 기,2019.01.01 ~ 2019.12.31,-1139245938287,27,KRW
12,20220310001057,11011,2021,00126478,010140,CFS,연결재무제표,IS,손익계산서,당기순이익,...,2021.01.01 ~ 2021.12.31,-1452069565718,제 47 기,2020.01.01 ~ 2020.12.31,-1492699901584,제 46 기,2019.01.01 ~ 2019.12.31,-1315353147184,29,KRW
22,20220310001057,11011,2021,00126478,010140,OFS,재무제표,IS,손익계산서,매출액,...,2021.01.01 ~ 2021.12.31,6542631445460,제 47 기,2020.01.01 ~ 2020.12.31,6825532312130,제 46 기,2019.01.01 ~ 2019.12.31,7098805561786,24,KRW
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10,20220321001331,11011,2021,00258801,035720,CFS,연결재무제표,IS,손익계산서,법인세차감전 순이익,...,2021.01.01 ~ 2021.12.31,2293693668438,제 26 기,2020.01.01 ~ 2020.12.31,414271425061,제 25 기,2019.01.01 ~ 2019.12.31,-234256905702,27,KRW
11,20220321001331,11011,2021,00258801,035720,CFS,연결재무제표,IS,손익계산서,당기순이익,...,2021.01.01 ~ 2021.12.31,1646153359281,제 26 기,2020.01.01 ~ 2020.12.31,173359671915,제 25 기,2019.01.01 ~ 2019.12.31,-341924578348,29,KRW
21,20220321001331,11011,2021,00258801,035720,OFS,재무제표,IS,손익계산서,영업이익,...,2021.01.01 ~ 2021.12.31,402025254785,제 26 기,2020.01.01 ~ 2020.12.31,300064492177,제 25 기,2019.01.01 ~ 2019.12.31,196063924523,26,KRW
22,20220321001331,11011,2021,00258801,035720,OFS,재무제표,IS,손익계산서,법인세차감전 순이익,...,2021.01.01 ~ 2021.12.31,574692459376,제 26 기,2020.01.01 ~ 2020.12.31,-35091884829,제 25 기,2019.01.01 ~ 2019.12.31,-230629069957,28,KRW


In [80]:
def report_worker(year):
    corp_code = top_comp['고유번호']
    final_reprt_df = dart.report(corp_code[0], '직원',year) 
    from tqdm import tqdm
    for k in tqdm(range(1, len(corp_code))):
        reprt_df = dart.report(corp_code[k],'직원', year) 
        final_reprt_df = pd.concat([final_reprt_df, reprt_df], axis = 0)
        

    return final_reprt_df

In [81]:
worker=report_worker(2021)

{'status': '013', 'message': '조회된 데이타가 없습니다.'}


  1%|▊                                                                                 | 1/102 [00:00<00:13,  7.47it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}



  2%|█▌                                                                                | 2/102 [00:00<00:14,  6.91it/s]

{'status': '013', 'message': '조회된 데이타가 없습니다.'}


100%|████████████████████████████████████████████████████████████████████████████████| 102/102 [00:16<00:00,  6.06it/s]


In [82]:
worker

Unnamed: 0,rcept_no,corp_cls,corp_code,corp_name,rm,sexdstn,fo_bbm,reform_bfe_emp_co_rgllbr,reform_bfe_emp_co_cnttk,reform_bfe_emp_co_etc,rgllbr_co,rgllbr_abacpt_labrr_co,cnttk_co,cnttk_abacpt_labrr_co,sm,avrg_cnwk_sdytrn,fyer_salary_totamt,jan_salary_am
0,20220310001057,Y,00126478,삼성중공업,-,남,조선,-,-,-,8585,-,81,-,8666,18.9,699797000000,76000000
1,20220310001057,Y,00126478,삼성중공업,-,여,조선,-,-,-,324,16,16,-,340,11.0,19972000000,55000000
2,20220310001057,Y,00126478,삼성중공업,-,남,건설,-,-,-,232,-,33,-,265,15.3,7172000000,83000000
3,20220310001057,Y,00126478,삼성중공업,-,여,건설,-,-,-,5,-,3,-,8,7.6,172000000,44000000
0,20220816001426,Y,00121941,대상,-,남,식품,-,-,-,1737,-,31,1,1768,11년 1개월,124891000000,66000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3,20220325000211,Y,00261285,한국가스공사,-,여,기술직,-,-,-,284,10,-,-,294,9.72,18716171000,63660000
4,20220325000211,Y,00261285,한국가스공사,-,남,기타,-,-,-,501,2,-,-,503,16.97,37703919000,74958000
5,20220325000211,Y,00261285,한국가스공사,-,여,기타,-,-,-,36,1,-,-,37,11.79,2447736000,66155000
0,20220321001331,Y,00258801,카카오,-,남,전사,-,-,-,1926,-,32,-,1958,4년 10개월,381472000000,217000000


In [84]:
len(worker['corp_name'].unique())

100