## 자산_부채총계

In [1]:
import pandas as pd
import requests
import json
import numpy as np
from bs4 import BeautifulSoup
from datetime import datetime
from dateutil.relativedelta import relativedelta
from tqdm import tqdm
import time
time.sleep(0.3)
pd.options.display.float_format = '{:.6f}'.format

In [2]:
cdf = pd.read_csv('data/company_name_and_code.csv', index_col = 0)
cdf

Unnamed: 0,corp_code,corp_name,stock_code
0,365387,AJ네트웍스,95570
1,125080,AK홀딩스,6840
2,219097,BGF,27410
3,1263022,BGF리테일,282330
4,858364,BNK금융지주,138930
...,...,...,...
726,111421,휴니드테크놀러지스,5870
727,362238,휴비스,79980
728,156488,휴스틸,5010
729,103176,흥국화재,540


In [3]:
my_key = pd.read_csv('data/dart_key.txt', header = None)[0][0]

In [4]:
my_key# = '5f636989908de5019426008adba1a078e3c134f9'

'd6546705cd01f3257395ab1d45d6099f97d60e04'

In [5]:
base_url = 'https://opendart.fss.or.kr/api/fnlttSinglAcntAll.xml'
surv_key = '?crtfc_key=' + my_key
corp_code = '&corp_code=' + '00' + '365387'
bsns_year = '&bsns_year=' + '2024'
reprt_code = '&reprt_code=11011&fs_div=CFS'

In [6]:
url = base_url + surv_key + corp_code + bsns_year + reprt_code
url

'https://opendart.fss.or.kr/api/fnlttSinglAcntAll.xml?crtfc_key=d6546705cd01f3257395ab1d45d6099f97d60e04&corp_code=00365387&bsns_year=2024&reprt_code=11011&fs_div=CFS'

In [7]:
response = requests.get(url)
response

<Response [200]>

In [8]:
response.text

'<?xml version="1.0" encoding="UTF-8" standalone="yes"?><result><status>000</status><message>정상</message><list><rcept_no>20250320000455</rcept_no><reprt_code>11011</reprt_code><bsns_year>2024</bsns_year><corp_code>00365387</corp_code><sj_div>BS</sj_div><sj_nm>재무상태표</sj_nm><account_id>ifrs-full_Assets</account_id><account_nm>자산총계</account_nm><account_detail>-</account_detail><thstrm_nm>제 25 기</thstrm_nm><thstrm_amount>1717770434389</thstrm_amount><frmtrm_nm>제 24 기</frmtrm_nm><frmtrm_amount>1622157341878</frmtrm_amount><bfefrmtrm_nm>제 23 기</bfefrmtrm_nm><bfefrmtrm_amount>1483368740285</bfefrmtrm_amount><ord>7</ord><currency>KRW</currency></list><list><rcept_no>20250320000455</rcept_no><reprt_code>11011</reprt_code><bsns_year>2024</bsns_year><corp_code>00365387</corp_code><sj_div>BS</sj_div><sj_nm>재무상태표</sj_nm><account_id>ifrs-full_CurrentAssets</account_id><account_nm>유동자산</account_nm><account_detail>-</account_detail><thstrm_nm>제 25 기</thstrm_nm><thstrm_amount>304434525487</thstrm_amoun

In [9]:
soup = BeautifulSoup(response.text, "lxml")
soup

  soup = BeautifulSoup(response.text, "lxml")


<?xml version="1.0" encoding="UTF-8" standalone="yes"?><html><body><result><status>000</status><message>정상</message><list><rcept_no>20250320000455</rcept_no><reprt_code>11011</reprt_code><bsns_year>2024</bsns_year><corp_code>00365387</corp_code><sj_div>BS</sj_div><sj_nm>재무상태표</sj_nm><account_id>ifrs-full_Assets</account_id><account_nm>자산총계</account_nm><account_detail>-</account_detail><thstrm_nm>제 25 기</thstrm_nm><thstrm_amount>1717770434389</thstrm_amount><frmtrm_nm>제 24 기</frmtrm_nm><frmtrm_amount>1622157341878</frmtrm_amount><bfefrmtrm_nm>제 23 기</bfefrmtrm_nm><bfefrmtrm_amount>1483368740285</bfefrmtrm_amount><ord>7</ord><currency>KRW</currency></list><list><rcept_no>20250320000455</rcept_no><reprt_code>11011</reprt_code><bsns_year>2024</bsns_year><corp_code>00365387</corp_code><sj_div>BS</sj_div><sj_nm>재무상태표</sj_nm><account_id>ifrs-full_CurrentAssets</account_id><account_nm>유동자산</account_nm><account_detail>-</account_detail><thstrm_nm>제 25 기</thstrm_nm><thstrm_amount>304434525487</t

In [14]:
def asset(cdf, my_key, start_year=2019, end_year=2024):

    base_url = 'https://opendart.fss.or.kr/api/fnlttSinglAcntAll.xml'
    reprt_code = '11011'

    corp_map = cdf[['corp_code', 'corp_name']].copy()
    corp_map['corp_code'] = corp_map['corp_code'].astype(str).str.zfill(8)
    corp_codes = corp_map['corp_code']

    year_dfs = {}

    ASSET_IDS = ('ifrs-full_Assets', 'dart_TotalAssets')
    DEBT_IDS  = ('ifrs-full_Liabilities', 'dart_TotalLiabilities')

    for year in range(start_year, end_year + 1):
        print(f"\n=== {year}년 진행 ===")
        all_dfs = []

        for idx, code in enumerate(
            tqdm(corp_codes, desc=f"{year}년 기업 진행", ncols=100),
            start=1
        ):

            def fetch_financial(fs_div):
                params = {
                    'crtfc_key': my_key,
                    'corp_code': code,
                    'bsns_year': str(year),
                    'reprt_code': reprt_code,
                    'fs_div': fs_div
                }

                try:
                    r = requests.get(base_url, params=params, timeout=10)
                    r.raise_for_status()
                except requests.RequestException:
                    return None, None

                soup = BeautifulSoup(r.text, "lxml-xml")

                status = soup.find('status')
                if status and status.text != '000':
                    return None, None

                lists = soup.find_all('list')

                asset, debt = None, None

                for item in lists:
                    id_tag = item.find('account_id')
                    if not id_tag:
                        continue

                    account_id = id_tag.text.strip()
                    value_tag = item.find('thstrm_amount')
                    value = value_tag.text.strip() if value_tag else None

                    if account_id in ASSET_IDS and asset is None:
                        asset = value
                    elif account_id in DEBT_IDS and debt is None:
                        debt = value

                return asset, debt

            # 1️⃣ CFS 조회
            total_asset, total_debt = fetch_financial('CFS')

            # 2️⃣ 하나라도 없으면 → CFS에서 부족한 것만 보완
            if total_asset is None or total_debt is None:
                cfs_asset, cfs_debt = fetch_financial('OFS')

                if total_asset is None:
                    total_asset = cfs_asset

                if total_debt is None:
                    total_debt = cfs_debt

            if (total_asset is not None) or (total_debt is not None):
                all_dfs.append(pd.DataFrame([{
                    'corp_code': code,
                    'year': year,
                    'total_asset': total_asset,
                    'total_debt': total_debt
                }]))

            # 속도 제어
            time.sleep(0.3)
            if idx % 50 == 0:
                time.sleep(5)

        # 연도별 raw DF
        if all_dfs:
            year_raw = pd.concat(all_dfs, ignore_index=True)
        else:
            year_raw = pd.DataFrame(
                columns=['corp_code', 'year', 'total_asset', 'total_debt']
            )

        base_df = corp_map.copy()
        base_df['year'] = year

        year_df = base_df.merge(
            year_raw,
            on=['corp_code', 'year'],
            how='left'
        )

        year_df['total_asset'] = pd.to_numeric(
            year_df['total_asset'], errors='coerce'
        ).fillna(0)

        year_df['total_debt'] = pd.to_numeric(
            year_df['total_debt'], errors='coerce'
        ).fillna(0)

        year_df['ln_asset'] = np.log1p(year_df['total_asset'])

        year_df = year_df[
            ['corp_code', 'corp_name', 'year',
             'total_asset', 'ln_asset', 'total_debt']
        ]

        year_dfs[year] = year_df

    return tuple(year_dfs[year] for year in range(start_year, end_year + 1))


In [15]:
adf_2019, adf_2020, adf_2021, adf_2022, adf_2023, adf_2024 =  asset(cdf, my_key, start_year=2019, end_year=2024)


=== 2019년 진행 ===


2019년 기업 진행: 100%|███████████████████████████████████████████| 731/731 [07:52<00:00,  1.55it/s]



=== 2020년 진행 ===


2020년 기업 진행: 100%|███████████████████████████████████████████| 731/731 [07:45<00:00,  1.57it/s]



=== 2021년 진행 ===


2021년 기업 진행: 100%|███████████████████████████████████████████| 731/731 [07:52<00:00,  1.55it/s]



=== 2022년 진행 ===


2022년 기업 진행: 100%|███████████████████████████████████████████| 731/731 [08:04<00:00,  1.51it/s]



=== 2023년 진행 ===


2023년 기업 진행: 100%|███████████████████████████████████████████| 731/731 [08:13<00:00,  1.48it/s]



=== 2024년 진행 ===


2024년 기업 진행: 100%|███████████████████████████████████████████| 731/731 [08:13<00:00,  1.48it/s]


In [16]:
adf_2019

Unnamed: 0,corp_code,corp_name,year,total_asset,ln_asset,total_debt
0,00365387,AJ네트웍스,2019,1803255225986.000000,28.220615,1455914070366.000000
1,00125080,AK홀딩스,2019,4327964555085.000000,29.096118,2898025666994.000000
2,00219097,BGF,2019,1657381460248.000000,28.136260,129126735585.000000
3,01263022,BGF리테일,2019,2172410899706.000000,28.406859,1549815137631.000000
4,00858364,BNK금융지주,2019,0.000000,0.000000,0.000000
...,...,...,...,...,...,...
726,00111421,휴니드테크놀러지스,2019,260965121649.000000,26.287653,145116828409.000000
727,00362238,휴비스,2019,877170812678.000000,27.499968,481045043591.000000
728,00156488,휴스틸,2019,579096387394.000000,27.084735,182538348103.000000
729,00103176,흥국화재,2019,0.000000,0.000000,0.000000


In [17]:
adf_lst = [
    adf_2019, adf_2020, adf_2021, adf_2022, adf_2023, adf_2024
]

In [18]:
for df, year in zip(adf_lst, range(2019, 2025)):
    df.to_csv(
        f'data/total_asset_debt/total_asset_debt_{year}.csv',
        float_format='%.6f',
        index=False
    )