In [21]:
%pip install pandas numpy requests plotly python-dotenv

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


In [22]:
%pip install -U pandas numpy requests plotly python-dotenv

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


기본설정 (api 키, 폴더)

In [29]:
import os

# DART API 키 입력
API_KEY = "32ba89ca15f1fde1cb7116b4e08b2959c861f4e4"

# 작업 폴더 준비
os.makedirs("dart_data", exist_ok=True)

# 테스트
print("data 폴더 생성 확인:", os.path.exists("dart_data"))
print("API_KEY 길이:", len(API_KEY))


data 폴더 생성 확인: True
API_KEY 길이: 40


In [39]:
corp_code = "00126380"

In [41]:
import io, zipfile, requests, pandas as pd
import xml.etree.ElementTree as ET

def build_corp_code_cache(api_key: str, out_csv: str = "data/corp_codes.csv") -> pd.DataFrame:
    url = "https://opendart.fss.or.kr/api/corpCode.xml"
    r = requests.get(url, params={"crtfc_key": api_key}, timeout=30)
    r.raise_for_status()

    with zipfile.ZipFile(io.BytesIO(r.content)) as zf:
        xml_bytes = zf.read("CORPCODE.xml")

    root = ET.fromstring(xml_bytes)
    rows = []
    for node in root.findall("list"):
        rows.append({
            "corp_code": node.findtext("corp_code"),
            "corp_name": node.findtext("corp_name"),
            "stock_code": node.findtext("stock_code"),
        })
    df = pd.DataFrame(rows)
    df.to_csv(out_csv, index=False, encoding="utf-8-sig")
    return df

codes = build_corp_code_cache(API_KEY)
print("총 기업 수:", len(codes))
codes[codes["corp_name"].str.contains("하이닉스", na=False)].head()


총 기업 수: 113770


Unnamed: 0,corp_code,corp_name,stock_code
2777,360452,하이닉스제일차유동화전문회사,
5679,650717,하이닉스인재개발원,
97770,164779,SK하이닉스,660.0
98991,1265516,에스케이하이닉스시스템아이씨,
99647,1871672,에스케이하이닉스시스템아이씨우시솔루션스,


In [67]:
import io, zipfile, xml.etree.ElementTree as ET
import re
import numpy as np, pandas as pd, requests

def _to_number(x):
    if pd.isna(x):
        return np.nan
    s = str(x).strip()
    if s == "" or s == "-":
        return np.nan
    s = s.replace(",", "").replace("원", "").strip()
    # 괄호음수 (1234) -> -1234
    if re.match(r"^\(.*\)$", s):
        s = "-" + s[1:-1]
    # 삼각기호 △1234 -> -1234
    s = s.replace("△", "-")
    # 유니코드 마이너스(−)를 ASCII '-'로 치환
    s = s.replace("−", "-")
    try:
        return float(s)
    except:
        return np.nan

def _safe_div(a, b):
    a = pd.to_numeric(a, errors="coerce")
    b = pd.to_numeric(b, errors="coerce")
    with np.errstate(divide="ignore", invalid="ignore"):
        r = a / b
    return r.replace([np.inf, -np.inf], np.nan)

def lookup_corp_code_by_name(api_key: str, query: str):
    """
    회사명/종목코드로 DART corp_code 찾기.
    - 종목코드(6자리)나 회사명 일부를 넣어도 됨.
    """
    url = "https://opendart.fss.or.kr/api/corpCode.xml"
    r = requests.get(url, params={"crtfc_key": api_key}, timeout=60)
    r.raise_for_status()
    z = zipfile.ZipFile(io.BytesIO(r.content))
    xml_bytes = z.read(z.namelist()[0])
    root = ET.fromstring(xml_bytes)

    items = []
    q = query.strip().lower()
    for el in root.findall("list"):
        corp_code = el.findtext("corp_code")
        corp_name = el.findtext("corp_name") or ""
        stock_code = el.findtext("stock_code") or ""  # 상장 아닌 경우 빈 문자열
        if q in corp_name.lower() or (stock_code and q in stock_code.lower()):
            items.append({"corp_code": corp_code, "corp_name": corp_name, "stock_code": stock_code})
    return pd.DataFrame(items)

def get_financials_json(
    api_key: str,
    corp_code: str,
    years,
    prefer_cfs: bool = True,
    reprt_codes=("11011",),  # 필요시 ("11011","11012","11013")로 확장
) -> pd.DataFrame:
    base = "https://opendart.fss.or.kr/api/fnlttSinglAcntAll.json"

    id_map = {
        "ifrs-full_Revenue": "revenue",
        "ifrs-full_OperatingIncomeLoss": "op_income",
        "ifrs-full_ProfitLoss": "net_income",
        "ifrs-full_Assets": "assets",
        "ifrs-full_Liabilities": "liab",
        "ifrs-full_Equity": "equity",
        "dart_OperatingIncomeLoss": "op_income",
        "dart_Revenue": "revenue",
    }
    name_patterns = [
        (re.compile(r"매출|수익"), "revenue"),
        (re.compile(r"영업이익"), "op_income"),
        (re.compile(r"(지배(기업)?\s*소유주지분\s*)?당기순이익|순이익"), "net_income"),
        (re.compile(r"자산총계|총자산"), "assets"),
        (re.compile(r"부채총계|총부채"), "liab"),
        (re.compile(r"자본총계|총자본|지배(기업)?\s*소유주지분"), "equity"),
    ]

    ordered_cols = ["year", "revenue", "op_income", "net_income", "assets", "liab", "equity"]
    rows = []
    fs_order = ("CFS", "OFS") if prefer_cfs else ("OFS", "CFS")  # ← 연결 우선/개별 폴백

    for y in years:
        got = None
        last_err = None
        for rc in reprt_codes:
            for fs_div in fs_order:
                try:
                    j = requests.get(
                        base,
                        params={
                            "crtfc_key": api_key,
                            "corp_code": corp_code,
                            "bsns_year": y,
                            "reprt_code": rc,
                            "fs_div": fs_div,           # ★ 필수 파라미터
                        },
                        timeout=30,
                    ).json()
                except Exception as e:
                    last_err = f"요청 실패: {e}"
                    continue

                if j.get("status") == "000" and "list" in j and j["list"]:
                    got = j["list"]
                    break
                else:
                    last_err = f"{y}년 {rc} {fs_div} 응답: status={j.get('status')} message={j.get('message')}"
            if got is not None:
                break

        if got is None:
            print(f"[WARN] {y}년 호출 실패: {last_err}")
            continue

        df = pd.DataFrame(got)
        if df.empty:
            print(f"[WARN] {y}년 데이터 없음")
            continue

        # 값 숫자화
        df["value"] = df["thstrm_amount"].apply(_to_number)

        # account_id → account_nm 패턴
        picks = {}
        for _, row in df.iterrows():
            k = None
            aid = (row.get("account_id") or "").strip()
            if aid in id_map:
                k = id_map[aid]
            else:
                nm = (row.get("account_nm") or "").strip()
                for pat, key in name_patterns:
                    if pat.search(nm):
                        k = key
                        break
            if k and pd.notna(row["value"]) and k not in picks:
                picks[k] = row["value"]

        rows.append(pd.DataFrame([{"year": int(y), **picks}]))

    if not rows:
        return pd.DataFrame(columns=ordered_cols)

    out = pd.concat(rows, ignore_index=True).reindex(columns=ordered_cols)
    return out


def add_indicators(fin: pd.DataFrame) -> pd.DataFrame:
    fin = fin.copy()
    fin["ROE"] = (_safe_div(fin["net_income"], fin["equity"]) * 100).round(2)
    fin["DebtRatio"] = (_safe_div(fin["liab"], fin["equity"]) * 100).round(2)
    fin["OPMargin"] = (_safe_div(fin["op_income"], fin["revenue"]) * 100).round(2)
    return fin
# ===== 단위/대상 컬럼 정의 =====
UNIT_MAP = {
    "원": 1,
    "천원": 1e3,
    "만원": 1e4,
    "백만원": 1e6,
    "억원": 1e8,
    "조원": 1e12,
}
FIN_COLS = ["revenue", "op_income", "net_income", "assets", "liab", "equity"]
PCT_COLS = ["ROE", "DebtRatio", "OPMargin"]

def scale_financials(fin: pd.DataFrame, unit: str = "억원",
                     decimals: int = 1, fmt: bool = True) -> pd.DataFrame:
    """
    재무 숫자열을 원하는 단위(기본: 억원)로 축소.
    fmt=True 이면 콤마/소수/퍼센트까지 보기 좋은 문자열로 포맷.
    fmt=False 이면 숫자형 유지(그래프/계산용).
    """
    out = fin.copy()
    # 지표가 없으면 계산
    if not set(PCT_COLS).issubset(out.columns):
        out = add_indicators(out)

    scale = UNIT_MAP[unit]
    # 값 컬럼만 단위 축소
    out[FIN_COLS] = out[FIN_COLS].div(scale)

    if fmt:  # 화면용 포맷
        num_fmt = "{:,.%df}" % decimals
        for c in FIN_COLS:
            out[c] = out[c].apply(lambda v: "" if pd.isna(v) else num_fmt.format(v))
        for c in PCT_COLS:
            out[c] = out[c].apply(lambda v: "" if pd.isna(v) else f"{v:.2f}%")
        # 표 상단에 단위 표시
        out.insert(1, "단위", unit)

    return out


In [69]:
API_KEY = "32ba89ca15f1fde1cb7116b4e08b2959c861f4e4"
#회사명/종목코드로 corp_code 조회
df_code = lookup_corp_code_by_name(API_KEY, "000660") 
print(df_code.head())

corp_code = df_code.iloc[0]["corp_code"]  # 원하는 행 선택
years = [2021, 2022, 2023, 2024] 

fin = get_financials_json(API_KEY, corp_code, [2021,2022,2023,2024],
                          prefer_cfs=True,
                          reprt_codes=("11011","11012","11013"))
fin = add_indicators(fin)

# 화면용(문자열 포맷)
fin_view = scale_financials(fin, unit="억원", decimals=1, fmt=True)
print(fin_view)

# 계산/그래프용(숫자 유지)
fin_num = scale_financials(fin, unit="억원", fmt=False)

  corp_code corp_name stock_code
0  00164779    SK하이닉스     000660
   year  단위    revenue  op_income net_income       assets       liab  \
0  2021  억원   82,671.1  124,103.4  134,159.9    963,864.7  341,954.2   
1  2022  억원   51,860.5   68,094.2   40,027.8  1,038,715.1  405,809.7   
2  2023  억원   66,002.7  -77,303.1  -91,375.5  1,003,301.7  468,264.1   
3  2024  억원  130,190.1  234,673.2  197,969.0  1,198,552.1  459,395.0   

      equity      ROE DebtRatio  OPMargin  
0  621,910.6   21.57%    54.98%   150.12%  
1  632,905.4    6.32%    64.12%   131.30%  
2  535,037.5  -17.08%    87.52%  -117.12%  
3  739,157.0   26.78%    62.15%   180.25%  


In [53]:
def get_financials_json(
    api_key: str,
    corp_code: str,
    years,
    prefer_cfs: bool = True,
    reprt_codes=("11011",),   # 필요 시 ("11011","11012","11013")로 확장
) -> pd.DataFrame:
    """
    DART 단일회사 전체 재무제표 API (fnlttSinglAcntAll.json)
    - fs_div(개별/연결) 필수: CFS 또는 OFS
    - 연결 우선, 없으면 개별로 폴백
    """
    base = "https://opendart.fss.or.kr/api/fnlttSinglAcntAll.json"

    # IFRS id/계정명 매핑은 이전 코드 그대로 사용
    id_map = {
        "ifrs-full_Revenue": "revenue",
        "ifrs-full_OperatingIncomeLoss": "op_income",
        "ifrs-full_ProfitLoss": "net_income",
        "ifrs-full_Assets": "assets",
        "ifrs-full_Liabilities": "liab",
        "ifrs-full_Equity": "equity",
        "dart_OperatingIncomeLoss": "op_income",
        "dart_Revenue": "revenue",
    }
    import re
    name_patterns = [
        (re.compile(r"매출|수익"), "revenue"),
        (re.compile(r"영업이익"), "op_income"),
        (re.compile(r"(지배(기업)?\s*소유주지분\s*)?당기순이익|순이익"), "net_income"),
        (re.compile(r"자산총계|총자산"), "assets"),
        (re.compile(r"부채총계|총부채"), "liab"),
        (re.compile(r"자본총계|총자본|지배(기업)?\s*소유주지분"), "equity"),
    ]

    ordered_cols = ["year","revenue","op_income","net_income","assets","liab","equity"]
    rows = []
    fs_order = ("CFS","OFS") if prefer_cfs else ("OFS","CFS")

    for y in years:
        got = None
        last_err = None
        for rc in reprt_codes:
            for fs_div in fs_order:
                j = requests.get(
                    base,
                    params={
                        "crtfc_key": api_key,
                        "corp_code": corp_code,
                        "bsns_year": y,
                        "reprt_code": rc,
                        "fs_div": fs_div,   # <-- 필수 파라미터
                    },
                    timeout=30
                ).json()
                if j.get("status") == "000" and j.get("list"):
                    got = j["list"]
                    break
                else:
                    last_err = f"{y}년 {rc} {fs_div} 응답: status={j.get('status')} message={j.get('message')}"
            if got is not None:
                break

        if got is None:
            print(f"[WARN] {last_err}")
            continue

        df = pd.DataFrame(got)
        if df.empty:
            print(f"[WARN] {y}년 데이터 없음"); continue

        # 값 숫자화 (이전에 정의한 _to_number 사용)
        df["value"] = df["thstrm_amount"].apply(_to_number)

        # account_id → account_nm 패턴 매칭
        picks = {}
        for _, row in df.iterrows():
            k = None
            aid = (row.get("account_id") or "").strip()
            if aid in id_map:
                k = id_map[aid]
            else:
                nm = (row.get("account_nm") or "").strip()
                for pat, key in name_patterns:
                    if pat.search(nm):
                        k = key; break
            if k and pd.notna(row["value"]) and k not in picks:
                picks[k] = row["value"]

        pivot = pd.DataFrame([{"year": int(y), **picks}])
        rows.append(pivot)

    if not rows:
        return pd.DataFrame(columns=ordered_cols)

    out = pd.concat(rows, ignore_index=True).reindex(columns=ordered_cols)
    return out
