In [3]:
from datetime import datetime
import pandas as pd

import duckdb

# 데이터 수집

### 종목 기본정보 - kor_info

In [23]:
import requests
import pandas as pd

# Listed-issue master data (basic info per issue) from the KRX data portal.
url = 'https://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

payload = {
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT01901',  # the key field: identifies which report to return
    'mktId': 'ALL',   # STK: KOSPI, KSQ: KOSDAQ, ALL: every market
    'share': '1',
    'csvxls_isNo': 'false'
}

# requests applies no timeout by default, so an unresponsive server would hang
# the notebook; raise_for_status() turns an HTTP error into a clear exception
# instead of a confusing JSON/KeyError further down.
res = requests.post(url, headers=headers, data=payload, timeout=30)
res.raise_for_status()
json_data = res.json()
kor_info_raw = pd.DataFrame(json_data['OutBlock_1'])

# Convert the comma-formatted numeric text columns to numbers.
cols_to_convert = ['PARVAL', 'LIST_SHRS']  # columns to convert
for col in cols_to_convert:
    kor_info_raw[col] = kor_info_raw[col].astype(str).str.replace(',', '', regex=False)
    # errors='coerce': values that are not numeric become NaN instead of raising.
    kor_info_raw[col] = pd.to_numeric(kor_info_raw[col], errors='coerce')


# Rename the raw KRX fields to the Korean column names used by the rest of the
# notebook and parse the listing date; duckdb reads `kor_info_raw` by name.
kor_info = duckdb.query(
    """
    select
        ISU_CD as 표준티커,
        ISU_SRT_CD as 티커,
        ISU_ABBRV as 종목명,
        MKT_TP_NM as 시장구분,
        STRPTIME(LIST_DD, '%Y/%m/%d') AS 상장일,
        KIND_STKCERT_TP_NM as 증권구분,
        PARVAL as 액면가,
        LIST_SHRS as 상장주식수
    from kor_info_raw
    """
).fetchdf()

kor_info.head()

Unnamed: 0,표준티커,티커,종목명,시장구분,상장일,증권구분,액면가,상장주식수
0,KR7098120009,98120,마이크로컨텍솔,KOSDAQ,2008-09-23,보통주,500.0,8312766
1,KR7009520008,9520,포스코엠텍,KOSDAQ GLOBAL,1997-11-10,보통주,500.0,41642703
2,KR7095570008,95570,AJ네트웍스,KOSPI,2015-08-21,보통주,1000.0,45252759
3,KR7006840003,6840,AK홀딩스,KOSPI,1999-08-11,보통주,5000.0,13247561
4,KR7282330000,282330,BGF리테일,KOSPI,2017-12-08,보통주,1000.0,17283906


### 전종목 현재가 - kor_price

In [5]:
# Query date: the current local date, formatted as YYYYMMDD.
today_str = format(datetime.today(), '%Y%m%d')

# To query a specific date instead, override it here (keep it a string):
# today_str = '20250715'

In [6]:
import requests
import pandas as pd

# Request URL
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

# Request headers
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

# Form data (payload) of the POST request
payload = {
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT01501',  # quotes for all issues
    'locale': 'ko_KR',
    'mktId': 'ALL',             # KOSPI: STK, KOSDAQ: KSQ, ALL returns every market
    'trdDd': today_str,        # query date (YYYYMMDD)
    'share': '1',
    'money': '1',
    'csvxls_isNo': 'false'
}

# requests applies no timeout by default, so an unresponsive server would hang
# the notebook; raise_for_status() turns an HTTP error into a clear exception
# instead of a confusing JSON/KeyError further down.
res = requests.post(url, headers=headers, data=payload, timeout=30)
res.raise_for_status()
data_json = res.json()
kor_price_raw = pd.DataFrame(data_json['OutBlock_1'])

# Convert every text column except the identifier/label columns to numbers.
exclude_cols = ['ISU_SRT_CD', 'ISU_CD', 'ISU_ABBRV', 'MKT_NM', 'SECT_TP_NM', 'MKT_ID']  # columns left as text
for col in kor_price_raw.select_dtypes(include='object').columns:
    if col not in exclude_cols:
        kor_price_raw[col] = kor_price_raw[col].str.replace(',', '', regex=False)
        # errors='coerce': non-numeric text becomes NaN, so price fields with no
        # numeric value for the requested date show up as NaN in the result.
        kor_price_raw[col] = pd.to_numeric(kor_price_raw[col], errors='coerce')


# Rename the raw KRX fields to the Korean column names used by the rest of the
# notebook; duckdb reads `kor_price_raw` by name.
kor_price = duckdb.query(
    """
    select
        ISU_SRT_CD as 티커,
        ISU_ABBRV as 종목명,
        MKT_NM as 시장구분,
        TDD_CLSPRC as 종가,
        CMPPREVDD_PRC as 등락,
        FLUC_RT as 등락률,
        TDD_OPNPRC as 시가,
        TDD_HGPRC as 고가,
        TDD_LWPRC as 저가,
        ACC_TRDVOL as 거래량,
        ACC_TRDVAL as 거래대금,
        MKTCAP as 시가총액,
        LIST_SHRS as 상장주식수
    from kor_price_raw
    """
).fetchdf()

kor_price.head()

Unnamed: 0,티커,종목명,시장구분,종가,등락,등락률,시가,고가,저가,거래량,거래대금,시가총액,상장주식수
0,60310,3S,KOSDAQ,,,,,,,,,,53059040
1,95570,AJ네트웍스,KOSPI,,,,,,,,,,45252759
2,6840,AK홀딩스,KOSPI,,,,,,,,,,13247561
3,54620,APS,KOSDAQ,,,,,,,,,,19894221
4,265520,AP시스템,KOSDAQ,,,,,,,,,,15281421


### 업종분류현황 - kor_sector
- KONEX는 업종분류 없음

In [7]:
import requests
import pandas as pd

url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

# Per-market results, keyed as '<mkt_id>_sector_raw'.
sector_data = {}

# One request per market code.
for mkt_id in ['STK', 'KSQ']:
    payload = {
        'bld': 'dbms/MDC/STAT/standard/MDCSTAT03901',
        'locale': 'ko_KR',
        'mktId': mkt_id,
        'trdDd': today_str,
        'money': '1',
        'csvxls_isNo': 'false'
    }

    # requests applies no timeout by default, so an unresponsive server would
    # hang the loop; raise_for_status() surfaces HTTP errors immediately.
    res = requests.post(url, headers=headers, data=payload, timeout=30)
    res.raise_for_status()
    data = res.json()

    # Build the DataFrame from the 'block1' key; a missing key yields an empty frame.
    sector_df = pd.DataFrame(data.get('block1', []))
    sector_data[f"{mkt_id}_sector_raw"] = sector_df

# Bind each frame to a plain variable so the SQL below can reference it by name.
STK_sector_raw = sector_data['STK_sector_raw']
KSQ_sector_raw = sector_data['KSQ_sector_raw']


# Stack both markets into one table with the notebook's Korean column names.
kor_sector = duckdb.query(
    """
    select
        ISU_SRT_CD as 티커,
        ISU_ABBRV as 종목명,
        MKT_TP_NM as 시장구분,
        IDX_IND_NM as 업종명
    from STK_sector_raw

    UNION ALL

    select
        ISU_SRT_CD as 티커,
        ISU_ABBRV as 종목명,
        MKT_TP_NM as 시장구분,
        IDX_IND_NM as 업종명
    from KSQ_sector_raw
    """
).fetchdf()

kor_sector.head()

Unnamed: 0,티커,종목명,시장구분,업종명
0,95570,AJ네트웍스,KOSPI,일반서비스
1,6840,AK홀딩스,KOSPI,기타금융
2,27410,BGF,KOSPI,기타금융
3,282330,BGF리테일,KOSPI,유통
4,138930,BNK금융지주,KOSPI,기타금융


### 일반조회

In [33]:
import requests
import pandas as pd

# Request URL
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

# Request headers
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

# Samsung Electronics, identified by its 12-character standard code
# (the short ticker 005930 appears separately in the payload below).
isin_code = 'KR7005930003'

# Request body
payload = {
    # Author's note: 1 = price info, 3 = general overview, 4 = financial info
    # (presumably the trailing digit of the report id -- confirm before changing).
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT02104',
    'locale': 'ko_KR',
    'tboxisuCd_finder_stkisu0_4': '005930/삼성전자',
    'isuCd': isin_code,
    'isuCd2': isin_code,
    'codeNmisuCd_finder_stkisu0_4': '삼성전자',
    'param1isuCd_finder_stkisu0_4': 'ALL',
    'csvxls_isNo': 'false'
}

# Send the request. requests applies no timeout by default, so an unresponsive
# server would hang the notebook; raise_for_status() surfaces HTTP errors.
res = requests.post(url, headers=headers, data=payload, timeout=30)
res.raise_for_status()
data = res.json()

# Show the raw response (a dict)
print(data)

# Convert to a DataFrame; the response is a single dict, so wrap it in a list
# to get one row.
df = pd.DataFrame([data])
df.head()

{'INDVDL_CONN_TP': '2', 'ASST_TOTAMT': '514,531,948,000,000', 'DEBT_TOTAMT': '112,339,878,000,000', 'CAP': '897,514,000,000', 'CAP_GRNDTOT': '402,192,070,000,000', 'SALES': '300,870,903,000,000', 'OPERPROFT_AMT': '32,725,961,000,000', 'NETINCM': '34,451,351,000,000', 'CURRENT_DATETIME': '2025.07.16 AM 01:57:06'}


Unnamed: 0,INDVDL_CONN_TP,ASST_TOTAMT,DEBT_TOTAMT,CAP,CAP_GRNDTOT,SALES,OPERPROFT_AMT,NETINCM,CURRENT_DATETIME
0,2,514531948000000,112339878000000,897514000000,402192070000000,300870903000000,32725961000000,34451351000000,2025.07.16 AM 01:57:06


### 일반재무정보

### ETF조회

### 개별종목시세

# SQL

In [8]:
# Row count of the kor_price table; the per-market grouping is left disabled
# inside the SQL text.
count_query = """
    select count(*)
    from kor_price
    -- group by 시장구분
    """
duckdb.query(count_query).fetchdf()

Unnamed: 0,count_star()
0,2874


# DART 전자공시

In [9]:
import io
import os
import xml.etree.ElementTree as ET
import zipfile

import pandas as pd
import requests

# Read the OpenDART API key from the environment instead of hardcoding it in
# the notebook. NOTE: a key that was ever committed in a notebook should be
# treated as exposed and re-issued.
api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError('Set the DART_API_KEY environment variable to your OpenDART API key.')

# Download the ZIP archive of all company codes from DART. The key is passed
# via `params` so it is not baked into the `url` string.
url = 'https://opendart.fss.or.kr/api/corpCode.xml'
res = requests.get(url, params={'crtfc_key': api_key}, timeout=60)
res.raise_for_status()
with zipfile.ZipFile(io.BytesIO(res.content)) as z:
    # Read the first member of the archive.
    xml_content = z.read(z.namelist()[0])

# XML -> DataFrame: each child of the root becomes one row, with its
# sub-element tags as columns and their text as values.
tree = ET.fromstring(xml_content)
corp_list = [{item.tag: item.text for item in child} for child in tree]
corp_df = pd.DataFrame(corp_list)

# Look up the DART corp_code of the target company by its exact name.
target_corp_name = 'SAMG엔터'
matching_codes = corp_df.loc[corp_df['corp_name'] == target_corp_name, 'corp_code']
if matching_codes.empty:
    raise LookupError(f'No company named {target_corp_name!r} in the DART corp code list.')
# If several rows share the name, the first one is used (same as before).
corp_code = matching_codes.iloc[0]

In [10]:
# Annual business report, fiscal year 2024 (see `bsns_year` below).
url = 'https://opendart.fss.or.kr/api/fnlttSinglAcnt.json'
params = {
    'crtfc_key': api_key,
    'corp_code': corp_code,
    'bsns_year': '2024',
    'reprt_code': '11011',  # 11011: annual report, 11012: half-year, 11013: Q1, 11014: Q3
    # Consolidated: CFS / separate: OFS.
    # NOTE(review): confirm this endpoint honors fs_div; the response carries
    # its own fs_div column that can be filtered on instead.
    'fs_div': 'CFS'
}

# requests applies no timeout by default, so an unresponsive server would hang
# the notebook; raise_for_status() surfaces HTTP errors immediately.
res = requests.get(url, params=params, timeout=30)
res.raise_for_status()
data = res.json()

# Without a 'list' key there are no rows to load; report whatever status and
# message the response carries instead of failing with a bare KeyError.
if 'list' not in data:
    raise RuntimeError(
        f"DART returned no data: status={data.get('status')!r}, message={data.get('message')!r}"
    )

df_fnl = pd.DataFrame(data['list'])
df_fnl

Unnamed: 0,rcept_no,reprt_code,bsns_year,corp_code,stock_code,fs_div,fs_nm,sj_div,sj_nm,account_nm,...,thstrm_dt,thstrm_amount,frmtrm_nm,frmtrm_dt,frmtrm_amount,bfefrmtrm_nm,bfefrmtrm_dt,bfefrmtrm_amount,ord,currency
0,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,유동자산,...,2024.12.31 현재,65699669512,제 24 기,2023.12.31 현재,60626359363,제 23 기,2022.12.31 현재,54719246284,1,KRW
1,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,비유동자산,...,2024.12.31 현재,31650766588,제 24 기,2023.12.31 현재,39508123335,제 23 기,2022.12.31 현재,21961782500,3,KRW
2,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,자산총계,...,2024.12.31 현재,97350436100,제 24 기,2023.12.31 현재,100134482698,제 23 기,2022.12.31 현재,76681028784,5,KRW
3,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,유동부채,...,2024.12.31 현재,32183346101,제 24 기,2023.12.31 현재,21218756805,제 23 기,2022.12.31 현재,26191891492,7,KRW
4,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,비유동부채,...,2024.12.31 현재,34392622548,제 24 기,2023.12.31 현재,31824206962,제 23 기,2022.12.31 현재,3400441854,9,KRW
5,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,부채총계,...,2024.12.31 현재,66575968649,제 24 기,2023.12.31 현재,53042963767,제 23 기,2022.12.31 현재,29592333346,11,KRW
6,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,자본금,...,2024.12.31 현재,4295465000,제 24 기,2023.12.31 현재,4295465000,제 23 기,2022.12.31 현재,4295465000,13,KRW
7,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,이익잉여금,...,2024.12.31 현재,-86480262392,제 24 기,2023.12.31 현재,-67038706667,제 23 기,2022.12.31 현재,-49885911792,17,KRW
8,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,BS,재무상태표,자본총계,...,2024.12.31 현재,30774467451,제 24 기,2023.12.31 현재,47091518931,제 23 기,2022.12.31 현재,47088695438,21,KRW
9,20250318001024,11011,2024,1082418,419530,CFS,연결재무제표,IS,손익계산서,매출액,...,2024.01.01 ~ 2024.12.31,116438413800,제 24 기,2023.01.01 ~ 2023.12.31,95122568514,제 23 기,2022.01.01 ~ 2022.12.31,68322017040,23,KRW
