# 기본 데이터 수집

In [1]:
import pandas as pd
from datetime import datetime
import time
from tqdm import tqdm
import requests

import duckdb

### 종목 기본정보 - kor_info

In [2]:
# Fetch the KRX listed-issue master table (report MDCSTAT01901) and shape it
# into `kor_info`, the ticker / name / market lookup used by the later cells.
url = 'https://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

payload = {
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT01901',  # report id: this is what selects the dataset
    'mktId': 'ALL',   # STK: KOSPI, KSQ: KOSDAQ, ALL: every market
    'share': '1',
    'csvxls_isNo': 'false'
}

# Bound the wait and fail on HTTP errors right here, instead of hanging
# forever or surfacing later as an opaque JSON decoding error.
res = requests.post(url, headers=headers, data=payload, timeout=30)
res.raise_for_status()
json_data = res.json()
kor_info_raw = pd.DataFrame(json_data['OutBlock_1'])

# Strip thousands separators, then convert to numbers (unparseable -> NaN).
cols_to_convert = ['PARVAL', 'LIST_SHRS']
for col in cols_to_convert:
    kor_info_raw[col] = kor_info_raw[col].astype(str).str.replace(',', '', regex=False)
    kor_info_raw[col] = pd.to_numeric(kor_info_raw[col], errors='coerce')
# Parse the listing date.
kor_info_raw['LIST_DD'] = pd.to_datetime(kor_info_raw['LIST_DD'], format='%Y/%m/%d')


# DuckDB resolves `kor_info_raw` from the Python variable of the same name.
kor_info = duckdb.query(
    """
    select
        ISU_CD as 표준티커,
        ISU_SRT_CD as 티커,
        ISU_ABBRV as 종목명,
        MKT_TP_NM as 시장구분,
        LIST_DD AS 상장일,
        KIND_STKCERT_TP_NM as 증권구분,
        PARVAL as 액면가,
    from kor_info_raw
    """
).fetchdf()

kor_info.head()

Unnamed: 0,표준티커,티커,종목명,시장구분,상장일,증권구분,액면가
0,KR7098120009,98120,마이크로컨텍솔,KOSDAQ,2008-09-23,보통주,500.0
1,KR7009520008,9520,포스코엠텍,KOSDAQ GLOBAL,1997-11-10,보통주,500.0
2,KR7095570008,95570,AJ네트웍스,KOSPI,2015-08-21,보통주,1000.0
3,KR7006840003,6840,AK홀딩스,KOSPI,1999-08-11,보통주,5000.0
4,KR7282330000,282330,BGF리테일,KOSPI,2017-12-08,보통주,1000.0


### 전종목 현재가 - kor_price

In [3]:
# Query date in KRX's YYYYMMDD form; defaults to the current local date.
today_str = f"{datetime.today():%Y%m%d}"

# To query a fixed date instead, override it here (as a string, like the default):
# today_str = '20250818'

In [4]:
# Fetch the all-issue daily quote table (report MDCSTAT01501) for `today_str`
# and shape it into `kor_price`.
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

# Request headers
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

# Form data for the POST request
payload = {
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT01501',  # all-issue quotes
    'locale': 'ko_KR',
    'mktId': 'ALL',             # STK: KOSPI, KSQ: KOSDAQ, ALL: every market
    'trdDd': today_str,        # trading date to query
    'share': '1',
    'money': '1',
    'csvxls_isNo': 'false'
}

# Bound the wait and fail on HTTP errors right here, instead of hanging
# forever or surfacing later as an opaque JSON decoding error.
res = requests.post(url, headers=headers, data=payload, timeout=30)
res.raise_for_status()
data_json = res.json()
kor_price_raw = pd.DataFrame(data_json['OutBlock_1'])

# Convert every text column to numbers except the identifier / label columns
# listed below (unparseable values become NaN).
exclude_cols = ['ISU_SRT_CD', 'ISU_CD', 'ISU_ABBRV', 'MKT_NM', 'SECT_TP_NM', 'MKT_ID']
for col in kor_price_raw.select_dtypes(include='object').columns:
    if col not in exclude_cols:
        kor_price_raw[col] = kor_price_raw[col].str.replace(',', '', regex=False)
        kor_price_raw[col] = pd.to_numeric(kor_price_raw[col], errors='coerce')


# DuckDB resolves `kor_price_raw` from the Python variable of the same name.
kor_price = duckdb.query(
    """
    select
        ISU_SRT_CD as 티커,
        TDD_CLSPRC as 종가,
        CMPPREVDD_PRC as 등락,
        FLUC_RT as 등락률,
        TDD_OPNPRC as 시가,
        TDD_HGPRC as 고가,
        TDD_LWPRC as 저가,
        ACC_TRDVOL as 거래량,
        ACC_TRDVAL as 거래대금,
        MKTCAP as 시가총액,
        LIST_SHRS as 상장주식수
    from kor_price_raw p
    """
).fetchdf()

kor_price.head()

Unnamed: 0,티커,종가,등락,등락률,시가,고가,저가,거래량,거래대금,시가총액,상장주식수
0,60310,1807,-21,-1.15,1800,1827,1770,159034,285732598,95877685280,53059040
1,95570,4140,-10,-0.24,4115,4150,4065,87679,359356492,187346422260,45252759
2,6840,11060,-210,-1.86,11210,11220,10910,6606,72748490,146518024660,13247561
3,54620,4515,-165,-3.53,4680,4680,4500,17554,79751900,89822407815,19894221
4,265520,16870,-180,-1.06,16750,17120,16740,104451,1763726670,257797572270,15281421


### 업종분류현황 - kor_sector
- KONEX는 업종분류 없음

In [5]:
# Fetch the sector classification (report MDCSTAT03901) for KOSPI and KOSDAQ
# separately, then stack both into `kor_sector` (ticker -> sector name).
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

# Raw per-market frames, keyed "<market>_sector_raw"
sector_data = {}

# One request per market code
for mkt_id in ['STK', 'KSQ']:
    payload = {
        'bld': 'dbms/MDC/STAT/standard/MDCSTAT03901',
        'locale': 'ko_KR',
        'mktId': mkt_id,
        'trdDd': today_str,
        'money': '1',
        'csvxls_isNo': 'false'
    }

    # Bound the wait and fail on HTTP errors right here, instead of hanging
    # forever or surfacing later as an opaque JSON decoding error.
    res = requests.post(url, headers=headers, data=payload, timeout=30)
    res.raise_for_status()
    data = res.json()

    # Rows live under the 'block1' key; a missing key yields an empty frame,
    # in which case the SQL below fails on the missing columns.
    sector_df = pd.DataFrame(data.get('block1', []))
    sector_data[f"{mkt_id}_sector_raw"] = sector_df

# Expose each market's frame under its own name so DuckDB can see it.
STK_sector_raw = sector_data['STK_sector_raw']
KSQ_sector_raw = sector_data['KSQ_sector_raw']


kor_sector = duckdb.query(
    """
    select
        ISU_SRT_CD as 티커,
        IDX_IND_NM as 업종명
    from STK_sector_raw

    UNION ALL

    select
        ISU_SRT_CD as 티커,
        IDX_IND_NM as 업종명
    from KSQ_sector_raw
    """
).fetchdf()

kor_sector.head()

Unnamed: 0,티커,업종명
0,95570,일반서비스
1,6840,기타금융
2,27410,기타금융
3,282330,유통
4,138930,기타금융


In [6]:
# Persist the three base tables as CSV files (without the pandas index).
# The target directory is named once so it only has to be changed in one place.
out_dir = "/Users/leesangwon/Documents/ThemeStock_file/KRX_data"
kor_info.to_csv(f"{out_dir}/kor_info.csv", index=False)
kor_price.to_csv(f"{out_dir}/kor_price.csv", index=False)
kor_sector.to_csv(f"{out_dir}/kor_sector.csv", index=False)

# 본 분석

### 개별일반조회 - unit_info

In [29]:
# Fetch the general-info snapshot (report MDCSTAT02101) for one issue
# (Samsung Electronics here) and shape it into the one-row `unit_info`.
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

# Request headers
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

# Request body; the lines marked "per issue" are the ones to edit for another stock.
payload = {
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT02101', # last digit - 1: general info, 3: overview, 4: financials
    'locale': 'ko_KR',
    'tboxisuCd_finder_stkisu0_4': '005930/삼성전자', # per issue
    'isuCd': 'KR7005930003', # per issue
    'isuCd2': 'KR7005930003', # per issue
    'codeNmisuCd_finder_stkisu0_4': '삼성전자', # per issue
    'param1isuCd_finder_stkisu0_4': 'ALL',
    'csvxls_isNo': 'false'
}

# Bound the wait and fail on HTTP errors right here, instead of hanging
# forever or surfacing later as an opaque JSON decoding error.
res = requests.post(url, headers=headers, data=payload, timeout=30)
res.raise_for_status()
data = res.json()

# The response is one flat JSON object, so it becomes a single-row frame.
# NOTE(review): the columns are renamed purely by position, so this depends on
# the order of the keys in the response - confirm if the API output changes.
df = pd.DataFrame([data])
df.columns = ['표준티커', '종목명', '티커', '시장구분', '현재가', '등락', '증감라벨', '등락률',
              '시가', '고가', '저가', '거래량', '거래대금', '시가총액_백만원', '52주최고', '52주최저',
              '외국인비율', 'PER', 'PBR', '대용가', '배당수익률', '날짜', '날짜_시간']

# Strip thousands separators and convert to numbers (unparseable -> NaN).
for col in ['현재가', '등락', '등락률', '시가', '고가', '저가', '52주최고', '52주최저',
            'PER', 'PBR', '거래량', '거래대금', '외국인비율' ,'대용가' ,'배당수익률']:
    df[col] = pd.to_numeric(df[col].str.replace(",", "", regex=False), errors='coerce')


# Parse the snapshot timestamp (12-hour clock with an AM/PM marker).
df['날짜_시간'] = pd.to_datetime(df['날짜_시간'], format='%Y.%m.%d %p %I:%M:%S')


unit_info = df[['날짜_시간', '표준티커', '종목명', '현재가', '등락', '등락률',
         '시가', '고가', '저가', '52주최고', '52주최저',
         'PER', 'PBR', '거래량', '거래대금', '외국인비율', '배당수익률', '대용가']]
unit_info.head()



Unnamed: 0,날짜_시간,표준티커,종목명,현재가,등락,등락률,시가,고가,저가,52주최고,52주최저,PER,PBR,거래량,거래대금,외국인비율,배당수익률,대용가
0,2025-08-18 15:36:06,KR7005930003,삼성전자,70000,-1600,-2.23,71100,71200,70000,78900,49900,14.14,1.21,13549958,953512936500,50.57,2.07,54410


#### 일반조회 종목열거 코드

In [7]:
# Collect the general-info snapshot (report MDCSTAT02101) for every issue in
# `kor_info` and stack the one-row results into `all_info`.

# Issues to fetch; add a WHERE clause here to restrict the universe.
df = duckdb.query(
    """
    select *
    from kor_info
    
    """
).fetchdf()
kor_info_list = df.to_dict('records')

# Everything below is identical for every issue, so it is built only once.
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}
# NOTE(review): response columns are renamed purely by position, so this
# depends on the order of the keys in the response.
response_columns = ['표준티커', '종목명', '티커', '시장구분', '현재가', '등락', '증감라벨', '등락률',
                    '시가', '고가', '저가', '거래량', '거래대금', '시가총액_백만원', '52주최고', '52주최저',
                    '외국인비율', 'PER', 'PBR', '대용가', '배당수익률', '날짜', '날짜_시간']
numeric_columns = ['현재가', '등락', '등락률', '시가', '고가', '저가', '52주최고', '52주최저',
                   'PER', 'PBR', '거래량', '거래대금', '외국인비율', '대용가', '배당수익률']
selected_columns = ['날짜_시간', '시장구분', '티커', '종목명',
                    '시가', '고가', '저가', '52주최고', '52주최저', 'PER', 'PBR', '외국인비율', '배당수익률', '대용가']

# One single-row frame per successfully fetched issue
results = []

# A Session reuses the HTTP connection across the thousands of requests.
with requests.Session() as session:
    for t in tqdm(kor_info_list):
        try:
            payload = {
                'bld': 'dbms/MDC/STAT/standard/MDCSTAT02101',
                'locale': 'ko_KR',
                'tboxisuCd_finder_stkisu0_4': f"{t['티커']}/{t['종목명']}",
                'isuCd': t['표준티커'],
                'isuCd2': t['표준티커'],
                'codeNmisuCd_finder_stkisu0_4': t['종목명'],
                'param1isuCd_finder_stkisu0_4': 'ALL',
                'csvxls_isNo': 'false'
            }

            # The timeout keeps one stalled request from freezing the whole run.
            res = session.post(url, headers=headers, data=payload, timeout=30)
            res.raise_for_status()
            data = res.json()

            # The response is one flat JSON object -> single-row frame.
            df = pd.DataFrame([data])
            df.columns = response_columns

            # Strip thousands separators and convert to numbers (unparseable -> NaN).
            for col in numeric_columns:
                df[col] = pd.to_numeric(df[col].str.replace(",", "", regex=False), errors='coerce')

            # Parse the snapshot timestamp (12-hour clock with an AM/PM marker).
            df['날짜_시간'] = pd.to_datetime(df['날짜_시간'], format='%Y.%m.%d %p %I:%M:%S')

            # Keep only the columns of interest.
            unit_info = df[selected_columns]

            results.append(unit_info)

        except Exception as e:
            # Deliberate best-effort: report the failed issue and move on.
            print(f"{t['종목명']} 실패: {e}")

        # Fixed 0.2 s pause between requests (too fast may get the client blocked).
        # It runs after failures too, so a struggling server is not hammered.
        time.sleep(0.2)

# Combine the results; fall back to an empty, correctly-shaped frame when no
# request succeeded (pd.concat raises on an empty list).
if results:
    all_info = pd.concat(results, ignore_index=True)
else:
    all_info = pd.DataFrame(columns=selected_columns)
all_info


100%|██████████| 2880/2880 [18:05<00:00,  2.65it/s]


Unnamed: 0,날짜_시간,시장구분,티커,종목명,시가,고가,저가,52주최고,52주최저,PER,PBR,외국인비율,배당수익률,대용가
0,2025-08-20 13:48:07,KOSDAQ,098120,마이크로컨텍솔,15950,16800,15920,24300,4250,12.44,2.04,11.69,0.50,10450
1,2025-08-20 13:48:08,KOSDAQ GLOBAL,009520,포스코엠텍,13400,13650,13280,19190,10460,958.57,5.00,5.09,0.15,10170
2,2025-08-20 13:48:08,KOSPI,095570,AJ네트웍스,4115,4150,4065,4970,3550,8.55,0.42,4.20,6.51,3070
3,2025-08-20 13:48:08,KOSPI,006840,AK홀딩스,11210,11220,10910,13690,9640,,0.27,5.69,3.62,7880
4,2025-08-20 13:48:09,KOSPI,282330,BGF리테일,120000,120000,115800,129800,99000,10.44,1.72,28.55,3.47,93130
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2875,2025-08-20 14:06:10,KOSPI,000545,흥국화재우,8120,8120,7950,19890,4200,,,11.25,0.00,4920
2876,2025-08-20 14:06:10,KOSPI,000540,흥국화재,4065,4145,4015,5850,2955,2.80,0.34,1.92,0.00,2720
2877,2025-08-20 14:06:11,KOSPI,003280,흥아해운,1668,1669,1630,2370,1420,10.03,1.74,2.21,0.00,1210
2878,2025-08-20 14:06:11,KOSDAQ,037440,희림,4705,4740,4510,6250,4400,4.49,0.71,7.03,3.27,3270


In [8]:
# Persist the collected general-info table as CSV (without the pandas index).
all_info.to_csv("/Users/leesangwon/Documents/ThemeStock_file/KRX_data/all_info.csv", index=False)

### 개별재무정보 - unit_financial

In [30]:
# Fetch the financial summary (report MDCSTAT02104) for one issue
# (Samsung Electronics here) and shape it into the one-row `unit_financial`.
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'

# Request headers
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}

# Request body; the lines marked "per issue" are the ones to edit for another stock.
payload = {
    'bld': 'dbms/MDC/STAT/standard/MDCSTAT02104', # last digit - 1: general info, 3: overview, 4: financials
    'locale': 'ko_KR',
    'tboxisuCd_finder_stkisu0_4': '005930/삼성전자', # per issue
    'isuCd': 'KR7005930003', # per issue
    'isuCd2': 'KR7005930003', # per issue
    'codeNmisuCd_finder_stkisu0_4': '삼성전자', # per issue
    'param1isuCd_finder_stkisu0_4': 'ALL',
    'csvxls_isNo': 'false'
}

# Bound the wait and fail on HTTP errors right here, instead of hanging
# forever or surfacing later as an opaque JSON decoding error.
res = requests.post(url, headers=headers, data=payload, timeout=30)
res.raise_for_status()
data = res.json()

# The response is one flat JSON object -> single-row frame. The standard
# ticker is appended as the last column before the positional rename below.
# NOTE(review): the rename depends on the order of the keys in the response.
df = pd.DataFrame([data])
df['표준티커'] =  'KR7005930003' # per issue
df.columns = ['CONN_TP', '자산', '부채', '자본금', '자본', '매출액', '영업이익', '당기순이익', '날짜_시간', '표준티커']

# Strip thousands separators and convert to numbers (unparseable -> NaN).
for col in ['자산', '부채', '자본', '자본금', '매출액', '영업이익', '당기순이익']:
    df[col] = pd.to_numeric(df[col].str.replace(",", "", regex=False), errors='coerce')


# Parse the snapshot timestamp (12-hour clock with an AM/PM marker).
df['날짜_시간'] = pd.to_datetime(df['날짜_시간'], format='%Y.%m.%d %p %I:%M:%S')

unit_financial = df[['날짜_시간', '표준티커', '자산', '부채', '자본', '자본금', '매출액', '영업이익', '당기순이익']]
unit_financial.head()


Unnamed: 0,날짜_시간,표준티커,자산,부채,자본,자본금,매출액,영업이익,당기순이익
0,2025-08-18 15:36:13,KR7005930003,514531948000000,112339878000000,402192070000000,897514000000,300870903000000,32725961000000,34451351000000


#### 재무정보 종목열거 코드

In [9]:
# Collect the financial summary (report MDCSTAT02104) for every issue in
# `kor_info` and stack the one-row results into `all_financial`.

# Issues to fetch; add a WHERE clause here to restrict the universe.
df = duckdb.query(
    """
    select *
    from kor_info
    
    """
).fetchdf()

kor_info_list = df.to_dict('records')

# Everything below is identical for every issue, so it is built only once.
url = 'http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd'
headers = {
    'User-Agent': 'Mozilla/5.0',
    'Referer': 'http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201'
}
# NOTE(review): response columns are renamed purely by position (the standard
# ticker is appended last), so this depends on the order of the response keys.
response_columns = ['CONN_TP', '자산', '부채', '자본금', '자본', '매출액', '영업이익', '당기순이익', '날짜_시간', '표준티커']
numeric_columns = ['자산', '부채', '자본', '자본금', '매출액', '영업이익', '당기순이익']
selected_columns = ['날짜_시간', '표준티커', '자산', '부채', '자본', '자본금', '매출액', '영업이익', '당기순이익']

# One single-row frame per successfully fetched issue
results = []

# A Session reuses the HTTP connection across the thousands of requests.
with requests.Session() as session:
    for t in tqdm(kor_info_list):
        try:
            payload = {
                'bld': 'dbms/MDC/STAT/standard/MDCSTAT02104',
                'locale': 'ko_KR',
                'tboxisuCd_finder_stkisu0_4': f"{t['티커']}/{t['종목명']}",
                'isuCd': t['표준티커'],
                'isuCd2': t['표준티커'],
                'codeNmisuCd_finder_stkisu0_4': t['종목명'],
                'param1isuCd_finder_stkisu0_4': 'ALL',
                'csvxls_isNo': 'false'
            }

            # The timeout keeps one stalled request from freezing the whole run.
            res = session.post(url, headers=headers, data=payload, timeout=30)
            res.raise_for_status()
            data = res.json()

            # The response is one flat JSON object -> single-row frame.
            df = pd.DataFrame([data])
            df['표준티커'] = t['표준티커']

            df.columns = response_columns

            # Strip thousands separators and convert to numbers (missing -> NaN).
            for col in numeric_columns:
                df[col] = pd.to_numeric(df[col].str.replace(",", "", regex=False), errors='coerce')

            # Parse the snapshot timestamp (12-hour clock with an AM/PM marker).
            df['날짜_시간'] = pd.to_datetime(df['날짜_시간'], format='%Y.%m.%d %p %I:%M:%S')

            # Keep only the columns of interest.
            unit_financial = df[selected_columns]

            results.append(unit_financial)

        except Exception as e:
            # Deliberate best-effort: report the failed issue and move on.
            print(f"### {t['종목명']} 오류 발생: {e}")

        # Throttle requests. This runs after failures too, so a struggling
        # server is not hammered with back-to-back retries of the next issues.
        time.sleep(0.15)

# Combine the results; fall back to an empty, correctly-shaped frame when no
# request succeeded (pd.concat raises on an empty list).
if results:
    all_financial = pd.concat(results, ignore_index=True)
else:
    all_financial = pd.DataFrame(columns=selected_columns)
all_financial

100%|██████████| 2880/2880 [14:39<00:00,  3.27it/s]


Unnamed: 0,날짜_시간,표준티커,자산,부채,자본,자본금,매출액,영업이익,당기순이익
0,2025-08-20 14:06:12,KR7098120009,76917404000,11858904000,65058500000,4156383000,69664083000,10204007000,10680949000
1,2025-08-20 14:06:13,KR7009520008,149757864000,37944731000,111813134000,20821352000,346628290000,1422749000,567504000
2,2025-08-20 14:06:13,KR7095570008,1717770434000,1274047256000,443723178000,46822295000,1014156314000,72790208000,21600801000
3,2025-08-20 14:06:13,KR7006840003,5336874928000,4091846427000,1245028501000,66237805000,4488280533000,130320128000,-16585695000
4,2025-08-20 14:06:13,KR7282330000,3402590176000,2218457219000,1184132957000,17283906000,8698757469000,251641247000,195217876000
...,...,...,...,...,...,...,...,...,...
2875,2025-08-20 14:20:50,KR7000541003,12894689000000,12122030000000,772659000000,325821000000,3374016000000,148779000000,106722000000
2876,2025-08-20 14:20:50,KR7000540005,12894689000000,12122030000000,772659000000,325821000000,3374016000000,148779000000,106722000000
2877,2025-08-20 14:20:51,KR7003280005,443559318000,209520122000,234039196000,120212450000,188000982000,27541002000,39961399000
2878,2025-08-20 14:20:51,KR7037440005,208298374000,126168802000,82129572000,6961238000,240961112000,15279029000,13428368000


In [10]:
# Persist the collected financial table as CSV (without the pandas index).
all_financial.to_csv("/Users/leesangwon/Documents/ThemeStock_file/KRX_data/all_financial.csv", index=False)

### 개별종목시세 - unit_price

In [71]:
# Fetch the daily price history (report MDCSTAT01701) for one issue
# (Kakao here) over a fixed date range and shape it into `unit_price`.
url = "http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd"

# Request headers
headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "X-Requested-With": "XMLHttpRequest"
}

# Request parameters; the lines marked "per query" are the ones to edit.
data = {
    "bld": "dbms/MDC/STAT/standard/MDCSTAT01701",
    "locale": "ko_KR",
    "tboxisuCd_finder_stkisu0_1": "035720/카카오", # per query
    "isuCd": "KR7035720002", # per query
    "isuCd2": "KR7035720002", # per query
    "codeNmisuCd_finder_stkisu0_1": "카카오", # per query
    "param1isuCd_finder_stkisu0_1": "ALL",
    "strtDd": "20230730", # per query: first date (YYYYMMDD)
    "endDd": "20250725", # per query: last date (YYYYMMDD)
    "adjStkPrc_check": "Y",
    "adjStkPrc": "2",
    "share": "1",
    "money": "1",
    "csvxls_isNo": "false"
}

# Bound the wait and fail on HTTP errors right here, instead of hanging
# forever or surfacing later as an opaque JSON decoding error.
res = requests.post(url, headers=headers, data=data, timeout=30)
res.raise_for_status()

# The daily rows live under the 'output' key.
json_data = res.json()
df = pd.DataFrame(json_data['output'])

# Trim stray whitespace from the column names, then give them readable names.
df.columns = [col.strip() for col in df.columns]
df = df.rename(columns={
    'TRD_DD': '일자',
    'TDD_CLSPRC': '종가',
    'FLUC_TP_CD': '등락구분',
    'CMPPREVDD_PRC': '등락',
    'FLUC_RT': '등락률',
    'TDD_OPNPRC': '시가',
    'TDD_HGPRC': '고가',
    'TDD_LWPRC': '저가',
    'ACC_TRDVOL': '거래량',
    'ACC_TRDVAL': '거래대금',
    'MKTCAP' : '시가총액',
    'LIST_SHRS' : '상장주식수'
})

# Strip thousands separators and convert to numbers (unparseable -> NaN).
for col in ['종가', '등락', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '시가총액', '상장주식수']:
    df[col] = pd.to_numeric(df[col].str.replace(",", "", regex=False), errors='coerce')

df['일자'] = pd.to_datetime(df['일자'], format='%Y/%m/%d')
df['표준티커'] = "KR7035720002" # per query: must match isuCd above

unit_price = df[['일자', '표준티커', '종가', '등락', '시가', '고가', '저가', '거래량', '거래대금', '시가총액', '상장주식수']]
unit_price

Unnamed: 0,일자,표준티커,종가,등락,시가,고가,저가,거래량,거래대금,시가총액,상장주식수
0,2025-07-25,KR7035720002,55800,1700,54000,55900,53700,1822409,100515827450,24664365687600,442013722
1,2025-07-24,KR7035720002,54100,-900,55500,55900,53500,1968455,106958924400,23912942360200,442013722
2,2025-07-23,KR7035720002,55000,-1500,56700,56900,54100,1861734,102443926450,24310754710000,442013722
3,2025-07-22,KR7035720002,56500,-400,57100,57100,56100,1274106,71806668900,24973775293000,442013722
4,2025-07-21,KR7035720002,56900,0,56700,57400,56400,1111739,63188610600,25150580781800,442013722
...,...,...,...,...,...,...,...,...,...,...,...
478,2023-08-04,KR7035720002,51800,-1500,53800,55000,51000,3173204,166585777500,23012838422000,444263290
479,2023-08-03,KR7035720002,53300,100,52800,53800,52300,2146867,114116055900,23679233357000,444263290
480,2023-08-02,KR7035720002,53200,-1700,55100,55100,52300,2936126,157094023000,23634807028000,444263290
481,2023-08-01,KR7035720002,54900,3600,51800,55300,51800,7074120,384759856500,24390054621000,444263290


#### 5영업일 unit수익률 코드

In [None]:
# Fetch the daily price history (report MDCSTAT01701) for one issue
# (Samsung Electronics here) and compute its most recent five-session return.
url = "http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd"

# Request headers
headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "X-Requested-With": "XMLHttpRequest"
}

# Request parameters; the lines marked "per query" are the ones to edit.
data = {
    "bld": "dbms/MDC/STAT/standard/MDCSTAT01701",
    "locale": "ko_KR",
    "tboxisuCd_finder_stkisu0_1": "005930/삼성전자", # per query
    "isuCd": "KR7005930003", # per query
    "isuCd2": "KR7005930003", # per query
    "codeNmisuCd_finder_stkisu0_1": "삼성전자", # per query
    "param1isuCd_finder_stkisu0_1": "ALL",
    "strtDd": "20230727", # per query: first date (YYYYMMDD)
    "endDd": "20250725", # per query: last date (YYYYMMDD)
    "adjStkPrc_check": "Y",
    "adjStkPrc": "2",
    "share": "1",
    "money": "1",
    "csvxls_isNo": "false"
}

# Bound the wait and fail on HTTP errors right here, instead of hanging
# forever or surfacing later as an opaque JSON decoding error.
res = requests.post(url, headers=headers, data=data, timeout=30)
res.raise_for_status()

# The daily rows live under the 'output' key.
json_data = res.json()
df = pd.DataFrame(json_data['output'])

# Trim stray whitespace from the column names, then give them readable names.
df.columns = [col.strip() for col in df.columns]
df = df.rename(columns={
    'TRD_DD': '일자',
    'TDD_CLSPRC': '종가',
    'FLUC_TP_CD': '등락구분',
    'CMPPREVDD_PRC': '등락',
    'FLUC_RT': '등락률',
    'TDD_OPNPRC': '시가',
    'TDD_HGPRC': '고가',
    'TDD_LWPRC': '저가',
    'ACC_TRDVOL': '거래량',
    'ACC_TRDVAL': '거래대금',
    'MKTCAP' : '시가총액',
    'LIST_SHRS' : '상장주식수'
})

# Strip thousands separators and convert to numbers (unparseable -> NaN).
for col in ['종가', '등락', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '시가총액', '상장주식수']:
    df[col] = pd.to_numeric(df[col].str.replace(",", "", regex=False), errors='coerce')

df['일자'] = pd.to_datetime(df['일자'], format='%Y/%m/%d')
df['표준티커'] = "KR7005930003" # per query: must match isuCd above

unit_price = df[['일자', '표준티커', '종가', '등락', '시가', '고가', '저가', '거래량', '거래대금', '시가총액', '상장주식수']]

# Percent change of the close versus the close four rows (sessions) earlier,
# i.e. a window of five sessions counting both ends. Only the newest date is
# kept. DuckDB reads `unit_price` from the Python variable of the same name.
return_5day = duckdb.query(
    """
    select 일자, 표준티커, 종가,
        round((종가 - lag(종가, 4) over (order by 일자))
        / lag(종가, 4) over (order by 일자) * 100, 2) as 수익률_5일
    from unit_price
    order by 일자 desc
    limit 1
    """
).fetchdf()

return_5day

Unnamed: 0,일자,표준티커,종가,수익률_5일
0,2025-07-25,KR7005930003,65900,-2.8


#### 5영업일 종목열거 코드

In [47]:
# Compute the latest five-session return for every issue selected below and
# stack the one-row results into `return_5day`.

# Issue list (standard ticker, ticker, name); adjust the WHERE clause to pick
# a market or any other subset.
df = duckdb.query(
    """
    select *
    from kor_info
    where 시장구분 = 'KOSDAQ'
    """
).fetchdf()

kor_info_list = df.to_dict('records')

# Everything below is identical for every issue, so it is built only once.
url = "http://data.krx.co.kr/comm/bldAttendant/getJsonData.cmd"
headers = {
    "User-Agent": "Mozilla/5.0",
    "Referer": "http://data.krx.co.kr/contents/MDC/MDI/mdiLoader/index.cmd?menuId=MDC0201020201",
    "Content-Type": "application/x-www-form-urlencoded; charset=UTF-8",
    "X-Requested-With": "XMLHttpRequest"
}
column_names = {
    'TRD_DD': '일자',
    'TDD_CLSPRC': '종가',
    'FLUC_TP_CD': '등락구분',
    'CMPPREVDD_PRC': '등락',
    'FLUC_RT': '등락률',
    'TDD_OPNPRC': '시가',
    'TDD_HGPRC': '고가',
    'TDD_LWPRC': '저가',
    'ACC_TRDVOL': '거래량',
    'ACC_TRDVAL': '거래대금',
    'MKTCAP': '시가총액',
    'LIST_SHRS': '상장주식수'
}
numeric_columns = ['종가', '등락', '등락률', '시가', '고가', '저가', '거래량', '거래대금', '시가총액', '상장주식수']

# One single-row frame per successfully processed issue
results = []

# A Session reuses the HTTP connection across all requests.
with requests.Session() as session:
    for t in tqdm(kor_info_list):
        # Request parameters
        data = {
            "bld": "dbms/MDC/STAT/standard/MDCSTAT01701",
            "locale": "ko_KR",
            "tboxisuCd_finder_stkisu0_1": f"{t['티커']}/{t['종목명']}",
            "isuCd": t["표준티커"],
            "isuCd2": t["표준티커"],
            "codeNmisuCd_finder_stkisu0_1": t["종목명"],
            "param1isuCd_finder_stkisu0_1": "ALL",
            "strtDd": "20250714", # first date of the window (YYYYMMDD) - edit per run
            "endDd": "20250718", # last date of the window (YYYYMMDD) - edit per run
            "adjStkPrc_check": "Y",
            "adjStkPrc": "2",
            "share": "1",
            "money": "1",
            "csvxls_isNo": "false"
        }

        try:
            # The request is inside the try so that a network error skips this
            # issue instead of aborting the whole run; the timeout keeps one
            # stalled request from freezing the loop.
            res = session.post(url, headers=headers, data=data, timeout=30)
            res.raise_for_status()
            json_data = res.json()
            df = pd.DataFrame(json_data['output'])

            # Tidy the column names and convert the value columns to numbers.
            df.columns = [col.strip() for col in df.columns]
            df = df.rename(columns=column_names)
            for col in numeric_columns:
                df[col] = pd.to_numeric(df[col].str.replace(",", "", regex=False), errors='coerce')

            df['일자'] = pd.to_datetime(df['일자'], format='%Y/%m/%d')
            df['표준티커'] = t["표준티커"]

            # DuckDB reads `unit_price` from the Python variable of the same name.
            unit_price = df[['일자', '표준티커', '종가']]

            # Percent change of the close versus the close four rows (sessions)
            # earlier; only the newest date is kept. The value is NULL when the
            # issue has fewer than five rows in the window.
            latest_return = duckdb.query(
                """
                select 일자, 표준티커, 종가,
                    round((종가 - lag(종가, 4) over (order by 일자))
                    / lag(종가, 4) over (order by 일자) * 100, 2) as 수익률_5일
                from unit_price
                order by 일자 desc
                limit 1
                """
            ).fetchdf()

            results.append(latest_return)

        except Exception as e:
            # Deliberate best-effort: report the failed issue and move on.
            print(f"[ERROR] {t['종목명']} 처리 중 오류 발생: {e}")

        # Short pause between requests, whether or not this one succeeded.
        time.sleep(0.05)

# Combine the results; fall back to an empty, correctly-shaped frame when
# nothing succeeded (pd.concat raises on an empty list).
if results:
    return_5day = pd.concat(results, ignore_index=True)
else:
    return_5day = pd.DataFrame(columns=['일자', '표준티커', '종가', '수익률_5일'])

# Show the first rows.
return_5day.head()

  2%|▏         | 41/1745 [00:05<03:33,  7.99it/s]


KeyboardInterrupt: 

In [None]:
# List every collected issue ordered by its five-session return, best first.
# NOTE: despite the variable name, this query does no sector grouping; it is a
# plain sort of `return_5day`, which DuckDB reads from the Python variable.
ranking_sql = """
    select 일자, 표준티커, 종가, 수익률_5일
    from return_5day
    order by 수익률_5일 desc
    """
ranking_relation = duckdb.query(ranking_sql)
sector_5day_return_rankone = ranking_relation.fetchdf()

sector_5day_return_rankone

In [32]:
# Pick the top five-session performer of each sector. The returns are joined
# to the issue master (by standard ticker) and to the sector table (by ticker),
# ranked within each sector, and only rank 1 is kept. rank() assigns the same
# rank to ties, so a sector can contribute more than one row.
# DuckDB reads return_5day, kor_info and kor_sector from the Python variables.
sector_leader_sql = """
    select *
    from (
        select 
            일자, 시장구분, i.표준티커, i.티커, 업종명, 종목명, 수익률_5일,
            rank() over(partition by 업종명 order by 수익률_5일 desc) as sector_rank
        from return_5day r
        join kor_info i using(표준티커)
        join kor_sector s on i.티커 = s.티커
    ) t
    where sector_rank = 1
    order by 수익률_5일 desc
    """
sector_leader_relation = duckdb.query(sector_leader_sql)
sector_5day_return_rankone = sector_leader_relation.fetchdf()

sector_5day_return_rankone

CatalogException: Catalog Error: Table with name return_5day does not exist!
Did you mean "pg_enum"?

# SQL

In [77]:
# For every date in `unit_price`, compute the percent change of the close
# versus the close four rows (sessions) earlier, newest date first. The first
# four dates have no earlier row to compare with and come back empty.
trailing_return_sql = """
select 일자, 표준티커, 종가,
    round((종가 - lag(종가, 4) over (order by 일자))
    / lag(종가, 4) over (order by 일자) * 100, 2) as 수익률_5일
from unit_price
order by 일자 desc
    """
trailing_return_relation = duckdb.query(trailing_return_sql)
df = trailing_return_relation.fetchdf()
df

Unnamed: 0,일자,표준티커,종가,수익률_5일
0,2025-07-25,KR7005930003,65900,-2.80
1,2025-07-24,KR7005930003,66000,-1.64
2,2025-07-23,KR7005930003,66400,-0.45
3,2025-07-22,KR7005930003,66000,2.01
4,2025-07-21,KR7005930003,67800,6.44
...,...,...,...,...
480,2023-08-02,KR7005930003,69900,-2.51
481,2023-08-01,KR7005930003,71100,
482,2023-07-31,KR7005930003,69800,
483,2023-07-28,KR7005930003,70600,


In [75]:
# Look up the closing price of one issue by name: the quote table is joined to
# the issue master on the ticker and filtered on the issue name.
# DuckDB reads kor_price and kor_info from the Python variables.
price_lookup_sql = """
    select 티커, 종목명, 종가
    from kor_price p
    join kor_info i using(티커)
    where 종목명 = '삼성전자'
    """
price_lookup_relation = duckdb.query(price_lookup_sql)
price_lookup_relation.fetchdf()

Unnamed: 0,티커,종목명,종가
0,5930,삼성전자,65900


# DART 전자공시

In [None]:
import io
import os
import xml.etree.ElementTree as ET
import zipfile

import pandas as pd
import requests

# Download OpenDART's full company-code list and look up one company's
# corp_code (Samsung Electronics here) for use in later DART requests.

# The API key is a credential and must not live in the notebook: it is read
# from the DART_API_KEY environment variable instead.
# NOTE: a key that was ever committed in this file should be treated as leaked
# and re-issued.
api_key = os.environ.get('DART_API_KEY')
if not api_key:
    raise RuntimeError(
        "DART_API_KEY is not set; export your OpenDART API key before running this cell."
    )

# The endpoint returns a ZIP archive; the XML inside is taken from its first
# member. The key is passed via `params` so that it is URL-encoded properly.
url = 'https://opendart.fss.or.kr/api/corpCode.xml'
res = requests.get(url, params={'crtfc_key': api_key}, timeout=60)
res.raise_for_status()
with zipfile.ZipFile(io.BytesIO(res.content)) as z:
    xml_content = z.read(z.namelist()[0])

# XML -> DataFrame: one row per child of the root, one column per sub-element.
tree = ET.fromstring(xml_content)
corp_list = [{item.tag: item.text for item in child} for child in tree]
corp_df = pd.DataFrame(corp_list)

# Example: find Samsung Electronics (raises IndexError if the name is absent).
corp_code = corp_df[corp_df['corp_name'] == '삼성전자']['corp_code'].values[0]

In [None]:
# Key financial accounts of one company from its annual business report
# (fiscal year 2024), as a DataFrame.
url = 'https://opendart.fss.or.kr/api/fnlttSinglAcnt.json'
params = {
    'crtfc_key': api_key,
    'corp_code': corp_code,
    'bsns_year': '2024',
    'reprt_code': '11011',  # 11011: annual report, 11012: half-year, 11013: Q1, 11014: Q3
    # CFS: consolidated / OFS: separate.
    # NOTE(review): fs_div is documented for fnlttSinglAcntAll; confirm it has
    # any effect on this endpoint.
    'fs_div': 'CFS'
}

# Bound the wait and fail on HTTP errors instead of hanging indefinitely.
res = requests.get(url, params=params, timeout=30)
res.raise_for_status()
data = res.json()

# OpenDART reports failures (bad key, no data, ...) inside the JSON body with
# a status other than '000' and no 'list' key; surface its message instead of
# a bare KeyError.
if data.get('status') != '000':
    raise RuntimeError(f"OpenDART error {data.get('status')}: {data.get('message')}")

df_fnl = pd.DataFrame(data['list'])
df_fnl