# EDA

In [2]:
from pathlib import Path

import fn_dg6_ingest
from utils.chkxp_ingest import open as chkxp_open

## Load data

### Check Expert 

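- KODEX 200 ETF, 1-minute bars (OHLCV + NAV)
- KODEX 200 ETF, 10-second bars (OHLCV + NAV + LOB)
- KOSPI 200, Mini KOSPI 200 and V-KOSPI 200 futures, 1-minute bars
- KTB 3-year and 10-year futures and spreads, 1-minute bars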

In [3]:
# Check Expert (chkxp) locations, relative to the notebook's working directory:
# RAW_DIR holds the exported CSV files, DB_DIR is the root for per-dataset output.
RAW_DIR = Path("../data/raw/chkxp")
DB_DIR = Path("../data/db/chkxp")

# Short dataset key -> full path of the raw CSV inside RAW_DIR.
DATASETS = {
    key: RAW_DIR.joinpath(file_name)
    for key, file_name in (
        ("etf_1m", "chkxp_etf(kodex200)_(1m)_ohlcvNAV.csv"),
        ("etf_10s", "chkxp_etf(kodex200)_(10s)_ohlcvNAVlob.csv"),
        ("kp200", "kp200_(fut)(mini)(v)_(1m)_from(20250101)_to(20260207).csv"),
        ("ktb", "ktb_(3)(10)_(fut)(spread)(2nd)_(1m)_from(20200101)_to(20260207).csv"),
    )
}

In [4]:
# Open every Check Expert CSV listed in DATASETS and print a one-line summary
# (format name, frequency, row count, entities) taken from describe().
ds = {}
for name in DATASETS:
    csv_path = DATASETS[name]
    # One output directory per dataset, named after the CSV file stem.
    # NOTE(review): presumably chkxp_open writes its ingested output there — confirm.
    output_dir = DB_DIR.joinpath(csv_path.stem)
    handle = chkxp_open(str(csv_path), output_dir=str(output_dir))
    ds[name] = handle
    info = handle.describe()
    summary_line = f"{name:10s}  {info.format_name:15s}  {info.frequency:4s}  {info.shape[0]:>10,} rows  entities={info.entities}"
    print(summary_line)

etf_1m      single_entity    1M        23,800 rows  entities=['KODEX 200']
etf_10s     single_entity    10S       49,200 rows  entities=['KODEX 200']
kp200       multi_entity     1M       786,800 rows  entities=['KOSPI200 선물 2603', 'K200 스프레드 6366', 'KOSPI200 선물 2606', 'MINI KOSPI200 선물 2602', 'MINI K200 스프레드 6263', 'V-KOSPI200 선물 2602', 'V-KOSPI200 스프레드 6263']
ktb         multi_entity     1M     1,513,800 rows  entities=['(N)KTB3 선물 2603', '(N)KTB3 스프레드 6366', '(N)KTB3 선물 2606', '(N)KTB10 선물 2603', '(N)KTB10 스프레드 6366', '(N)KTB10 선물 2606']


In [5]:
# Load each opened Check Expert dataset, in the same order as the keys below.
df_etf_1m, df_etf_10s, df_kp200, df_ktb = (
    ds[key].load() for key in ("etf_1m", "etf_10s", "kp200", "ktb")
)

In [14]:
# Show the loaded 1-minute KODEX 200 frame (rich display, truncated by the notebook).
# NOTE(review): the rendered tail (rows 23795-23798) has NaT datetimes and empty
# value columns — confirm whether these are padding rows that should be dropped.
df_etf_1m

Unnamed: 0,datetime,entity,entity_code,Intra시가,Intra고가,Intra저가,Intra종가,Intra매도거래량,Intra매수거래량,IntraETP기초지수,Intra장중지표가치(iNAV/iIV)시가,Intra장중지표가치(iNAV/iIV)고가,Intra장중지표가치(iNAV/iIV)저가,Intra장중지표가치(iNAV/iIV)종가,IntraETP괴리율,Intra추적오차율
0,2025-11-11 09:01:00,KODEX 200,069500*001,58450.0,58720.0,58450.0,58705.0,40040.0,145573.0,586.33,58501.73,58776.20,58487.99,58773.38,-0.11,0.01
1,2025-11-11 09:02:00,KODEX 200,069500*001,58720.0,58810.0,58625.0,58660.0,96993.0,106907.0,586.35,58773.38,58838.48,58719.31,58779.26,-0.20,0.02
2,2025-11-11 09:03:00,KODEX 200,069500*001,58660.0,58770.0,58605.0,58745.0,70163.0,62650.0,586.63,58779.26,58874.70,58709.60,58823.43,-0.14,0.04
3,2025-11-11 09:04:00,KODEX 200,069500*001,58740.0,58815.0,58675.0,58700.0,82122.0,48424.0,586.40,58823.43,58916.00,58767.96,58780.09,-0.14,0.01
4,2025-11-11 09:05:00,KODEX 200,069500*001,58695.0,58810.0,58685.0,58800.0,38236.0,26858.0,587.43,58780.09,58890.11,58776.59,58881.69,-0.13,0.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23795,NaT,KODEX 200,069500*001,,,,,,,,,,,,,
23796,NaT,KODEX 200,069500*001,,,,,,,,,,,,,
23797,NaT,KODEX 200,069500*001,,,,,,,,,,,,,
23798,NaT,KODEX 200,069500*001,,,,,,,,,,,,,


### FnGuide data

In [6]:
# FnGuide locations, relative to the notebook's working directory:
# FN_RAW_DIR holds the exported CSV files, FN_DB_DIR is the root for per-dataset output.
FN_RAW_DIR = Path("../data/raw/fnguide")
FN_DB_DIR = Path("../data/db/fnguide")

# Short dataset key -> full path of the raw CSV inside FN_RAW_DIR.
FN_DATASETS = {
    key: FN_RAW_DIR.joinpath(file_name)
    for key, file_name in (
        ("ohlcv", "dataguide_kse+kosdaq_ohlcv_from(20160101)_to(20260207).csv"),
        ("consensus", "dataguide_kse+kosdaq_sales-consensus_from(20180101)_to(20260207).csv"),
        ("etf_const", "dataguide_etfconst(kodex200)_from(20250101)_to(20260207).csv"),
    )
}

In [7]:
# Open every FnGuide CSV listed in FN_DATASETS via fn_dg6_ingest and print a
# one-line summary (format name, shape, entities) taken from describe().
fn_ds = {}
for name in FN_DATASETS:
    csv_path = FN_DATASETS[name]
    # One output directory per dataset, named after the CSV file stem.
    # NOTE(review): presumably fn_dg6_ingest.open writes its output there — confirm.
    output_dir = FN_DB_DIR.joinpath(csv_path.stem)
    handle = fn_dg6_ingest.open(str(csv_path), output_dir=str(output_dir))
    fn_ds[name] = handle
    info = handle.describe()
    summary_line = f"{name:12s}  {info.format_name:20s}  {info.shape}  entities={info.entities}"
    print(summary_line)

ohlcv         timeseries_wide       {'default': (7613009, 9)}  entities=4071
consensus     timeseries_wide       {'default': (4913310, 13)}  entities=4071
etf_const     misc_etf              {'default': (53836, 8)}  entities=0


In [8]:
# Load each opened FnGuide dataset, in the same order as the keys below.
df_ohlcv, df_consensus, df_etf_const = (
    fn_ds[key].load() for key in ("ohlcv", "consensus", "etf_const")
)

In [11]:
# Show the loaded KODEX 200 constituent frame (rich display, truncated by the notebook).
# NOTE(review): the first displayed row is a cash line (원화현금) with no constituent
# code or share count — confirm how such rows should be treated downstream.
df_etf_const

Unnamed: 0,date,ETF코드,ETF명,구성종목코드,구성종목,주식수(계약수),금액,금액기준 구성비중(%)
0,2025-01-02,A069500,KODEX 200,,원화현금,,9945243,0.62
1,2025-01-02,A069500,KODEX 200,A000080,하이트진로,47.0,914620,0.06
2,2025-01-02,A069500,KODEX 200,A000100,유한양행,91.0,10765300,0.67
3,2025-01-02,A069500,KODEX 200,A000120,CJ대한통운,17.0,1429700,0.09
4,2025-01-02,A069500,KODEX 200,A000150,두산,11.0,2915000,0.18
...,...,...,...,...,...,...,...,...
53831,2026-02-06,A069500,KODEX 200,A000100,유한양행,90.0,9432000,0.25
53832,2026-02-06,A069500,KODEX 200,A454910,두산로보틱스,29.0,2949300,0.08
53833,2026-02-06,A069500,KODEX 200,A457190,이수스페셜티케미컬,30.0,3000000,0.08
53834,2026-02-06,A069500,KODEX 200,A450080,에코프로머티,38.0,2394000,0.06
