In [0]:
# ---- Input parameters ----
TICKER = "^KS11"                 # KOSPI Composite Index (ticker symbol)
STARTDATE = "2005-01-01"   # query start date (passed to yf.download as `start`)
ENDDATE = "2025-05-31"  # query end date, inclusive (one day is added to form the download's `end`)
# Bar interval passed to yf.download, e.g. 1d, 1wk, 1mo.
# NOTE(review): the original comment also listed "1y"; confirm yfinance accepts it.
# The tidy/sanity-check steps further down convert the index to monthly periods.
INTERVAL = "1mo"
# -----------------

In [0]:
!pip install yfinance pandas pyarrow

import pandas as pd
import yfinance as yf
from pathlib import Path

END_EXCLUSIVE = pd.to_datetime(ENDDATE) + pd.DateOffset(days=1)
OUTDIR = Path("./data")
OUTDIR.mkdir(parents=True, exist_ok=True)

Collecting yfinance
  Downloading yfinance-0.2.65-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting multitasking>=0.0.7 (from yfinance)
  Downloading multitasking-0.0.12.tar.gz (19 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting frozendict>=2.3.4 (from yfinance)
  Downloading frozendict-2.4.6-py312-none-any.whl.metadata (23 kB)
Collecting peewee>=3.16.2 (from yfinance)
  Downloading peewee-3.18.2.tar.gz (949 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/949.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m949.2/949.2 kB[0m [31m12.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparin

In [0]:
# ---- Download data ----
# Ask for split/dividend-adjusted prices (auto_adjust=True); corporate-action
# columns, the progress bar and threaded fetching are switched off.
download_options = dict(
    start=STARTDATE,
    end=END_EXCLUSIVE,
    interval=INTERVAL,
    auto_adjust=True,
    actions=False,
    progress=False,
    threads=False,
)
df = yf.download(TICKER, **download_options)

# Fail fast when nothing usable came back.
if df.empty:
    raise RuntimeError("Downloaded dataframe is empty. Check ticker or connection.")

# Keep only the standard price/volume columns that are actually present, in
# this canonical order (Yahoo may omit 'Adj Close' for indices).
cols = []
for c in ("Open", "High", "Low", "Close", "Adj Close", "Volume"):
    if c in df.columns:
        cols.append(c)
df = df[cols].copy()

# ---- Tidy + add YYYYMM ----
# Snap each bar's timestamp to the calendar month-end of its month, so the
# index lines up with a regular month-end range regardless of which day of
# the month the source stamps the bar with.
df.index = pd.to_datetime(df.index).to_period("M").to_timestamp("M")

# Build YYYYMM (string and integer) for convenience
df["yyyymm"] = df.index.strftime("%Y%m")
df["yyyymm_int"] = df["yyyymm"].astype(int)

# Keep only the months inside the configured window. The bounds are derived
# from STARTDATE/ENDDATE (they used to be hard-coded as 200501/202505), so
# editing the settings now changes the filter as well.
first_ym = int(pd.to_datetime(STARTDATE).strftime("%Y%m"))
last_ym = int(pd.to_datetime(ENDDATE).strftime("%Y%m"))
mask = (df["yyyymm_int"] >= first_ym) & (df["yyyymm_int"] <= last_ym)
df = df.loc[mask].copy()


# ---- Sanity checks (auto from yyyymm_int) ----
# An empty frame would otherwise fail below with an opaque
# "cannot convert float NaN to integer" error from int(min()).
if df.empty:
    raise RuntimeError("No rows to validate after the month filter. Check the requested date range.")

# Build expected monthly index from the first to last yyyymm in the filtered data
start_ym = int(df["yyyymm_int"].min())
end_ym   = int(df["yyyymm_int"].max())

# Split YYYYMM integers into (year, month) pairs.
start_y, start_m = divmod(start_ym, 100)
end_y, end_m     = divmod(end_ym, 100)

# Month-end timestamps for every month between the first and last observed month.
expected = pd.period_range(
    start=f"{start_y}-{start_m:02d}",
    end=f"{end_y}-{end_m:02d}",
    freq="M"
).to_timestamp("M")

# Find missing months vs expected, then reindex to insert them
missing = expected.difference(df.index)
if len(missing) > 0:
    print(f"[WARN] Missing {len(missing)} month(s): {[d.strftime('%Y-%m') for d in missing]}")
    df = df.reindex(expected)  # keep NaN for missing months (or df = df.reindex(expected).ffill() to fill)
    # The inserted rows have NaN in every column, including the month keys
    # (which also turns yyyymm_int into float). Rebuild both keys from the
    # index so every row carries a valid YYYYMM again.
    df["yyyymm"] = df.index.strftime("%Y%m")
    df["yyyymm_int"] = df["yyyymm"].astype(int)

# Assert expected length dynamically; this also trips when the index holds
# more rows than months (e.g. several rows mapped to the same month).
expected_len = len(expected)
assert len(df) == expected_len, f"Expected {expected_len} months, got {len(df)}"

# Helpful metadata, attached to the frame itself via DataFrame.attrs
df.attrs["ticker"] = TICKER
df.attrs["source"] = "Yahoo Finance via yfinance"
# Record the interval that was actually requested instead of a hard-coded
# "1mo", so the metadata stays truthful when INTERVAL is changed.
df.attrs["interval"] = INTERVAL
# NOTE(review): this note is specific to the KOSPI index; revisit it if TICKER changes.
df.attrs["currency_note"] = "KOSPI index is quoted in KRW"


# print metadata
print(df.attrs)


# ---- Save outputs (dynamic names from settings) ----
# YYYYMM stamps of the configured start and end dates (inclusive by month).
start_ym, end_ym = (
    pd.to_datetime(day).to_period("M").strftime("%Y%m")
    for day in (STARTDATE, ENDDATE)
)

# Filesystem-friendly ticker: drop every non-alphanumeric character, lower-case the rest.
ticker_slug = "".join(filter(str.isalnum, TICKER)).lower()

# Concise base name shared by both files, e.g. "ks11_1mo_200501_202505".
base = "_".join((ticker_slug, str(INTERVAL), start_ym, end_ym))

csv_path = OUTDIR / (base + ".csv")
parquet_path = OUTDIR / (base + ".parquet")

# Write the same frame in both formats; the index is kept in each
# (labelled "Date" in the CSV).
df.to_parquet(parquet_path, engine="pyarrow", index=True)
df.to_csv(csv_path, index_label="Date")

print(f"Saved:\n- {csv_path}\n- {parquet_path}")

# Last expression of the cell: show the resulting frame.
df

{'ticker': '^KS11', 'source': 'Yahoo Finance via yfinance', 'interval': '1mo', 'currency_note': 'KOSPI index is quoted in KRW'}
Saved:
- data/ks11_1mo_200501_202505.csv
- data/ks11_1mo_200501_202505.parquet


Price,Open,High,Low,Close,Volume,yyyymm,yyyymm_int
Ticker,^KS11,^KS11,^KS11,^KS11,^KS11,Unnamed: 6_level_1,Unnamed: 7_level_1
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
2005-01-31,896.000000,934.099976,866.169983,932.700012,8288900,200501,200501
2005-02-28,930.159973,1011.650024,918.440002,1011.359985,8563800,200502,200502
2005-03-31,1015.159973,1025.079956,946.409973,965.679993,13025500,200503,200503
2005-04-30,963.780029,996.900024,902.880005,911.299988,8541300,200504,200504
2005-05-31,916.479980,972.169983,910.719971,970.210022,6670500,200505,200505
...,...,...,...,...,...,...,...
2025-01-31,2400.870117,2552.570068,2386.840088,2517.370117,8301400,202501,202501
2025-02-28,2468.739990,2680.699951,2437.610107,2532.780029,9552500,202502,202502
2025-03-31,2522.199951,2654.629883,2479.459961,2481.120117,9442200,202503,202503
2025-04-30,2511.239990,2571.409912,2284.719971,2556.610107,11379100,202504,202504
