In [1]:
import zipfile
from pathlib import Path
import pandas as pd

# ────────────────────────── 사용자 설정 ──────────────────────────
ROOT    = Path('RAW')        # zip 파일들이 있는 폴더
PATTERN = "LOCAL_PEOPLE_DONG_*.zip"             # 파일명 패턴
USE_COLS = ["기준일ID", "시간대구분", "행정동코드", "총생활인구수"]
DONG_PREFIX = "11230"                           # 동대문구
OUTFILE = ROOT / "dongdaemun_monthly_population.parquet"
# ────────────────────────────────────────────────────────────────

# 동대문구 14개 행정동 최신 코드(8자리) → 명칭
dong_map = {
    "11230536": "용신동",
    "11230545": "제기동",
    "11230560": "전농1동",
    "11230570": "전농2동",
    "11230600": "답십리1동",
    "11230610": "답십리2동",
    "11230650": "장안1동",
    "11230660": "장안2동",
    "11230705": "청량리동",
    "11230710": "회기동",
    "11230720": "휘경1동",
    "11230730": "휘경2동",
    "11230740": "이문1동",
    "11230750": "이문2동",
}

def csv_iter_from_zip(zpath: Path):
    with zipfile.ZipFile(zpath) as zf:
        for fn in zf.namelist():
            if fn.lower().endswith(".csv"):
                with zf.open(fn) as f:
                    df = pd.read_csv(
                        f,
                        usecols=USE_COLS,
                        dtype={
                            "기준일ID": str,
                            "시간대구분": str,
                            "행정동코드": str,
                            "총생활인구수": float   # ← 숫자형
                        },
                        index_col=False,
                        encoding="utf-8",
                    )
                    df.columns = df.columns.str.strip()
                    yield df

def load_all_zip(root: Path) -> pd.DataFrame:
    frames = []
    for zp in root.glob(PATTERN):
        frames.extend(csv_iter_from_zip(zp))
    if not frames:
        raise FileNotFoundError("지정 경로에서 매칭되는 zip 파일을 찾지 못함.")
    return pd.concat(frames, ignore_index=True)

def restore_code(series: pd.Series) -> pd.Series:
    """'11230.536' 같은 깨진 코드 → '11230536' 복구, 8자리 zero-pad"""
    s = series.astype(str).str.strip()
    dot = s.str.contains(r"\.")
    if dot.any():
        s.loc[dot] = (
            s.loc[dot].astype(float).mul(1000).round().astype(int).astype(str).str.zfill(8)
        )
    s.loc[~dot] = s.loc[~dot].str.zfill(8)
    return s

def preprocess(df: pd.DataFrame) -> pd.DataFrame:
    df["행정동코드"] = restore_code(df["행정동코드"])
    df["기준일ID"]   = pd.to_datetime(df["기준일ID"], format="%Y%m%d", errors="coerce")
    df = df[df["행정동코드"].str.startswith(DONG_PREFIX)]
    df["기준년월"] = df["기준일ID"].dt.to_period("M").astype(str)
    return df

def monthly_panel(df: pd.DataFrame) -> pd.DataFrame:
    panel = (df.groupby(["기준년월", "행정동코드"], as_index=False)["총생활인구수"]
               .sum())
    panel["행정동코드"] = panel["행정동코드"].str.zfill(8)
    panel["행정동명"]  = panel["행정동코드"].map(dong_map)
    missing = panel.loc[panel["행정동명"].isna(), "행정동코드"].unique()
    if len(missing):
        raise ValueError(f"매핑되지 않은 행정동코드: {missing}")
    return panel[["기준년월", "행정동코드", "행정동명", "총생활인구수"]]

if __name__ == "__main__":
    raw   = load_all_zip(ROOT)
    tidy  = preprocess(raw)
    panel_df = monthly_panel(tidy)
    panel_df.to_parquet(OUTFILE, index=False)
    # 필요 시 CSV 저장:
    # panel_df.to_csv(OUTFILE.with_suffix(".csv"), index=False, encoding="utf-8-sig")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["기준년월"] = df["기준일ID"].dt.to_period("M").astype(str)


In [2]:
panel_df.to_csv('PROCESSED/월별총유동인구.csv',index=False)