In [67]:
# 셀 1 ─────────────────────────────────────────────────────────────
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, box
from pathlib import Path

# ----- 경로 설정 -----
from pathlib import Path

BASE_DIR        = Path('Data')                     # Data 폴더가 루트
CSV_VILLA_PATH  = BASE_DIR / '빌라_행정동매핑.csv'
CSV_RATIO_PATH  = BASE_DIR / '행정동별_인구비율_데이터.csv'
SHP_EMD_PATH    = BASE_DIR / '3. 2024년 2분기 기준 행정동 경계/bnd_dong_00_2024_2Q.shp'
OUT_CSV_PATH    = BASE_DIR / 'chunk_population_ratio.csv'


GRID_SIZE = 500      # 격자 크기(m)


In [68]:
# 셀 2 ─ 행정동 경계 로드 + 500 m 격자 생성
import geopandas as gpd
from shapely.geometry import box

# 1) 행정동 경계 로드
gdf_emd = gpd.read_file(SHP_EMD_PATH, encoding="euc-kr")
if gdf_emd.crs is None:
    gdf_emd.set_crs(epsg=5179, inplace=True)

# 2) 광진구(11215…) 필터용 코드 컬럼 찾기
for col in ["EMD_CD", "ADM_CD", "EMD_CD_N", "ADM_CD2"]:
    if col in gdf_emd.columns:
        CODE_COL = col
        break
else:
    raise ValueError("행정동 코드 컬럼을 찾을 수 없습니다")

gwangjin = (
    gdf_emd[gdf_emd[CODE_COL].astype(str).str.startswith("11050")]
    .to_crs(epsg=5179)
)

# 3) 500 m 격자 생성
minx, miny, maxx, maxy = gwangjin.total_bounds
polygons  = []          # 순수 폴리곤만 저장
grid_ids  = []          # grid_id 별도 저장
gid = 0
x = minx
while x < maxx:
    y = miny
    while y < maxy:
        polygons.append(box(x, y, x + GRID_SIZE, y + GRID_SIZE))
        grid_ids.append(gid)
        y  += GRID_SIZE
        gid += 1
    x += GRID_SIZE

# ★ geometry 파라미터로 polygons 리스트를 직접 전달
grid = gpd.GeoDataFrame(
    {"grid_id": grid_ids},
    geometry=polygons,
    crs=gwangjin.crs
)

# 광진구 내부만 자르기
grid = gpd.overlay(grid, gwangjin[["geometry"]], how="intersection")
grid.head()





  return ogr_read(


Unnamed: 0,grid_id,geometry
0,0,"POLYGON ((961288.789 1947608.144, 961288.789 1..."
1,1,"POLYGON ((961288.789 1948108.144, 961288.789 1..."
2,2,"POLYGON ((961288.789 1948608.144, 961288.789 1..."
3,3,"POLYGON ((961288.789 1948608.144, 961170.607 1..."
4,12,"POLYGON ((961788.789 1947608.144, 961788.789 1..."


In [79]:
# ── 격자(500 m) + grid_id 라벨 시각화 ──────────────────────
import folium, geopandas as gpd

center = grid.unary_union.centroid
center_wgs = gpd.GeoSeries([center], crs=grid.crs).to_crs(4326).iloc[0]

m = folium.Map(location=[center_wgs.y, center_wgs.x],
               zoom_start=14, tiles="cartodbpositron")

# 1) 파란 격자 테두리
folium.GeoJson(
    grid.to_crs(4326).__geo_interface__,
    style_function=lambda x: {
        "fillColor": "#00000000",
        "color": "blue",
        "weight": 1,
        "opacity": 0.8,
    }
).add_to(m)

# 2) grid_id 라벨 (붉은 글씨, 마우스 이벤트 통과)
for _, row in grid.iterrows():
    centroid_wgs = gpd.GeoSeries([row.geometry.centroid],
                                 crs=grid.crs).to_crs(4326).iloc[0]
    folium.Marker(
        location=[centroid_wgs.y, centroid_wgs.x],
        icon=folium.DivIcon(
            html=f'<div style="pointer-events:none; font-size:8pt; '
                 f'color:red;"><b>{row["grid_id"]}</b></div>'
        )
    ).add_to(m)

# (선택) 광진구 외곽선
folium.GeoJson(
    gwangjin.to_crs(4326).unary_union.__geo_interface__,
    style_function=lambda x: {"color": "black", "weight": 2, "fillOpacity": 0}
).add_to(m)

m   # 지도 출력


  center = grid.unary_union.centroid
  gwangjin.to_crs(4326).unary_union.__geo_interface__,


In [69]:
# 셀 3 ─ 빌라 CSV → GeoDataFrame & 격자 매핑
import geopandas as gpd
import pandas as pd

# 1) 빌라 CSV 로드
villa_df = pd.read_csv(CSV_VILLA_PATH)

# 2) 위·경도를 숫자로 변환 + 결측 제거
villa_df["위도"] = pd.to_numeric(villa_df["위도"], errors="coerce")
villa_df["경도"] = pd.to_numeric(villa_df["경도"], errors="coerce")
villa_df = villa_df.dropna(subset=["위도", "경도"])          # 좌표 없는 행 제거

# 3) GeoDataFrame (EPSG:4326 → 5179)
villa = gpd.GeoDataFrame(
    villa_df,
    geometry=gpd.points_from_xy(villa_df["경도"], villa_df["위도"]),
    crs="EPSG:4326"
).to_crs(grid.crs)                                           # grid 와 동일 CRS(5179)

# 4) 격자 매핑 (within → 격자 내부)
villa = gpd.sjoin(
    villa,
    grid[["grid_id", "geometry"]],
    how="left",
    predicate="within"
)

print(f"겹치지 못한 빌라: {villa['grid_id'].isna().sum()}")  # NaN 이면 격자 밖
villa.head()


겹치지 못한 빌라: 0


Unnamed: 0,위도,경도,행정동명,행정동코드,대지위치,시군구코드명,법정동코드명,대지면적,건축면적,주용도코드명,세대수,구,건축면적_결측,geometry,index_right,grid_id
0,37.53776,127.064704,자양4동,11050670,서울특별시 광진구 자양동 843-1,서울특별시 광진구,자양동,270.0,156.22,공동주택,16.0,광진구,False,POINT (961543.459 1948804.581),10,15
1,37.535979,127.075837,자양3동,11050660,서울특별시 광진구 자양동 227-273,서울특별시 광진구,자양동,245.9,147.5,공동주택,10.0,광진구,False,POINT (962526.144 1948602.451),35,38
2,37.561104,127.079313,중곡1동,11050550,서울특별시 광진구 중곡동 257-21,서울특별시 광진구,중곡동,156.7,92.57,공동주택,3.0,광진구,False,POINT (962845.654 1951388.537),73,56
3,37.561599,127.081699,중곡1동,11050550,서울특별시 광진구 중곡동 163-35,서울특별시 광진구,중곡동,0.0,57.85,공동주택,2.0,광진구,False,POINT (963056.627 1951442.58),73,56
4,37.566586,127.085604,중곡3동,11050570,서울특별시 광진구 중곡동 29-22,서울특별시 광진구,중곡동,223.5,133.74,공동주택,6.0,광진구,False,POINT (963403.992 1951994.278),104,69


In [70]:
# 셀 4 ─ 행정동별 연령대 비율 머지 + 빌라별 추정 인구수
ratio = (
    pd.read_csv(CSV_RATIO_PATH)
      .rename(columns={'행정동':'행정동명'})
)

age_cols = [c for c in ratio.columns if '~' in c or '이상' in c]
F = '인구/세대_비율'

villa = villa.merge(ratio[['행정동명', F] + age_cols], on='행정동명', how='left')

for col in age_cols:
    villa[f'{col}_cnt'] = villa['세대수'] * villa[F] * villa[col]

villa.head()


Unnamed: 0,위도,경도,행정동명,행정동코드,대지위치,시군구코드명,법정동코드명,대지면적,건축면적,주용도코드명,...,55~59세_cnt,60~64세_cnt,65~69세_cnt,70~74세_cnt,75~79세_cnt,80~84세_cnt,85~89세_cnt,90~94세_cnt,95~99세_cnt,100세 이상_cnt
0,37.53776,127.064704,자양4동,11050670,서울특별시 광진구 자양동 843-1,서울특별시 광진구,자양동,270.0,156.22,공동주택,...,6.189655,6.096577,4.89898,3.363201,2.438631,1.706421,0.788056,0.28854,0.058949,0.01241
1,37.535979,127.075837,자양3동,11050660,서울특별시 광진구 자양동 227-273,서울특별시 광진구,자양동,245.9,147.5,공동주택,...,4.984645,4.766836,4.165593,2.511608,1.792385,1.327272,0.857622,0.390241,0.081678,0.018151
2,37.561104,127.079313,중곡1동,11050550,서울특별시 광진구 중곡동 257-21,서울특별시 광진구,중곡동,156.7,92.57,공동주택,...,0.768358,0.733546,0.635947,0.39599,0.328852,0.2412,0.108789,0.036677,0.006216,0.000622
3,37.561599,127.081699,중곡1동,11050550,서울특별시 광진구 중곡동 163-35,서울특별시 광진구,중곡동,0.0,57.85,공동주택,...,0.512239,0.48903,0.423964,0.263994,0.219235,0.1608,0.072526,0.024452,0.004144,0.000414
4,37.566586,127.085604,중곡3동,11050570,서울특별시 광진구 중곡동 29-22,서울특별시 광진구,중곡동,223.5,133.74,공동주택,...,1.776597,1.875151,1.541382,0.957943,0.846249,0.570298,0.254926,0.059132,0.017083,0.0


In [75]:
print("ratio_df columns :", ratio_df.columns.tolist())
# 빌라 CSV 에서 실제 동 이름 열 확인
print("villa columns :", villa.columns.tolist())


ratio_df columns : ['행정동', '전체 인구수', '전체세대', '인구/세대_비율', '0~4세', '5~9세', '10~14세', '15~19세', '20~24세', '25~29세', '30~34세', '35~39세', '40~44세', '45~49세', '50~54세', '55~59세', '60~64세', '65~69세', '70~74세', '75~79세', '80~84세', '85~89세', '90~94세', '95~99세', '100세 이상']
villa columns : ['위도', '경도', '행정동명', '행정동코드', '대지위치', '시군구코드명', '법정동코드명', '대지면적', '건축면적', '주용도코드명', '세대수', '구', '건축면적_결측', 'geometry', 'index_right', 'grid_id', '인구/세대_비율', '0~4세', '5~9세', '10~14세', '15~19세', '20~24세', '25~29세', '30~34세', '35~39세', '40~44세', '45~49세', '50~54세', '55~59세', '60~64세', '65~69세', '70~74세', '75~79세', '80~84세', '85~89세', '90~94세', '95~99세', '100세 이상', '0~4세_cnt', '5~9세_cnt', '10~14세_cnt', '15~19세_cnt', '20~24세_cnt', '25~29세_cnt', '30~34세_cnt', '35~39세_cnt', '40~44세_cnt', '45~49세_cnt', '50~54세_cnt', '55~59세_cnt', '60~64세_cnt', '65~69세_cnt', '70~74세_cnt', '75~79세_cnt', '80~84세_cnt', '85~89세_cnt', '90~94세_cnt', '95~99세_cnt', '100세 이상_cnt']


In [76]:
# ────────────────────────────────────────────────────────
# 셀 5 ─ 청크 × 행정동별 집계 + 총인구·비율 계산
# ────────────────────────────────────────────────────────
# 0) 변수 설정 ── 동 이름 열
VILLA_EMD_COL = "행정동명"   # 빌라 CSV 쪽 열 이름
RATIO_EMD_COL = "행정동"     # ratio_df  쪽 열 이름

# 1) 빌라 ← 연령대 비율 CSV 머지 (필요 시 한 번만 실행)
age_cols = [c for c in ratio_df.columns if ("~" in c) or ("이상" in c)]
merge_cols = [RATIO_EMD_COL, "인구/세대_비율"] + age_cols

if "인구/세대_비율" not in villa.columns:       # 머지 안 했을 때만
    villa = (
        villa.merge(
            ratio_df[merge_cols],
            left_on=VILLA_EMD_COL,
            right_on=RATIO_EMD_COL,
            how="left"
        )
        .drop(columns=[RATIO_EMD_COL])           # 중복 열 제거
    )

    # *_cnt 컬럼 만들기
    for col in age_cols:
        villa[f"{col}_cnt"] = (
            villa["세대수"] * villa["인구/세대_비율"] * villa[col]
        ).round().astype(int)

# 2) 집계 ── 청크(grid_id) × 행정동
cnt_cols = [f"{c}_cnt" for c in age_cols]
agg_dict = {c: "sum" for c in cnt_cols}
agg_dict.update({"세대수": "sum"})

chunk = (
    villa.dropna(subset=["grid_id"])
         .groupby(["grid_id", VILLA_EMD_COL])   # ★ 청크 × 동
         .agg(agg_dict)
         .reset_index()
)

# 3) 총인구 + 연령대 비율(소수점 4자리)
chunk["총인구"] = chunk[cnt_cols].sum(axis=1)

for c in cnt_cols:
    ratio_col = c.replace("_cnt", "_비율")
    chunk[ratio_col] = (chunk[c] / chunk["총인구"]).round(4)

chunk.head()


Unnamed: 0,grid_id,행정동명,0~4세_cnt,5~9세_cnt,10~14세_cnt,15~19세_cnt,20~24세_cnt,25~29세_cnt,30~34세_cnt,35~39세_cnt,...,55~59세_비율,60~64세_비율,65~69세_비율,70~74세_비율,75~79세_비율,80~84세_비율,85~89세_비율,90~94세_비율,95~99세_비율,100세 이상_비율
0,2,자양4동,17.578077,20.191034,20.571101,26.604657,85.229919,127.702354,107.938894,77.29603,...,0.0859,0.0846,0.068,0.0467,0.0338,0.0237,0.0109,0.004,0.0008,0.0002
1,14,자양4동,101.09188,116.119051,118.304821,153.003926,490.159005,734.418844,620.758784,444.531049,...,0.0859,0.0846,0.068,0.0467,0.0338,0.0237,0.0109,0.004,0.0008,0.0002
2,15,자양4동,64.50078,74.088734,75.483346,97.622803,312.741622,468.589454,396.069657,283.629108,...,0.0859,0.0846,0.068,0.0467,0.0338,0.0237,0.0109,0.004,0.0008,0.0002
3,16,자양4동,12.842758,14.751817,15.029498,19.437688,62.270023,93.300903,78.861478,56.473426,...,0.0859,0.0846,0.068,0.0467,0.0338,0.0237,0.0109,0.004,0.0008,0.0002
4,16,화양동,6.454337,6.375625,7.910498,30.854877,258.370243,280.684931,143.648345,65.015635,...,0.0353,0.031,0.0287,0.0215,0.0177,0.0118,0.0066,0.0017,0.0004,0.0001


In [77]:
# merge 이후
chunk_gdf = grid.merge(chunk, on='grid_id', how='left')

# --- 컬럼 재배치 : geometry → 맨 뒤 ---
cols = [c for c in chunk_gdf.columns if c != 'geometry'] + ['geometry']
chunk_gdf = chunk_gdf[cols]

# CSV 저장
chunk_gdf.to_csv(OUT_CSV_PATH, index=False, encoding='utf-8-sig')


### 청크별 어린이집이랑 공원 

### 시각화

In [61]:
# -*- coding: utf-8 -*-
"""
광진구 500 m 격자 + 빌라수 라벨 + 연령대별 인구수 툴팁 지도
----------------------------------------------------------------
필요 패키지 : geopandas, pandas, shapely, folium
"""

import geopandas as gpd
import pandas as pd
from shapely.geometry import box
from shapely import wkt
import folium
from pathlib import Path

# ---------------------------------------------------------------
# 0. 경로 설정
# ---------------------------------------------------------------
BASE_DIR         = Path("Data")
CSV_VILLA_PATH   = BASE_DIR / "빌라_행정동매핑.csv"
SHP_EMD_PATH     = BASE_DIR / "3. 2024년 2분기 기준 행정동 경계/bnd_dong_00_2024_2Q.shp"
CSV_RATIO_PATH   = BASE_DIR / "chunk_population_ratio.csv"       # *_cnt 컬럼 포함
GRID_SIZE        = 500                                           # m

# ---------------------------------------------------------------
# 1. 광진구 행정동 경계 → EPSG 5179
# ---------------------------------------------------------------
emd = gpd.read_file(SHP_EMD_PATH, encoding="euc-kr")
if emd.crs is None:
    emd.set_crs(epsg=5179, inplace=True)

CODE_COL = next(c for c in ["EMD_CD", "ADM_CD", "EMD_CD_N", "ADM_CD2"] if c in emd.columns)
gwangjin = emd[emd[CODE_COL].astype(str).str.startswith("11050")].to_crs(5179)  # 광진구(kosis 코드)

# ---------------------------------------------------------------
# 2. 500 m 격자 생성
# ---------------------------------------------------------------
minx, miny, maxx, maxy = gwangjin.total_bounds
polys, ids = [], []
gid = 0
x = minx
while x < maxx:
    y = miny
    while y < maxy:
        polys.append(box(x, y, x + GRID_SIZE, y + GRID_SIZE))
        ids.append(gid)
        y += GRID_SIZE; gid += 1
    x += GRID_SIZE

grid = gpd.GeoDataFrame({"grid_id": ids}, geometry=polys, crs=5179)
grid = gpd.overlay(grid, gwangjin[["geometry"]], how="intersection")


# ── 디버그 셀 : grid 상태 확인 ─────────────────────────────
print("grid 행 개수 :", len(grid))
print("grid CRS    :", grid.crs)
print("grid bounds :", grid.total_bounds)   # (minx, miny, maxx, maxy)
grid.head()



# ---------------------------------------------------------------
# 3. 빌라 CSV → GeoDataFrame → 격자 매핑
# ---------------------------------------------------------------
villa_df = pd.read_csv(CSV_VILLA_PATH)
villa_df["위도"] = pd.to_numeric(villa_df["위도"], errors="coerce")
villa_df["경도"] = pd.to_numeric(villa_df["경도"], errors="coerce")
villa_df = villa_df.dropna(subset=["위도", "경도"])

villa = gpd.GeoDataFrame(
    villa_df,
    geometry=gpd.points_from_xy(villa_df["경도"], villa_df["위도"]),
    crs=4326
).to_crs(5179)

villa = gpd.sjoin(villa, grid[["grid_id", "geometry"]], how="left", predicate="within")
villa_cnt = villa.groupby("grid_id").size()
grid["빌라수"] = grid["grid_id"].map(villa_cnt).fillna(0).astype(int)

# ---------------------------------------------------------------
# 4. 격자별 연령대 인구수(_cnt) CSV 병합
#    (geometry WKT 복원 → grid_id 결합)
# ---------------------------------------------------------------
ratio_df = pd.read_csv(CSV_RATIO_PATH)
if "geometry" in ratio_df.columns and isinstance(ratio_df["geometry"].iloc[0], str):
    ratio_df["geometry"] = ratio_df["geometry"].apply(wkt.loads)

cnt_cols = [c for c in ratio_df.columns if c.endswith("_cnt")]
grid = grid.merge(ratio_df[["grid_id"] + cnt_cols], on="grid_id", how="left")

# 총인구 없으면 생성
if "총인구" not in grid.columns:
    grid["총인구"] = grid[cnt_cols].sum(axis=1).fillna(0).astype(int)

# ---------------------------------------------------------------
# 5. Folium 지도 (빌라수 라벨 + 연령대 인구수 툴팁)
# ---------------------------------------------------------------
center = grid.to_crs(4326).geometry.unary_union.centroid
print(center.y, center.x)   # 37~38 , 126~128 근처가 나오면 정상

m = folium.Map(location=[center.y, center.x], zoom_start=14, tiles="cartodbpositron")

for _, row in grid.iterrows():
    geom      = row.geometry
    villa_num = row["빌라수"]

    geom_wgs = gpd.GeoSeries([geom], crs=5179).to_crs(4326).iloc[0]

    # props: 연령대별 인구수
    # props 만들 때 NaN 은 0 처리
    props = {c: int(row[c]) if pd.notna(row[c]) else 0 for c in cnt_cols}
    fields  = list(props.keys())
    aliases = [c.replace("_cnt", "") + " :" for c in fields]

    folium.GeoJson(
        data={"type": "Feature", "geometry": geom_wgs.__geo_interface__, "properties": props},
        style_function=lambda x: {"fillColor": "#00000000", "color": "blue", "weight": 1, "opacity": 0.6},
        highlight_function=lambda x: {"fillColor": "#FFFF0033", "weight": 2},
        tooltip=folium.GeoJsonTooltip(
            fields   = fields,
            aliases  = aliases,
            localize = False,
            sticky   = False
        )
    ).add_to(m)

    # 빌라수 라벨 (pointer‑events:none 필수)
    if villa_num > 0:
        centroid_wgs = gpd.GeoSeries([geom.centroid], crs=5179).to_crs(4326).iloc[0]
        folium.Marker(
            location=[centroid_wgs.y, centroid_wgs.x],
            icon=folium.DivIcon(
                html=f'<div style="pointer-events:none; font-size:9pt; color:red;"><b>{villa_num}</b></div>'
            )
        ).add_to(m)

# ---------------------------------------------------------------
# 6. 결과 지도 출력
# ---------------------------------------------------------------
m


  return ogr_read(


grid 행 개수 : 166
grid CRS    : EPSG:5179
grid bounds : [ 960788.78870001 1947108.144       966015.4532     1952784.8993    ]
37.546718894012535 127.08574596414618


  center = grid.to_crs(4326).geometry.unary_union.centroid
