In [2]:
!pip install geopandas

Collecting geopandas
  Downloading geopandas-1.0.1-py3-none-any.whl.metadata (2.2 kB)
Collecting pyogrio>=0.7.2 (from geopandas)
  Downloading pyogrio-0.10.0-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Collecting pyproj>=3.3.0 (from geopandas)
  Downloading pyproj-3.7.1-cp311-cp311-win_amd64.whl.metadata (31 kB)
Collecting shapely>=2.0.0 (from geopandas)
  Downloading shapely-2.1.0-cp311-cp311-win_amd64.whl.metadata (7.0 kB)
Collecting certifi (from pyogrio>=0.7.2->geopandas)
  Downloading certifi-2025.1.31-py3-none-any.whl.metadata (2.5 kB)
Downloading geopandas-1.0.1-py3-none-any.whl (323 kB)
   ---------------------------------------- 0.0/323.6 kB ? eta -:--:--
   --------------------------------------- 323.6/323.6 kB 10.1 MB/s eta 0:00:00
Downloading pyogrio-0.10.0-cp311-cp311-win_amd64.whl (16.2 MB)
   ---------------------------------------- 0.0/16.2 MB ? eta -:--:--
   ----- ---------------------------------- 2.0/16.2 MB 42.9 MB/s eta 0:00:01
   ----------- ---------------------


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: C:\Users\user\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [14]:
# 셀 1 ─────────────────────────────────────────────────────────────
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point, box
from pathlib import Path

# ----- 경로 설정 -----
from pathlib import Path

# Every input and output file lives under the Data folder.
BASE_DIR = Path('Data')
CSV_VILLA_PATH = BASE_DIR.joinpath('빌라_행정동매핑.csv')            # villa records
CSV_RATIO_PATH = BASE_DIR.joinpath('행정동별_인구비율_데이터.csv')   # per-dong ratio table
SHP_EMD_PATH = BASE_DIR.joinpath('emd_20230729 (1)', 'emd.shp')      # boundary shapefile
OUT_CSV_PATH = BASE_DIR.joinpath('chunk_population_ratio.csv')       # per-grid result table

# Side length of one square grid cell, in metres.
GRID_SIZE = 500


In [15]:
# Cell 2 ─ load the dong boundaries and build the GRID_SIZE-metre grid
gdf_emd = gpd.read_file(SHP_EMD_PATH, encoding='euc-kr')
if gdf_emd.crs is None:
    # No CRS is attached to the file; the coordinates are treated as EPSG:5179.
    gdf_emd = gdf_emd.set_crs(epsg=5179)

# Keep only the rows whose EMD_CD starts with 11215.
gwangjin = gdf_emd[gdf_emd['EMD_CD'].astype(str).str.startswith('11215')].to_crs(epsg=5179)

# Tile the bounding box with square cells, column by column (x outer, y inner),
# numbering the cells consecutively in that order.
minx, miny, maxx, maxy = gwangjin.total_bounds
polys, gid = [], 0
x = minx
while x < maxx:
    y = miny
    while y < maxy:
        polys.append({'grid_id': gid, 'geometry': box(x, y, x + GRID_SIZE, y + GRID_SIZE)})
        y += GRID_SIZE
        gid += 1
    x += GRID_SIZE

grid_full = gpd.GeoDataFrame(polys, crs=gwangjin.crs)

# Intersect with ONE dissolved district polygon. Intersecting with the
# individual dong polygons would split every cell that straddles a dong border
# into several rows sharing the same grid_id, and every later
# merge(on='grid_id') would then duplicate the per-cell statistics.
district = gwangjin[['geometry']].dissolve()
grid = gpd.overlay(grid_full, district, how='intersection')
assert grid['grid_id'].is_unique, 'each grid cell must appear exactly once'
grid.head()


Unnamed: 0,grid_id,geometry
0,0,"POLYGON ((961288.889 1947608.268, 961288.889 1..."
1,1,"POLYGON ((961288.889 1948108.268, 961288.889 1..."
2,2,"POLYGON ((961288.889 1948608.268, 961288.889 1..."
3,3,"POLYGON ((961288.889 1948608.268, 961170.455 1..."
4,12,"POLYGON ((961288.889 1947608.268, 961788.889 1..."


In [16]:
# Cell 3 ─ villa CSV → GeoDataFrame, then tag every villa with its grid cell
villa_df = pd.read_csv(CSV_VILLA_PATH)

# Build points from the longitude/latitude columns (EPSG:4326) and project
# them to EPSG:5179, the CRS the grid was built in.
villa_points = gpd.points_from_xy(villa_df['경도'], villa_df['위도'])
villa_wgs = gpd.GeoDataFrame(villa_df, geometry=villa_points, crs='EPSG:4326')
villa_proj = villa_wgs.to_crs(epsg=5179)

# Left join keeps villas that fall in no cell; they get a NaN grid_id.
grid_cells = grid[['grid_id', 'geometry']]
villa = gpd.sjoin(villa_proj, grid_cells, how='left', predicate='within')

unmatched = villa['grid_id'].isna().sum()
print('겹치지 못한 빌라:', unmatched)
villa.head()


겹치지 못한 빌라: 0


Unnamed: 0,위도,경도,행정동명,행정동코드,대지위치,시군구코드명,법정동코드명,대지면적,건축면적,주용도코드명,세대수,구,건축면적_결측,geometry,index_right,grid_id
0,37.53776,127.064704,자양4동,11050670,서울특별시 광진구 자양동 843-1,서울특별시 광진구,자양동,270.0,156.22,공동주택,16.0,광진구,False,POINT (961543.459 1948804.581),7,15
1,37.535979,127.075837,자양3동,11050660,서울특별시 광진구 자양동 227-273,서울특별시 광진구,자양동,245.9,147.5,공동주택,10.0,광진구,False,POINT (962526.144 1948602.451),25,38
2,37.561104,127.079313,중곡1동,11050550,서울특별시 광진구 중곡동 257-21,서울특별시 광진구,중곡동,156.7,92.57,공동주택,3.0,광진구,False,POINT (962845.654 1951388.537),57,56
3,37.561599,127.081699,중곡1동,11050550,서울특별시 광진구 중곡동 163-35,서울특별시 광진구,중곡동,0.0,57.85,공동주택,2.0,광진구,False,POINT (963056.627 1951442.58),57,56
4,37.566586,127.085604,중곡3동,11050570,서울특별시 광진구 중곡동 29-22,서울특별시 광진구,중곡동,223.5,133.74,공동주택,6.0,광진구,False,POINT (963403.992 1951994.278),75,69


In [17]:
# Cell 4 ─ attach the per-dong age-band ratios and estimate residents per villa
ratio = pd.read_csv(CSV_RATIO_PATH).rename(columns={'행정동': '행정동명'})

# Age-band columns are recognised by a '~' or '이상' in their name.
age_cols = [c for c in ratio.columns if '~' in c or '이상' in c]
F = '인구/세대_비율'

# Make the cell safe to re-run: remove anything a previous execution of this
# cell added to `villa`, otherwise the merge below would create _x/_y
# suffixed duplicates and the villa[F] lookup would fail.
stale_cols = [F] + age_cols + [f'{col}_cnt' for col in age_cols]
villa = villa.drop(columns=stale_cols, errors='ignore')

# validate='many_to_one' raises if the ratio table has more than one row for a
# dong, which would otherwise silently duplicate villas and inflate the counts.
villa = villa.merge(
    ratio[['행정동명', F] + age_cols],
    on='행정동명',
    how='left',
    validate='many_to_one',
)

# Estimated count per age band = households × (column F) × (age-band ratio).
persons = villa['세대수'] * villa[F]
for col in age_cols:
    villa[f'{col}_cnt'] = persons * villa[col]

villa.head()


Unnamed: 0,위도,경도,행정동명,행정동코드,대지위치,시군구코드명,법정동코드명,대지면적,건축면적,주용도코드명,...,55~59세_cnt,60~64세_cnt,65~69세_cnt,70~74세_cnt,75~79세_cnt,80~84세_cnt,85~89세_cnt,90~94세_cnt,95~99세_cnt,100세 이상_cnt
0,37.53776,127.064704,자양4동,11050670,서울특별시 광진구 자양동 843-1,서울특별시 광진구,자양동,270.0,156.22,공동주택,...,6.189655,6.096577,4.89898,3.363201,2.438631,1.706421,0.788056,0.28854,0.058949,0.01241
1,37.535979,127.075837,자양3동,11050660,서울특별시 광진구 자양동 227-273,서울특별시 광진구,자양동,245.9,147.5,공동주택,...,4.984645,4.766836,4.165593,2.511608,1.792385,1.327272,0.857622,0.390241,0.081678,0.018151
2,37.561104,127.079313,중곡1동,11050550,서울특별시 광진구 중곡동 257-21,서울특별시 광진구,중곡동,156.7,92.57,공동주택,...,0.768358,0.733546,0.635947,0.39599,0.328852,0.2412,0.108789,0.036677,0.006216,0.000622
3,37.561599,127.081699,중곡1동,11050550,서울특별시 광진구 중곡동 163-35,서울특별시 광진구,중곡동,0.0,57.85,공동주택,...,0.512239,0.48903,0.423964,0.263994,0.219235,0.1608,0.072526,0.024452,0.004144,0.000414
4,37.566586,127.085604,중곡3동,11050570,서울특별시 광진구 중곡동 29-22,서울특별시 광진구,중곡동,223.5,133.74,공동주택,...,1.776597,1.875151,1.541382,0.957943,0.846249,0.570298,0.254926,0.059132,0.017083,0.0


In [18]:
# Cell 5 ─ aggregate per grid cell and derive the age-band shares
cnt_cols = [f"{c}_cnt" for c in age_cols]

# Sum every estimated count and the household total; label each cell with the
# dong name that occurs most often among its villas.
agg = dict.fromkeys(cnt_cols, "sum")
agg["세대수"] = "sum"
agg["행정동명"] = lambda names: names.value_counts().idxmax()

# Villas without a grid cell are excluded from the aggregation.
placed = villa[villa["grid_id"].notna()]
chunk = placed.groupby("grid_id").agg(agg)

# Total estimated population of the cell.
chunk['총인구'] = chunk[cnt_cols].sum(axis=1)

# Share of each age band in the cell total, rounded to 4 decimal places.
ratio_cols = [c.replace('_cnt', '_비율') for c in cnt_cols]
shares = chunk[cnt_cols].div(chunk['총인구'], axis=0).round(4)
shares.columns = ratio_cols
chunk = pd.concat([chunk, shares], axis=1)

chunk.head()


Unnamed: 0_level_0,0~4세_cnt,5~9세_cnt,10~14세_cnt,15~19세_cnt,20~24세_cnt,25~29세_cnt,30~34세_cnt,35~39세_cnt,40~44세_cnt,45~49세_cnt,...,55~59세_비율,60~64세_비율,65~69세_비율,70~74세_비율,75~79세_비율,80~84세_비율,85~89세_비율,90~94세_비율,95~99세_비율,100세 이상_비율
grid_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,25.900758,29.750871,30.310887,39.201148,125.583677,188.165509,159.044657,113.893335,110.32323,92.192699,...,0.0859,0.0846,0.068,0.0467,0.0338,0.0237,0.0109,0.004,0.0008,0.0002
14,92.769198,106.559214,108.565035,140.407435,449.805247,673.955688,569.653022,407.933744,395.146639,330.2082,...,0.0859,0.0846,0.068,0.0467,0.0338,0.0237,0.0109,0.004,0.0008,0.0002
15,64.50078,74.088734,75.483346,97.622803,312.741622,468.589454,396.069657,283.629108,274.73846,229.587913,...,0.0859,0.0846,0.068,0.0467,0.0338,0.0237,0.0109,0.004,0.0008,0.0002
16,19.297095,21.127442,22.939996,50.292566,320.640266,373.985835,222.509823,121.489061,100.513256,82.628934,...,0.0568,0.0538,0.0454,0.0322,0.0245,0.0168,0.0085,0.0027,0.0006,0.0001
17,1.978394,1.954268,2.42474,9.457691,79.196096,86.036033,44.031341,19.928705,14.041775,11.315451,...,0.0353,0.031,0.0287,0.0215,0.0177,0.0118,0.0066,0.0017,0.0004,0.0001


In [19]:
# ── Attach the per-cell statistics to the grid geometry ───────────────
chunk_gdf = grid.merge(chunk, on="grid_id", how="left")

# ── Attach the daycare-centre CSV to the grid ─────────────────────────
cc_path = BASE_DIR / "광진구_실제운영중_어린이집.csv"
cc_df   = pd.read_csv(cc_path)

# 1) Daycare centres → GeoDataFrame (EPSG:4326 → the grid's CRS)
cc = gpd.GeoDataFrame(
        cc_df,
        geometry=gpd.points_from_xy(cc_df["시설 경도(좌표값)"], cc_df["시설 위도(좌표값)"]),
        crs="EPSG:4326"
     ).to_crs(grid.crs)

# 2) Spatial join to the grid, then aggregate per grid_id.
#    Names are cleaned before joining: a missing (NaN) or non-string name
#    would make " / ".join(...) raise TypeError.
cc_agg = (
    gpd.sjoin(cc[["어린이집명","정원","geometry"]],
              grid[["grid_id","geometry"]],
              predicate="within",
              how="left")
      .dropna(subset=["grid_id"])
      .groupby("grid_id")
      .agg(어린이집수   = ("어린이집명","count"),
           어린이집목록 = ("어린이집명", lambda s: " / ".join(s.dropna().astype(str))),
           정원합계     = ("정원","sum"))
      .reset_index()
)

# 3) Merge into chunk_gdf
chunk_gdf = chunk_gdf.merge(cc_agg, on="grid_id", how="left")

# Cells without any daycare centre: NaN → 0 for the numbers, "" for the list
chunk_gdf[["어린이집수","정원합계"]] = chunk_gdf[["어린이집수","정원합계"]].fillna(0).astype(int)
chunk_gdf["어린이집목록"] = chunk_gdf["어린이집목록"].fillna("")

# ── Column order: move geometry to the end ────────────────────────────
cols = [c for c in chunk_gdf.columns if c != "geometry"] + ["geometry"]
chunk_gdf = chunk_gdf[cols]

# Save as CSV (the geometry column is written as text)
chunk_gdf.to_csv(OUT_CSV_PATH, index=False, encoding="utf-8-sig")
print("✅  저장 완료 →", OUT_CSV_PATH)


✅  저장 완료 → Data\chunk_population_ratio.csv


In [8]:
# Read the export back and check it, instead of exporting a second time.
# Rebuilding chunk_gdf from grid + chunk here and writing it to OUT_CSV_PATH
# again would replace the file saved by the previous cell with a table that
# lacks the daycare columns (어린이집수 / 어린이집목록 / 정원합계).
saved = pd.read_csv(OUT_CSV_PATH, encoding='utf-8-sig')

missing_cols = [c for c in chunk_gdf.columns if c not in saved.columns]
assert not missing_cols, f'columns missing from the saved CSV: {missing_cols}'
assert len(saved) == len(chunk_gdf), 'row count of the saved CSV differs from chunk_gdf'

saved.head()


### 시각화

In [9]:
import geopandas as gpd

# Inspect the raw shapefile: which CRS it declares and what its coordinates
# look like. SHP_EMD_PATH comes from the configuration cell; it is
# deliberately not re-assigned here, so the Path constant is not replaced by
# a hard-coded string.
emd = gpd.read_file(SHP_EMD_PATH, encoding="euc-kr")
first_centroid = emd.geometry.iloc[0].centroid

print("▶ emd.crs :", emd.crs)
print("▶ 첫 번째 좌표 값 예시 :", first_centroid.x, first_centroid.y)


▶ emd.crs : None
▶ 첫 번째 좌표 값 예시 : 953149.1845269037 1954555.436493793


In [10]:
!pip install shapely




[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: C:\Users\user\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [12]:
!pip install folium

Collecting folium
  Downloading folium-0.19.5-py2.py3-none-any.whl.metadata (4.1 kB)
Collecting branca>=0.6.0 (from folium)
  Downloading branca-0.8.1-py3-none-any.whl.metadata (1.5 kB)
Collecting jinja2>=2.9 (from folium)
  Downloading jinja2-3.1.6-py3-none-any.whl.metadata (2.9 kB)
Collecting requests (from folium)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting xyzservices (from folium)
  Downloading xyzservices-2025.1.0-py3-none-any.whl.metadata (4.3 kB)
Collecting MarkupSafe>=2.0 (from jinja2>=2.9->folium)
  Downloading MarkupSafe-3.0.2-cp311-cp311-win_amd64.whl.metadata (4.1 kB)
Collecting charset-normalizer<4,>=2 (from requests->folium)
  Downloading charset_normalizer-3.4.1-cp311-cp311-win_amd64.whl.metadata (36 kB)
Collecting idna<4,>=2.5 (from requests->folium)
  Downloading idna-3.10-py3-none-any.whl.metadata (10 kB)
Collecting urllib3<3,>=1.21.1 (from requests->folium)
  Downloading urllib3-2.4.0-py3-none-any.whl.metadata (6.5 kB)
Downloading fol


[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: C:\Users\user\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [13]:
import geopandas as gpd
import pandas as pd
import shapely
from shapely import wkt          # ← 추가
from shapely.geometry import Polygon, mapping
import folium


# === 1. District dong boundaries ==================================
shp_path = "Data/emd_20230729 (1)/emd.shp"
emd = gpd.read_file(shp_path, encoding="euc-kr")
if emd.crs is None:
    # No CRS is attached to the file; the coordinates are treated as EPSG:5179.
    emd = emd.set_crs(epsg=5179)
# Keep only the rows whose EMD_CD starts with 11215.
gwangjin_gdf = emd[emd["EMD_CD"].astype(str).str.startswith("11215")].to_crs(epsg=5179)

# === 2. Build the 500 m grid ======================================
# Cells are numbered column by column (x outer, y inner) over the bounding box.
minx, miny, maxx, maxy = gwangjin_gdf.total_bounds
grid_size = 500
grid_list, grid_id = [], 0

x = minx
while x < maxx:
    y = miny
    while y < maxy:
        grid_list.append({"grid_id": grid_id,
                          "geometry": Polygon([
                              (x, y),
                              (x + grid_size, y),
                              (x + grid_size, y + grid_size),
                              (x, y + grid_size)
                          ])})
        y += grid_size
        grid_id += 1
    x += grid_size

grid_gdf     = gpd.GeoDataFrame(grid_list, crs=5179)
grid_clipped = gpd.clip(grid_gdf, gwangjin_gdf)   # keep only the part inside the district

# === 3. Villa coordinates → number of villas per cell =============
df_villa = pd.read_csv("Data/빌라_행정동매핑.csv")
df_villa = df_villa.dropna(subset=["위도", "경도"])
gdf_villa = gpd.GeoDataFrame(
    df_villa,
    geometry=gpd.points_from_xy(df_villa["경도"], df_villa["위도"]),
    crs="EPSG:4326"
).to_crs(5179)

# One spatial join instead of testing every villa against every cell.
villa_hits = gpd.sjoin(
    gdf_villa[["geometry"]],
    grid_clipped[["grid_id", "geometry"]],
    predicate="within",
    how="inner",
)
villa_per_cell = villa_hits.groupby("grid_id").size()
grid_clipped["빌라수"] = grid_clipped["grid_id"].map(villa_per_cell).fillna(0).astype(int)

# === 4. Merge the age-band share CSV (used for the tooltip) =======
ratio_csv = "Data/chunk_population_ratio.csv"
df_ratio  = pd.read_csv(ratio_csv)

# Restore geometry from WKT and wrap in a GeoDataFrame (crs=5179 is assumed)
df_ratio["geometry"] = df_ratio["geometry"].apply(wkt.loads)
gdf_ratio = gpd.GeoDataFrame(df_ratio, geometry="geometry", crs=5179)

# The CSV may hold several rows per grid_id; keep one per cell so the merge
# cannot multiply grid rows (which would draw duplicate polygons and labels).
ratio_cols = [c for c in gdf_ratio.columns if c.endswith("_비율")]
ratio_per_cell = gdf_ratio[["grid_id"] + ratio_cols].drop_duplicates(subset="grid_id")

grid_clipped = grid_clipped.merge(
    ratio_per_cell,
    on="grid_id",
    how="left",
    validate="many_to_one",
)

# Format the shares as percent strings; cells without data show "-" rather
# than "nan%".
for c in ratio_cols:
    pct = grid_clipped[c].astype(float) * 100
    grid_clipped[c] = pct.round(1).astype(str).add("%").where(pct.notna(), "-")

aliases = [c.replace("_비율", "") + " :" for c in ratio_cols]

# === 5. Create the map ============================================
# union_all() replaces the deprecated unary_union attribute.
center = gwangjin_gdf.to_crs(4326).geometry.union_all().centroid
m = folium.Map(location=[center.y, center.x], zoom_start=14,
               tiles="cartodbpositron")

# === 6. Grid cells + tooltip + villa-count labels =================
# Reproject the whole layer once instead of once per row.
grid_wgs = grid_clipped.to_crs(4326)

folium.GeoJson(
    data=grid_wgs[ratio_cols + ["geometry"]],
    style_function=lambda x: {
        "fillColor": "#00000000",
        "color": "blue",
        "weight": 2,
        "opacity": 0.7
    },
    tooltip=folium.GeoJsonTooltip(
        fields   = ratio_cols,
        aliases  = aliases,
        sticky   = False,
        localize = True
    )
).add_to(m)

# Label each cell with its villa count. Centroids are computed in the
# projected CRS and only then converted to longitude/latitude.
label_points = grid_clipped.geometry.centroid.to_crs(4326)
for villa_cnt, label_pt in zip(grid_clipped["빌라수"], label_points):
    folium.Marker(
        location=[label_pt.y, label_pt.x],
        icon=folium.DivIcon(
            html=f'<div style="font-size:9pt;color:red;"><b>{int(villa_cnt)}</b></div>'
        )
    ).add_to(m)

# === 7. District outline + dong boundaries ========================
gwangjin_union_wgs = gpd.GeoSeries([gwangjin_gdf.union_all()], crs=5179).to_crs(4326).iloc[0]
folium.GeoJson(gwangjin_union_wgs.__geo_interface__,
               style_function=lambda x: {"color": "black", "weight": 3}).add_to(m)

folium.GeoJson(gwangjin_gdf[["geometry"]].to_crs(4326),
               style_function=lambda x: {"color": "gray", "weight": 1}).add_to(m)

# === 8. Show the result ===========================================
m


  center = gwangjin_gdf.to_crs(4326).geometry.unary_union.centroid
  gwangjin_union_wgs = gpd.GeoSeries([gwangjin_gdf.unary_union], crs=5179).to_crs(4326).iloc[0]
