1. 임포트 & 데이터 로드

In [1]:
# Cell 1
import os

# Move to the project root: one level above the working directory at first run.
# The root is resolved only once per kernel session, so re-running this cell
# returns to the same directory instead of climbing one more level each time.
if "PROJECT_ROOT" not in globals():
    PROJECT_ROOT = os.path.abspath("..")
os.chdir(PROJECT_ROOT)

import pandas as pd


2. 데이터 로드: 2024년 연간 통합 데이터

In [2]:
# Cell 2
# Load the merged 2024 annual table. The "utf-8-sig" codec skips a leading
# byte-order mark if the file has one.
source_csv = "data/processed/merged_annual_2024.csv"
df = pd.read_csv(source_csv, encoding="utf-8-sig")

# Preview the first rows.
df.head()


Unnamed: 0,상권_코드,resident_pop_2024,floating_pop_2024,estimated_sales_2024
0,3001491,105840,52278828,451309752219
1,3001492,21240,173404731,2323168283547
2,3001493,54746,84336760,592441401861
3,3001494,38850,217027839,1233135489141
4,3001495,202576,96379422,1797325989068


3. 지표별 기준값 계산: 75%, 90% 분위수와 중앙값(50%)

In [3]:
# Cell 3
# The three indicator columns that every cut-off below is computed for.
metrics = ["resident_pop_2024", "floating_pop_2024", "estimated_sales_2024"]

# Per-metric cut-offs, each stored as a {column name: value} dict:
# 75th percentile, 90th percentile and median (50th percentile).
q75, q90, med = {}, {}, {}
for col in metrics:
    series = df[col]
    q75[col] = series.quantile(0.75)
    q90[col] = series.quantile(0.90)
    med[col] = series.median()

print("75%:", q75)
print("90%:", q90)
print("50%:", med)


75%: {'resident_pop_2024': np.float64(74461.0), 'floating_pop_2024': np.float64(29391231.0), 'estimated_sales_2024': np.float64(51203896016.5)}
90%: {'resident_pop_2024': np.float64(120792.0), 'floating_pop_2024': np.float64(47808535.0), 'estimated_sales_2024': np.float64(163943965337.0)}
50%: {'resident_pop_2024': np.float64(37840.0), 'floating_pop_2024': np.float64(15279374.0), 'estimated_sales_2024': np.float64(16923528105.0)}


4. 활발 상권 플래그 생성 & 저장

In [4]:
# Cell 4
# Per-metric boolean flags: the value is at or above the 75th percentile
# (high25), the 90th percentile (high10) or the median (high50).
# The metric loop stays outermost so the new columns are appended in the
# order high25, high10, high50 for each metric.
cutoffs_by_suffix = {"high25": q75, "high10": q90, "high50": med}
for m in metrics:
    for suffix, cutoffs in cutoffs_by_suffix.items():
        df[f"{m}_{suffix}"] = df[m].ge(cutoffs[m])

# Row by row, count how many of the metrics clear each percentile cut-off.
hits_25 = df[[f"{m}_high25" for m in metrics]].sum(axis=1)
hits_10 = df[[f"{m}_high10" for m in metrics]].sum(axis=1)

# is_active_25 / is_active_10: at least two metrics clear the cut-off.
df["is_active_25"] = hits_25 >= 2
df["is_active_10"] = hits_10 >= 2

# is_active_50: every metric is at or above its median.
df["is_active_50"] = df[[f"{m}_high50" for m in metrics]].all(axis=1)

# Sanity check: number of rows flagged under each definition.
for flag in ("is_active_25", "is_active_10", "is_active_50"):
    print(f"■ {flag}:", df[flag].sum())


■ is_active_25: 353
■ is_active_10: 124
■ is_active_50: 423


In [5]:
# Cell 5
# Persist the table, including the flag columns added above.
# index=False keeps the row index out of the file.
flags_csv = "data/processed/merged_annual_2024_with_flags.csv"
df.to_csv(flags_csv, encoding="utf-8-sig", index=False)


In [6]:
import os

# Make sure the output folder exists before writing; exist_ok=True means an
# already-existing folder is not an error.
out_dir = "data/processed"
os.makedirs(out_dir, exist_ok=True)

# Write the flagged table without the row index, then confirm on stdout.
out_path = "data/processed/merged_annual_2024_with_flags.csv"
df.to_csv(out_path, encoding="utf-8-sig", index=False)
print("✅ merged_annual_2024_with_flags.csv 저장 완료")


✅ merged_annual_2024_with_flags.csv 저장 완료
