In [1]:
import pandas as pd
import re
from sklearn.preprocessing import MinMaxScaler

In [None]:
# -------------------
# 1. Load the data
# -------------------
# The source CSV is decoded as cp949 and kept in `df` for every later step.
file_path = "../../data/RST-HEATSHELTER/RST-HEATSHELTER.csv"
df = pd.read_csv(filepath_or_buffer=file_path, encoding="cp949")

# -------------------
# 2. Extract the district (gu) name from the road-name address
# -------------------
# A district name is a run of Hangul syllables ending in "구" that is not
# embedded in a longer Hangul word. The look-arounds stop partial hits such
# as "대구" inside "대구광역시" or "압구" inside "압구정로".
_GU_PATTERN = re.compile(r"(?<![가-힣])([가-힣]+구)(?![가-힣])")


def extract_gu(address: str) -> "str | None":
    """Return the first district (gu) name found in an address.

    Args:
        address: Address text to scan. Missing values (None, NaN) and any
            other non-string input are accepted and treated as "no address".

    Returns:
        The leftmost Hangul word ending in "구" (for example "강남구"), or
        None when the input is not a string or contains no such word.
    """
    # Covers None, float NaN and pd.NA, none of which can be regex-searched.
    if not isinstance(address, str):
        return None
    match = _GU_PATTERN.search(address)
    return match.group(1) if match else None

# Derive the district column by running extract_gu over every road-name address.
road_addresses = df["도로명주소"]
df["구이름"] = road_addresses.apply(extract_gu)

# -------------------
# 3. Numeric conversion (strip commas, then cast to float)
# -------------------
for col in ["이용가능인원", "시설면적"]:
    # Render every cell as text so thousands separators can be stripped.
    without_commas = df[col].astype(str).str.replace(",", "", regex=False)
    # Missing cells show up as the text "nan" after astype(str); the float
    # cast parses that straight back to NaN. An intermediate
    # .replace("nan", None) is deliberately avoided: on pandas releases
    # before 1.4 an explicit value=None was ignored and the call forward-
    # filled the previous row's value into the missing cell instead.
    df[col] = without_commas.astype(float)

# -------------------
# 4. Missing values -> 0
# -------------------
# Both numeric columns get the same treatment: NaN is replaced by 0.
for numeric_col in ("이용가능인원", "시설면적"):
    df[numeric_col] = df[numeric_col].fillna(0)

# -------------------
# 5. Per-district aggregation
# -------------------
# One row per district: the count of non-null shelter names, the summed
# capacity and the summed facility area. Named aggregation sets the output
# column names directly, so no separate rename step is needed.
gu_stats = (
    df.groupby("구이름")
    .agg(
        heatshelter_count=("쉼터명칭", "count"),
        heatshelter_capacity=("이용가능인원", "sum"),
        heatshelter_area=("시설면적", "sum"),
    )
    .reset_index()
)

# -------------------
# 6. MinMax scaling (0-100 points)
# -------------------
# Each aggregate column is rescaled independently onto the 0-100 range and
# stored next to the raw value under a matching "*_score" column.
raw_columns = ["heatshelter_count", "heatshelter_capacity", "heatshelter_area"]
score_columns = [
    "heatshelter_count_score",
    "heatshelter_capacity_score",
    "heatshelter_area_score",
]

scaler = MinMaxScaler(feature_range=(0, 100))
scaled_values = scaler.fit_transform(gu_stats[raw_columns])
gu_stats[score_columns] = scaled_values

# -------------------
# 7. Mean score
# -------------------
# Row-wise average of the three scaled scores.
columns_to_average = [
    "heatshelter_count_score",
    "heatshelter_capacity_score",
    "heatshelter_area_score",
]
gu_stats["heatshelter_mean_score"] = gu_stats[columns_to_average].mean(axis="columns")

# -------------------
# 8. Save
# -------------------
# Written without the index column, encoded as UTF-8 with a BOM.
output_path = "../../result/RST/RST-HEATSHELTER_result.csv"
gu_stats.to_csv(path_or_buf=output_path, encoding="utf-8-sig", index=False)

print("처리 완료:", output_path)
# Last expression of the cell: previews the first rows of the result.
gu_stats.head()

처리 완료: ../../result/RST/RST_HEATSHELTER_result.csv


Unnamed: 0,구이름,heatshelter_count,heatshelter_capacity,heatshelter_area,heatshelter_count_score,heatshelter_capacity_score,heatshelter_area_score,heatshelter_mean_score
0,강남구,119,2745.0,14410.87,20.869565,3.600465,6.491836,10.320622
1,강동구,231,14059.0,43172.37,69.565217,72.761171,40.084412,60.8036
2,강북구,94,2639.0,12667.72,10.0,2.952503,4.455888,5.802797
3,강서구,186,9587.0,41772.75,50.0,45.424537,38.449697,44.624745
4,관악구,141,3343.0,34954.38,30.434783,7.255945,30.486044,22.72559
