In [1]:
import pandas as pd
import re
from sklearn.preprocessing import MinMaxScaler

In [None]:
# -------------------
# 1. Load the data and
# 2. keep only the shelters whose 사용여부 flag is "Y"
# -------------------
file_path = "../../data/RST-COLDSHELTER/RST-COLDSHELTER.csv"

# The source file is read as CP949. The filter runs on the freshly loaded
# frame, and .copy() detaches the result so the column assignments made later
# operate on an independent frame rather than on a slice of the raw data.
df = (
    pd.read_csv(file_path, encoding="cp949")
    .loc[lambda frame: frame["사용여부"].eq("Y")]
    .copy()
)

# -------------------
# 3. Extract the district (gu) name from the road-name address
# -------------------
# A district name is a whole whitespace-delimited token ending in "구".
# Anchoring on token boundaries keeps fragments such as "압구" (from the road
# name "압구정로") or "대구" (from "대구광역시") from being taken for a district.
_GU_TOKEN = re.compile(r"(?<!\S)(\S+구)(?!\S)")


def extract_gu(address: object) -> "str | None":
    """Return the first district (gu) token found in an address.

    Args:
        address: Raw address value. Anything that is not a string
            (NaN, None, numbers) is treated as a missing address.

    Returns:
        The first whitespace-delimited token that ends in "구", or None when
        the address is missing or contains no such token.
    """
    # NaN is a float and None is NoneType, so one isinstance check covers
    # every missing-value representation as well as stray numeric cells.
    if not isinstance(address, str):
        return None
    match = _GU_TOKEN.search(address)
    return match.group(1) if match else None

# Derive the district of every shelter from its road-name address.
df["구이름"] = df["도로명주소"].apply(extract_gu)

# -------------------
# 4. Numeric conversion (strip thousands separators, then cast to float)
# -------------------
for col in ["이용가능인원", "시설면적"]:
    df[col] = (
        df[col].astype(str)
        .str.replace(",", "", regex=False)
        .astype(float)
    )

# -------------------
# 5. Per-district aggregation
# -------------------
# "size" counts every row of a group. The earlier "count" aggregation only
# counted rows with a non-null 쉼터명칭, so a shelter with a missing name was
# left out of the shelter total even though its capacity and area were summed.
# Rows whose 구이름 is missing are excluded by groupby's default dropna=True.
gu_stats = df.groupby("구이름").agg(
    coldshelter_count=("쉼터명칭", "size"),         # number of shelters
    coldshelter_capacity=("이용가능인원", "sum"),   # total capacity
    coldshelter_area=("시설면적", "sum"),           # total floor area
).reset_index()

# -------------------
# 6. MinMax scaling (0-100 points)
# -------------------
# Each metric is scaled independently across districts: the smallest value of
# a column maps to 0 and the largest to 100.
scaler = MinMaxScaler(feature_range=(0, 100))
scaled_values = scaler.fit_transform(
    gu_stats[["coldshelter_count", "coldshelter_capacity", "coldshelter_area"]]
)

gu_stats[["coldshelter_count_score", "coldshelter_capacity_score", "coldshelter_area_score"]] = scaled_values

# -------------------
# 7. Mean score
# -------------------
# Unweighted row-wise mean of the three scaled scores.
gu_stats["coldshelter_mean_score"] = gu_stats[
    ["coldshelter_count_score", "coldshelter_capacity_score", "coldshelter_area_score"]
].mean(axis=1)

# -------------------
# 8. Save
# -------------------
# Written as UTF-8 with a BOM ("utf-8-sig") and without the index column.
output_path = "../../result/RST/RST-COLDSHELTER_result.csv"
gu_stats.to_csv(output_path, index=False, encoding="utf-8-sig")

print("처리 완료:", output_path)
# Bare expression: as the last statement of a notebook cell this displays the
# first rows of the result.
gu_stats.head()

처리 완료: ../../result/RST/RST_COLDSHELTER_result.csv


Unnamed: 0,구이름,coldshelter_count,coldshelter_capacity,coldshelter_area,coldshelter_count_score,coldshelter_capacity_score,coldshelter_area_score,coldshelter_mean_score
0,강남구,62,2207.0,12517.2,62.5,28.848241,13.346158,34.898133
1,강동구,60,2852.0,9479.59,59.722222,38.84669,9.446687,36.0052
2,강북구,85,2716.0,12760.9,94.444444,36.73849,13.659003,48.280646
3,강서구,87,5223.0,25360.29,97.222222,75.600682,29.833218,67.552041
4,관악구,80,2098.0,26105.03,87.5,27.15858,30.789263,48.482614
