In [1]:
from IPython.display import display, HTML
# Notebook-wide CSS overrides: widen the page container and enlarge the
# fonts used for code input, cell output, rendered markdown and DataFrame tables.
notebook_css = """
<style>
div.container{width:99% !important;}
div.cell.code_cell.rendered{width:100%;}
div.input_prompt{padding:0px;}
div.CodeMirror {font-family:Consolas; font-size:15pt;}
div.text_cell_render.rendered_html{font-size:18pt;}
div.text_cell_render ul li{font-size:22pt; line-height:30px;}
div.output {font-size:22pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:22pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:22pt;padding:5px;}
table.dataframe{font-size:22px;}
</style>
"""
display(HTML(notebook_css))

In [2]:
import pandas as pd
import numpy as np

# =========================
# 0) File paths
# =========================
# Both input CSVs live in the same project data directory.
data_dir = r"C:\ai\lecNote\1st_Project\data"
food_path = rf"{data_dir}\서울_일반음식점_전처리.csv"  # restaurant records (contain 인허가일자)
emp_path = rf"{data_dir}\고용지표_수정.csv"  # employment indicators (contain 시점)

In [3]:
# =========================
# 1) Load
# =========================
# Both files are read as UTF-8 with BOM; switch encoding to "cp949" if needed.
df = pd.read_csv(food_path, low_memory=False, encoding="utf-8-sig")

# Read the period column (시점) as text. With type inference, a label such as
# "2020.10" is parsed as the float 2020.1, which the later YYYYMM conversion
# would read as month 1 and silently merge October into January.
emp = pd.read_csv(
    emp_path,
    low_memory=False,
    encoding="utf-8-sig",
    dtype={"시점": str},
)

In [4]:
# =========================
# 2) df: licensing date (인허가일자) -> licensing year-month (인허가연월, YYYYMM)
# =========================
# Values that cannot be parsed as dates become NaT (errors="coerce").
df["인허가일자"] = pd.to_datetime(df["인허가일자"], errors="coerce")

# year * 100 + month gives an integer YYYYMM key; the nullable Int64 dtype
# keeps rows whose date is missing as <NA>.
permit_dt = df["인허가일자"].dt
df["인허가연월"] = permit_dt.year.mul(100).add(permit_dt.month).astype("Int64")

In [5]:
# =========================
# 3) emp: period label (시점) -> year-month (연월, YYYYMM)
# =========================
def to_yyyymm_from_text(s: pd.Series) -> pd.Series:
    """Convert free-form period labels to integer YYYYMM keys.

    Each value is stringified and stripped, then parsed in two steps:

    1. the first run of six consecutive digits is taken as YYYYMM
       (e.g. "202001", "20200115");
    2. otherwise a four-digit year followed by non-digit separator(s) and a
       one- or two-digit month is used (e.g. "2020.01", "2020-1", "2020년 1월").

    Values that match neither pattern, or whose month part is outside 1..12,
    are returned as <NA> so that impossible keys never reach a join.

    Parameters
    ----------
    s : pd.Series
        Period labels of any dtype.

    Returns
    -------
    pd.Series
        Nullable ``Int64`` series with the same index as ``s``.
    """
    text = s.astype(str).str.strip()

    # Step 1: contiguous six-digit YYYYMM.
    six_digit = pd.to_numeric(text.str.extract(r"(\d{6})")[0], errors="coerce")

    # Step 2: "year <separator> month". Computed for every row on the same
    # index so the fill below needs no label re-alignment (this also works when
    # the index contains duplicate labels).
    parts = text.str.extract(r"(?P<y>\d{4})\D+(?P<m>\d{1,2})")
    year = pd.to_numeric(parts["y"], errors="coerce")
    month = pd.to_numeric(parts["m"], errors="coerce")
    out = six_digit.fillna(year * 100 + month)

    # Reject impossible months (e.g. "2020-13" or "202000").
    out = out.where((out % 100).between(1, 12))

    return out.astype("Int64")

# Derive the YYYYMM key column (연월) from the raw period labels (시점).
emp["연월"] = emp["시점"].pipe(to_yyyymm_from_text)

In [6]:
# =========================
# 4) Collapse to one row per month (duplicates are averaged)
# =========================
base_cols = ["경제활동참가율", "실업률", "고용률"]

# Keep only the indicator columns that are actually present in emp.
keep_cols = []
for col in base_cols:
    if col in emp.columns:
        keep_cols.append(col)

# Rows without a parsed month cannot be keyed, so drop them before grouping.
emp_with_month = emp.dropna(subset=["연월"])
emp_m = emp_with_month.groupby("연월", as_index=False)[keep_cols].mean()

In [7]:
# =========================
# 5) Attach employment indicators by licensing month (adds new columns)
# =========================
# validate="many_to_one" makes pandas raise if emp_m ever contains the same
# month twice, instead of silently duplicating rows in the merge result.
merged = df[["인허가연월"]].merge(
    emp_m,
    left_on="인허가연월",
    right_on="연월",
    how="left",
    validate="many_to_one",
)

# The merge result carries a fresh 0..n-1 index, so copy the values by
# position (to_numpy) rather than relying on label alignment with df.index.
# Looping over keep_cols stays consistent with the aggregation step, which
# only keeps the indicator columns that exist in emp.
for col in keep_cols:
    df[f"인허가일자_{col}"] = merged[col].to_numpy()

In [8]:
# =========================
# 6) Save
# =========================
save_path = r"C:\ai\lecNote\1st_Project\data\서울_일반음식점_인허가일자기준_고용지표_추가.csv"

# Write without the index column, as UTF-8 with BOM.
csv_options = {"index": False, "encoding": "utf-8-sig"}
df.to_csv(save_path, **csv_options)
print("저장 완료:", save_path)

저장 완료: C:\ai\lecNote\1st_Project\data\서울_일반음식점_인허가일자기준_고용지표_추가.csv
