In [1]:
from IPython.display import display, HTML
display(HTML("""
<style>
div.container{width:99% !important;}
div.cell.code_cell.rendered{width:100%;}
div.input_prompt{padding:0px;}
div.CodeMirror {font-family:Consolas; font-size:15pt;}
div.text_cell_render.rendered_html{font-size:18pt;}
div.text_cell_render ul li{font-size:22pt; line-height:30px;}
div.output {font-size:22pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:22pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:22pt;padding:5px;}
table.dataframe{font-size:22px;}
</style>
"""))

In [2]:
# ============================================================
# [단락 0] 라이브러리
# ============================================================
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [16]:
# ============================================================
# [단락 1] 데이터 로드
# ============================================================
path = r"C:\ai\lecNote\1st_Project\data\서울_일반음식점_전처리_폐업률머지.csv"
df = pd.read_csv(path, low_memory=False)

print(df.shape)
print(df[["구","업태_그룹","폐업률_3년이내_pct"]].head())

(241267, 24)
     구   업태_그룹  폐업률_3년이내_pct
0  은평구      기타          31.0
1  은평구      한식          33.2
2  서초구  분식/간편식          39.7
3  서초구   양식/외식          30.0
4   중구   주점/치킨          24.5


In [17]:
# ============================================================
# [단락 2] (구, 업태_그룹) 유니크 데이터셋 만들기
#  - 조합당 폐업률이 하나로 고정이므로 조합당 1개 행만 사용
# ============================================================
g = (
    df[["구", "업태_그룹", "폐업률_3년이내_pct"]]
      .dropna()
      .groupby(["구", "업태_그룹"], as_index=False)
      .agg({"폐업률_3년이내_pct": "first"})
)

y = g["폐업률_3년이내_pct"].astype("float32").values

print("유니크 조합 수:", len(g))
print(g.head())


유니크 조합 수: 175
     구   업태_그룹  폐업률_3년이내_pct
0  강남구      기타          38.3
1  강남구  분식/간편식          34.0
2  강남구   양식/외식          30.3
3  강남구   주점/치킨          33.3
4  강남구    중/일식          28.1


In [18]:
# ============================================================
# [단락 3] 원-핫 인코딩
# ============================================================
X_df = pd.get_dummies(g[["구", "업태_그룹"]].astype(str), drop_first=False)
X = X_df.astype("float32").values

print("X shape:", X.shape)
print("원핫 컬럼 수:", X_df.shape[1])


X shape: (175, 32)
원핫 컬럼 수: 32


In [19]:
# ============================================================
# [단락 4] Train / Val / Test split
# ============================================================
idx = np.arange(len(g))
tr, te = train_test_split(idx, test_size=0.2, random_state=42)
tr, va = train_test_split(tr, test_size=0.2, random_state=42)

X_tr, X_va, X_te = X[tr], X[va], X[te]
y_tr, y_va, y_te = y[tr], y[va], y[te]

print("train/val/test:", X_tr.shape, X_va.shape, X_te.shape)


train/val/test: (112, 32) (28, 32) (35, 32)


In [23]:
# ============================================================
# [단락 5] 모델 정의(회귀)
# ============================================================
inp = keras.Input(shape=(X.shape[1],), name="onehot")
x = layers.Dense(64, activation="relu")(inp)
x = layers.Dropout(0.2)(x)
x = layers.Dense(32, activation="relu")(x)
out = layers.Dense(1, activation="linear")(x)

model = keras.Model(inp, out)
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss="mse",
    metrics=[keras.metrics.MAE]
)

model.summary()


Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 onehot (InputLayer)         [(None, 32)]              0         
                                                                 
 dense_15 (Dense)            (None, 64)                2112      
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_16 (Dense)            (None, 32)                2080      
                                                                 
 dense_17 (Dense)            (None, 1)                 33        
                                                                 
Total params: 4,225
Trainable params: 4,225
Non-trainable params: 0
_________________________________________________________________


In [24]:
# ============================================================
# [단락 6] 학습
# ============================================================
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=20, restore_best_weights=True)
]

history = model.fit(
    X_tr, y_tr,
    validation_data=(X_va, y_va),
    epochs=300,
    batch_size=32,
    callbacks=callbacks,
    verbose=1
)


Epoch 1/300
Epoch 2/300
Epoch 3/300
Epoch 4/300
Epoch 5/300
Epoch 6/300
Epoch 7/300
Epoch 8/300
Epoch 9/300
Epoch 10/300
Epoch 11/300
Epoch 12/300
Epoch 13/300
Epoch 14/300
Epoch 15/300
Epoch 16/300
Epoch 17/300
Epoch 18/300
Epoch 19/300
Epoch 20/300
Epoch 21/300
Epoch 22/300
Epoch 23/300
Epoch 24/300
Epoch 25/300
Epoch 26/300
Epoch 27/300
Epoch 28/300
Epoch 29/300
Epoch 30/300
Epoch 31/300
Epoch 32/300
Epoch 33/300
Epoch 34/300
Epoch 35/300
Epoch 36/300
Epoch 37/300
Epoch 38/300
Epoch 39/300
Epoch 40/300
Epoch 41/300
Epoch 42/300
Epoch 43/300
Epoch 44/300
Epoch 45/300
Epoch 46/300
Epoch 47/300
Epoch 48/300
Epoch 49/300
Epoch 50/300


Epoch 51/300
Epoch 52/300
Epoch 53/300
Epoch 54/300
Epoch 55/300
Epoch 56/300
Epoch 57/300
Epoch 58/300
Epoch 59/300
Epoch 60/300
Epoch 61/300
Epoch 62/300
Epoch 63/300
Epoch 64/300
Epoch 65/300
Epoch 66/300
Epoch 67/300
Epoch 68/300
Epoch 69/300
Epoch 70/300
Epoch 71/300
Epoch 72/300
Epoch 73/300
Epoch 74/300
Epoch 75/300
Epoch 76/300
Epoch 77/300
Epoch 78/300
Epoch 79/300
Epoch 80/300
Epoch 81/300
Epoch 82/300
Epoch 83/300
Epoch 84/300
Epoch 85/300
Epoch 86/300
Epoch 87/300
Epoch 88/300
Epoch 89/300
Epoch 90/300
Epoch 91/300
Epoch 92/300
Epoch 93/300
Epoch 94/300
Epoch 95/300
Epoch 96/300
Epoch 97/300
Epoch 98/300
Epoch 99/300


Epoch 100/300
Epoch 101/300
Epoch 102/300
Epoch 103/300
Epoch 104/300
Epoch 105/300
Epoch 106/300
Epoch 107/300
Epoch 108/300
Epoch 109/300
Epoch 110/300
Epoch 111/300
Epoch 112/300
Epoch 113/300
Epoch 114/300
Epoch 115/300
Epoch 116/300
Epoch 117/300
Epoch 118/300
Epoch 119/300
Epoch 120/300
Epoch 121/300
Epoch 122/300
Epoch 123/300
Epoch 124/300
Epoch 125/300
Epoch 126/300
Epoch 127/300
Epoch 128/300
Epoch 129/300
Epoch 130/300
Epoch 131/300
Epoch 132/300
Epoch 133/300
Epoch 134/300
Epoch 135/300
Epoch 136/300
Epoch 137/300
Epoch 138/300
Epoch 139/300
Epoch 140/300
Epoch 141/300
Epoch 142/300
Epoch 143/300
Epoch 144/300


In [25]:
# ============================================================
# [단락 7] 평가 + 예측 출력
# ============================================================
res = model.evaluate(X_te, y_te, verbose=0)
print("\n[Test] MSE, MAE =", res)

pred = model.predict(X_te, verbose=0).reshape(-1)

print("\n예측 vs 실제 (10개)")
for p, t in list(zip(pred[:10], y_te[:10])):
    print(f"pred={p:.1f}%  true={t:.1f}%")



[Test] MSE, MAE = [10.760030746459961, 2.719385862350464]

예측 vs 실제 (10개)
pred=20.5%  true=23.2%
pred=22.7%  true=24.6%
pred=25.9%  true=28.6%
pred=44.5%  true=48.0%
pred=32.7%  true=30.6%
pred=30.8%  true=27.9%
pred=36.5%  true=37.2%
pred=38.3%  true=45.6%
pred=38.1%  true=37.3%
pred=32.9%  true=36.5%


In [30]:
# ============================================================
# [단락 7] 검증(Val) 예측 + 결과 출력 (소수 1자리 "표시" 강제)
# ============================================================
pred_va = model.predict(X_va, verbose=0).reshape(-1)

mae_va = float(np.mean(np.abs(pred_va - y_va)))
mse_va = float(np.mean((pred_va - y_va) ** 2))
rmse_va = float(np.sqrt(mse_va))

print(f"\n[Validation] MAE = {mae_va:.1f} | RMSE = {rmse_va:.1f} | MSE = {mse_va:.1f}")

val_result = g.iloc[va][["구", "업태_그룹"]].copy()
val_result["true_pct"] = y_va
val_result["pred_pct"] = pred_va
val_result["abs_error"] = np.abs(val_result["pred_pct"] - val_result["true_pct"])

# ✅ 출력용 포맷(문자열) 컬럼 생성: 소수 1자리로 "강제 표시"
val_result["true_pct"]  = val_result["true_pct"].map(lambda x: f"{x:.1f}")
val_result["pred_pct"]  = val_result["pred_pct"].map(lambda x: f"{x:.1f}")
val_result["abs_error"] = val_result["abs_error"].map(lambda x: f"{x:.1f}")

print("\n[검증 예측 결과]")
print(val_result.to_string(index=False))

print("\n[오차 큰 순]")
print(val_result.sort_values("abs_error", ascending=False).to_string(index=False))



[Validation] MAE = 3.2 | RMSE = 3.7 | MSE = 13.4

[검증 예측 결과 일부(20개)]
   구  업태_그룹 true_pct pred_pct abs_error
 도봉구     기타     32.1     34.8       2.7
 관악구     카페     31.7     30.2       1.5
 동작구     카페     29.6     25.4       4.2
 양천구  주점/치킨     34.1     34.6       0.5
 송파구     카페     33.1     29.2       3.9
동대문구     카페     24.6     29.4       4.8
 금천구     기타     32.5     34.7       2.2
영등포구  주점/치킨     31.1     29.0       2.1
 은평구     한식     33.2     32.6       0.6
 금천구     카페     25.7     30.3       4.6
 노원구  양식/외식     29.7     31.4       1.7
 서초구  양식/외식     30.0     27.4       2.6
동대문구  주점/치킨     29.9     30.8       0.9
 광진구 분식/간편식     43.3     37.3       6.0
 송파구  양식/외식     35.0     31.5       3.5
 종로구  주점/치킨     24.3     19.1       5.2
 종로구     카페     19.9     18.7       1.2
서대문구     한식     27.8     31.1       3.3
 서초구     카페     31.2     25.0       6.2
 성동구  양식/외식     27.4     29.9       2.5
  중구     카페     24.0     26.7       2.7
 중랑구 분식/간편식     45.1     39.7       5.4
 은평구     기