In [16]:
from IPython.display import display, HTML
# Notebook-wide CSS overrides injected into the page: widens the main
# container, sets the code editor / input font, enlarges and bolds cell
# output, and sizes markdown list items and DataFrame tables.
NOTEBOOK_CSS = """
<style>
div.container{width:86% !important;}
div.cell.code_cell.rendered{width:100%;}
div.CodeMirror {font-family:Consolas; font-size:12pt;}
div.output {font-size:15pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:12pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:12pt;padding:5px;}
table.dataframe{font-size:15px;}
</style>
"""

style_block = HTML(NOTEBOOK_CSS)
display(style_block)

In [18]:
import pandas as pd
import numpy as np
import lightgbm as lgb

from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

# 1) Load the data; the "일시" column is parsed as datetime on read.
df = pd.read_csv("model_1/total_data.csv", parse_dates=["일시"])

# 2) User-defined bin edges and the display label of each bin.
#    With right=False below, bins are left-closed: [0, 1500), [1500, 3000), ...
bins   = [0, 1500, 3000, 4500, 6000, 7500, 9000, np.inf]
labels = ["0–1499", "1500–2999", "3000–4499", "4500–5999",
          "6000–7499", "7500–8999", "9000+"]

# 3) Bin the target column into an ordered categorical named y_bin.
df["y_bin"] = pd.cut(df["총대여량"], bins=bins, labels=labels, right=False)

# 4) Build the model features: calendar fields plus an integer-encoded district.
df["month"]      = df["일시"].dt.month
df["weekday"]    = df["일시"].dt.weekday
df["is_weekend"] = (df["weekday"] >= 5).astype(int)   # weekday 5/6 = Sat/Sun

# le_gu is kept so a district name can be encoded again at prediction time.
le_gu = LabelEncoder()
df["gu_le"] = le_gu.fit_transform(df["행정구"])

FEATURES = [
    "gu_le", "month", "weekday", "is_weekend",
    "평균기온(°C)", "일강수량(mm)", "평균 풍속(m/s)", "평균 상대습도(%)"
]

# 5) Drop rows with a missing feature or target, then encode the target.
#    .copy() makes the filtered frame independent before a column is added.
df = df.dropna(subset=FEATURES + ["y_bin"]).copy()

# The categorical codes are, by construction, positions in `labels`, so
# labels[code] is always the right bin. A LabelEncoder would instead sort the
# label strings (matching `labels` only by coincidence) and would renumber
# the classes whenever a bin has no rows.
df["y_label"] = df["y_bin"].cat.codes.astype(int)

# 6) Time-based train/validation split: rows before the cutoff date train the
#    model, rows on or after it are held out.
cut = pd.to_datetime("2024-01-01")
train = df[df["일시"] < cut]
valid = df[df["일시"] >= cut]

X_tr, y_tr = train[FEATURES], train["y_label"]
X_va, y_va = valid[FEATURES], valid["y_label"]

# 7) Train the multiclass LightGBM classifier.
# NOTE(review): LightGBM documents that row bagging only takes effect when the
# bagging frequency (subsample_freq) is non-zero; it is not set here, so
# subsample=0.8 is presumably a no-op — confirm, and add subsample_freq=1 if
# bagging is actually intended (this will change the fitted model).
lgbm_params = dict(
    objective="multiclass",
    num_class=len(labels),
    learning_rate=0.05,
    num_leaves=31,
    n_estimators=500,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)
model = lgb.LGBMClassifier(**lgbm_params)

# Stop once the validation multi_logloss has not improved for 50 rounds,
# and log the metric every 100 iterations.
fit_callbacks = [lgb.early_stopping(50), lgb.log_evaluation(100)]
model.fit(
    X_tr,
    y_tr,
    eval_set=[(X_va, y_va)],
    eval_metric="multi_logloss",
    callbacks=fit_callbacks,
)

# 8) Evaluate on the validation split.
# NOTE: X_va / y_va were also passed as eval_set for early stopping during
# fit, so these scores are an optimistic estimate of out-of-sample quality.
y_pred = model.predict(X_va)

acc      = accuracy_score(y_va, y_pred)
f1_macro = f1_score(y_va, y_pred, average="macro")
conf_mat = confusion_matrix(y_va, y_pred)   # rows = true class, cols = predicted

print("Accuracy:", acc)
print("F1-macro:", f1_macro)
print("Confusion Matrix:\n", conf_mat)

# 9) Example prediction for a single district and date.
# The calendar features are derived from one date value so they cannot drift
# apart from the date shown in the printed message.
sample_gu      = "강남구"
sample_date    = pd.Timestamp("2025-06-22")
sample_weekday = sample_date.weekday()          # Monday=0 ... Sunday=6

sample = pd.DataFrame([{
    "gu_le":        le_gu.transform([sample_gu])[0],
    "month":        sample_date.month,
    "weekday":      sample_weekday,
    "is_weekend":   int(sample_weekday >= 5),
    "평균기온(°C)":   25.0,
    "일강수량(mm)":   16.5,
    "평균 풍속(m/s)": 1.2,
    "평균 상대습도(%)": 60.0
}])[FEATURES]   # enforce the column order the model was trained on

# The predicted class is used as a position in `labels` to get the bin's text.
pred_bin = labels[int(model.predict(sample)[0])]
print(f"▶ {sample_gu} {sample_date:%Y-%m-%d} 예측 구간:", pred_bin)


Training until validation scores don't improve for 50 rounds
[100]	valid_0's multi_logloss: 0.891787
[200]	valid_0's multi_logloss: 0.844936
[300]	valid_0's multi_logloss: 0.839155
Early stopping, best iteration is:
[267]	valid_0's multi_logloss: 0.837633
Accuracy: 0.6463561643835617
F1-macro: 0.6263009455122465
Confusion Matrix:
 [[ 776  311   17    1    0    0    0]
 [ 275 1616  471   47    4    2    0]
 [  10  430 1366  361   32    5    3]
 [   1   52  186  605  227   21   15]
 [   0    8   26  189  349  129   34]
 [   0    0   11   23  103  249  109]
 [   0    2    4    2   29   87  937]]
▶ 강남구 2025-06-22 예측 구간: 3000–4499
