In [None]:
!pip install catboost

In [None]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error

scaler = StandardScaler()

# Row counts that define the data windows used in this cell.
# NOTE(review): these look like hourly counts (35064 h = 4 years incl. a leap day,
# 168 h = 1 week, 8760 h = 1 year) - confirm against the dataset.
N_MODEL_ROWS = 35064    # rows taken from the start of the file for train/validation
N_SKIP_ROWS = 168       # leading rows of that window that are discarded
N_HOLDOUT_ROWS = 8760   # rows taken from the end of the file for the final evaluation

# -------------------------------------------------
# 1. Read the CSV and carve out the modelling and hold-out windows
# -------------------------------------------------
file_path = "/content/final_dataset_complete.csv"
sd = pd.read_csv(file_path)

# Modelling window: first N_MODEL_ROWS rows of the file, minus its first N_SKIP_ROWS rows.
df = sd.head(N_MODEL_ROWS).reset_index(drop=True)
df = df.iloc[N_SKIP_ROWS:].reset_index(drop=True)

# Hold-out window: last N_HOLDOUT_ROWS rows of the file.
# The hold-out target/features must come from `sd` (not `df`); otherwise the
# final MAE is measured on the very rows the model was trained and tuned on.
# NOTE(review): assumes the file is long enough that the hold-out rows do not
# overlap the modelling window - confirm against the dataset length.
sd = sd.tail(N_HOLDOUT_ROWS)
y_t = sd.iloc[:, 0].values
X_t = sd.iloc[:, 2:].values

# -------------------------------------------------
# 2. Column 0 is the target y; columns 2 onward are the features X
#    (column 1 is left out of both)
# -------------------------------------------------
y = df.iloc[:, 0].values
X = df.iloc[:, 2:].values

print("X shape:", X.shape)
print("y shape:", y.shape)

# -------------------------------------------------
# 3. Train / validation split (first quarter of the window = validation)
# -------------------------------------------------
test_size = len(df) // 4

X_train, X_test = X[test_size:], X[:test_size]
y_train, y_test = y[test_size:], y[:test_size]

# Fit the scaler on the training rows only, then apply it to both splits.
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# -------------------------------------------------
# 4. CatBoost model (MAE loss + early stopping)
# -------------------------------------------------
model = CatBoostRegressor(
    iterations=8000,
    learning_rate=0.03,
    depth=8,

    # Optimise and report MAE.
    loss_function='MAE',
    eval_metric='MAE',

    random_seed=42,
    l2_leaf_reg=3,
    subsample=0.8,
    bootstrap_type='Bernoulli',

    # Early stopping: stop after 200 iterations without improvement on the eval set.
    od_type='Iter',
    od_wait=200,
    verbose=200
)

# -------------------------------------------------
# 5. Fit
# -------------------------------------------------
train_pool = Pool(X_train, y_train)
test_pool = Pool(X_test, y_test)

# The validation split is the eval set, so it drives early stopping and
# best-iteration selection; it is therefore not an unbiased test set.
model.fit(
    train_pool,
    eval_set=test_pool,
    use_best_model=True
)

# -------------------------------------------------
# 6. Predict on the hold-out window and report MAE
# -------------------------------------------------
# Scale the hold-out features with the scaler fitted on the training split.
X_t = scaler.transform(X_t)
y_pred = model.predict(X_t)

print("Sample predictions:", y_pred[:5])

mae = mean_absolute_error(y_t, y_pred)
# NOTE(review): the "15%" in this label is not derived from the data; the
# evaluated rows are the last N_HOLDOUT_ROWS rows of the file - confirm the wording.
print("Test MAE (last 15% of data):", mae)

model.save_model("/content/catboost_model.cbm")

In [None]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from lightgbm import early_stopping, log_evaluation
# Imported here as well so this cell does not depend on the CatBoost cell having run.
from sklearn.metrics import mean_absolute_error
from sklearn.preprocessing import StandardScaler

# Row counts that define the data windows used in this cell.
# NOTE(review): these look like hourly counts (35064 h = 4 years incl. a leap day,
# 168 h = 1 week, 8760 h = 1 year) - confirm against the dataset.
N_MODEL_ROWS = 35064    # rows taken from the start of the file for train/validation
N_SKIP_ROWS = 168       # leading rows of that window that are discarded
N_HOLDOUT_ROWS = 8760   # rows taken from the end of the file for the final evaluation

# -------------------------------------------------
# 1. Read the CSV and carve out the modelling and hold-out windows
# -------------------------------------------------
file_path = "/content/final_dataset_complete.csv"
sd = pd.read_csv(file_path)

# Modelling window: first N_MODEL_ROWS rows of the file, minus its first N_SKIP_ROWS rows.
df = sd.head(N_MODEL_ROWS).reset_index(drop=True)
df = df.iloc[N_SKIP_ROWS:].reset_index(drop=True)

# Hold-out window: last N_HOLDOUT_ROWS rows of the file.
# NOTE(review): assumes the file is long enough that the hold-out rows do not
# overlap the modelling window - confirm against the dataset length.
sd = sd.tail(N_HOLDOUT_ROWS)
y_t = sd.iloc[:, 0].values
X_t = sd.iloc[:, 2:].values

# -------------------------------------------------
# 2. Column 0 is the target y; columns 2 onward are the features X
#    (column 1 is left out of both)
# -------------------------------------------------
y = df.iloc[:, 0].values
X = df.iloc[:, 2:].values

# -------------------------------------------------
# 3. Train / validation split (first quarter of the window = validation)
# -------------------------------------------------
test_size = len(df) // 4
X_train, X_test = X[test_size:], X[:test_size]
y_train, y_test = y[test_size:], y[:test_size]

# Fit the scaler on the training rows only, then apply it to both splits.
scaler2 = StandardScaler()
scaler2.fit(X_train)
X_train = scaler2.transform(X_train)
X_test = scaler2.transform(X_test)

# -------------------------------------------------
# 4. Build the LightGBM datasets
# -------------------------------------------------
train_data = lgb.Dataset(X_train, label=y_train)
valid_data = lgb.Dataset(X_test, label=y_test)

# -------------------------------------------------
# 5. Parameters
# -------------------------------------------------
params = {
    "objective": "regression_l1",   # train on the L1 (MAE) objective
    "metric": "l1",
    "learning_rate": 0.03,
    "num_leaves": 64,
    "feature_fraction": 0.9,
    "bagging_fraction": 0.8,
    "bagging_freq": 1,
    "lambda_l2": 2.0,
    "random_state": 42,
}

# -------------------------------------------------
# 6. Train (early stopping handled through callbacks)
# -------------------------------------------------
# The validation split drives early stopping, so it is not an unbiased test set.
model = lgb.train(
    params,
    train_data,
    num_boost_round=5000,
    valid_sets=[valid_data],
    callbacks=[
        early_stopping(stopping_rounds=200),  # stop after 200 rounds without improvement
        log_evaluation(200)                   # log the metric every 200 iterations
    ]
)

# -------------------------------------------------
# 7. Predict on the hold-out window and report MAE
# -------------------------------------------------
# Use this cell's own scaler (scaler2); the original referenced `scaler`, a
# global left over from the CatBoost cell.
X_t = scaler2.transform(X_t)
y_pred = model.predict(X_t)

print("Sample predictions:", y_pred[:5])

mae = mean_absolute_error(y_t, y_pred)
# NOTE(review): the "15%" in this label is not derived from the data; the
# evaluated rows are the last N_HOLDOUT_ROWS rows of the file - confirm the wording.
print("Test MAE (last 15% of data):", mae)

# The file is written in LightGBM's own model format; the ".cbm" suffix (CatBoost's
# extension) is kept only so the output path stays unchanged for downstream users.
model.save_model("/content/lightgbm_model.cbm")