In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score, GridSearchCV
from sklearn.metrics import make_scorer, mean_squared_error, r2_score

In [7]:
# 1. 데이터 불러오기 및 정렬
df = pd.read_csv("C:/Stocker_Project/Stocker/data/data_preprocessing.csv", parse_dates=["날짜"])

# 2. 피처 엔지니어링
df["전일_감성점수"]   = df["감성점수"].shift(1)
df["전일_변동률"]     = df["변동률(%)"].shift(1)
df["전일_상승여부"]   = df["상승 여부"].shift(1)       # 전일 상승 여부
df["5일_이동평균"]    = df["종가"].rolling(window=5).mean().shift(1)
df["거래량_변화율"]   = df["거래량"].pct_change().shift(1).round(3)

# 3. 결측치 제거
df.dropna(inplace=True)


# 4. 피처(X)와 타깃(y) 설정
X = df[["전일_감성점수", "전일_상승여부", "5일_이동평균", "거래량_변화율"]]
y = df["변동률(%)"]

# 5. 시계열 분할 (과거→최근 80:20)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, shuffle=False
)

# 6. 모델 정의
models = {
    "RandomForest": RandomForestRegressor(random_state=42),
    "GradientBoosting": GradientBoostingRegressor(random_state=42),
    "XGBoost": XGBRegressor(
        random_state=42,
        use_label_encoder=False,
        eval_metric="rmse"
    )
}

# 7. 학습 & 평가
results = {}
for name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2  = r2_score(y_test, y_pred)
    results[name] = (mse, r2)

# 8. 결과 출력
print("모델\t\tMSE\t\tR²")
for name, (mse, r2) in results.items():
    print(f"{name:16s}{mse:8.4f}\t{r2:8.4f}")


모델		MSE		R²
RandomForest      2.0703	 -0.1220
GradientBoosting  2.4507	 -0.3282
XGBoost           2.2560	 -0.2226


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
