# LightGBMモデルでトレーニングする

In [None]:
!pip install lightgbm

In [None]:
# 必要なライブラリのインポート
from sklearn.model_selection import GridSearchCV
import lightgbm as lgb
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# Read the feature CSV into a DataFrame.
data_path = '/Users/hayakawakazue/Downloads/house_price/train/low_price_Important_Features.csv'
data = pd.read_csv(data_path)

# Columns used as model inputs (one per line so future diffs stay small).
important_features = [
    'OverallConditionArea',
    'OverallQual_TotalArea',
    'Age',
    'TotalBsmtSF_OverallQual',
    'BsmtUnfSF',
    'TotalArea',
    'YearBuilt',
    'GrLivArea_OverallQual',
    'OverallQual_GrLivArea',
    'RemodelAge',
    'QualityScore',
    '1stFlrSF',
    'log_1stFlrSF',
    'GarageArea',
    'TotalRmsAbvGrd_OverallCond',
    'LotArea',
    'log_LotArea',
    'GarageYrBlt',
    'BsmtFinType1',
    'LotFrontage',
]

# Separate the regression target from the predictor columns.
y = data['SalePrice']
X = data[important_features]

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Candidate hyperparameter values; the grid search evaluates every combination.
param_grid = dict(
    num_leaves=[31, 50],
    learning_rate=[0.05, 0.1],
    feature_fraction=[0.8, 0.9],
)

# Base LightGBM regressor whose hyperparameters are tuned below.
lgb_estimator = lgb.LGBMRegressor(boosting_type='gbdt', objective='regression')

# 10-fold cross-validated search, scored by negative mean absolute error.
grid = GridSearchCV(
    estimator=lgb_estimator,
    param_grid=param_grid,
    scoring='neg_mean_absolute_error',
    cv=10,
    verbose=0,
)

# Run the search on the training split only.
grid.fit(X_train, y_train)

# Keep the winning combination for the final training step, then report it.
best_params = grid.best_params_
print(f"Best parameters found: {grid.best_params_}")

# Train the final model with the hyperparameters chosen by the grid search.
#
# Early stopping selects the number of boosting rounds by monitoring a
# validation metric. Monitoring the test split would leak it into model
# selection and make the test metrics reported afterwards optimistic, so a
# dedicated validation split is carved out of the training data instead and
# the test split stays untouched until evaluation.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42
)

# Build the LightGBM datasets; the validation set references the training
# set, as LightGBM requires for validation data.
train_data = lgb.Dataset(X_fit, label=y_fit)
valid_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

# Fixed settings plus the tuned values from the grid search.
params = {
    'objective': 'regression',
    'metric': 'mae',
    'boosting_type': 'gbdt',
    'num_leaves': best_params['num_leaves'],
    'learning_rate': best_params['learning_rate'],
    'feature_fraction': best_params['feature_fraction']
}

# Allow up to 1000 rounds, stopping once the validation MAE has not improved
# for 100 consecutive rounds.
model = lgb.train(
    params,
    train_data,
    valid_sets=[valid_data],
    num_boost_round=1000,
    callbacks=[lgb.early_stopping(stopping_rounds=100)]
)

# Predict on the held-out rows using the iteration picked by early stopping.
y_pred = model.predict(X_test, num_iteration=model.best_iteration)

# Regression metrics on the held-out split.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
rmse = mse ** 0.5  # RMSE is the square root of the MSE

# Mean absolute percentage error, expressed in percent.
relative_error = (y_test - y_pred) / y_test
mape = np.mean(np.abs(relative_error)) * 100

# Report the metrics.
print(f"MAE: {mae}")
print(f"MSE: {mse}")
print(f"R2: {r2}")
print(f"RMSE: {rmse}")
print(f"MAPE: {mape}%")

