In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor
import joblib
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

# Load the saved base regressors from a single model directory.
# NOTE: joblib.load unpickles arbitrary Python objects, so it must only be
# pointed at model files that come from a trusted source.
MODEL_DIR = '/Users/hayakawakazue/Downloads/second_apt_2024_summer/model'
xgb_model = joblib.load(os.path.join(MODEL_DIR, 'xgboost_0806_1.joblib'))
gbr_model = joblib.load(os.path.join(MODEL_DIR, 'gradient_boosting_0806_1.joblib'))
cat_model = joblib.load(os.path.join(MODEL_DIR, 'catboost_model_0806_1.joblib'))

# Explanatory variables used by the ensemble.
selected_features = [
    '長期価格トレンド',
    '建築年',
    '建築年スコア',
    'エリア人気度スコア',
    '駅距離_面積比',
    '面積/築年数比',
    '取引時点',
    '市区町村コード',
    '面積×容積率',
    '最寄駅：距離（分）',
]

# Re-read the feature table and rebuild the feature/target split (repeated
# here just to be safe).
data_path = '/Users/hayakawakazue/Downloads/second_apt_2024_summer/train/train_31_32_35_36_37_38_39/train_31_32_35_36_37_38_39_scaled_features.csv'
data = pd.read_csv(data_path)

# The target column name ends in "_log"; presumably a log-transformed total
# price — confirm against the preprocessing step.
X = data[selected_features]
y = data['取引価格（総額）_log']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=52,
)

# Define the ensemble: a voting regressor over the three loaded models.
# No weights are passed, so VotingRegressor's default weighting applies.
base_estimators = [
    ('xgb', xgb_model),
    ('gbr', gbr_model),
    ('cat', cat_model),
]
ensemble_model = VotingRegressor(estimators=base_estimators)

# Train the ensemble on the training split.
# NOTE(review): per the scikit-learn documentation, VotingRegressor.fit trains
# clones of the estimators it is given, so the fitted state of the loaded
# models is presumably not reused here — confirm this refit is intended.
ensemble_model.fit(X_train, y_train)

# Predict on the held-out split.
ensemble_pred = ensemble_model.predict(X_test)

# Compute evaluation metrics on the held-out predictions.
mse = mean_squared_error(y_test, ensemble_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, ensemble_pred)
r2 = r2_score(y_test, ensemble_pred)

# Mean of the actual target values, printed alongside the error metrics.
mean_actual_value = np.mean(y_test)

# Error expressed as a percentage.
# NOTE(review): y_test comes from the "_log" target column, so this percentage
# is relative to the values in that column; it is presumably not a percentage
# error on the untransformed price — confirm before reporting it as such.
mape = mean_absolute_percentage_error(y_test, ensemble_pred) * 100

# Report the results, one metric per line.
print(
    f'Mean Absolute Error (MAE): {mae:.4f}',
    f'Mean Squared Error (MSE): {mse:.4f}',
    f'R-squared (R2): {r2:.4f}',
    f'Root Mean Squared Error (RMSE): {rmse:.4f}',
    f'Mean Actual Value: {mean_actual_value}',
    f'Mean Absolute Percentage Error (MAPE): {mape:.2f}%',
    sep='\n',
)

# Save the fitted ensemble into the model directory and report the path.
ensemble_dir = '/Users/hayakawakazue/Downloads/second_apt_2024_summer/model'
ensemble_filename = 'ensemble_model_0806_1.joblib'
ensemble_model_path = os.path.join(ensemble_dir, ensemble_filename)
joblib.dump(ensemble_model, ensemble_model_path)
print(f"最適なアンサンブルモデルが {ensemble_model_path} に保存されました。")


Mean Absolute Error (MAE): 0.0845
Mean Squared Error (MSE): 0.0129
R-squared (R2): 0.8487
Root Mean Squared Error (RMSE): 0.1136
Mean Actual Value: 7.05634890397467
Mean Absolute Percentage Error (MAPE): 1.22%
最適なアンサンブルモデルが /Users/hayakawakazue/Downloads/second_apt_2024_summer/model/ensemble_model_0806_1.joblib に保存されました。


In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import xgboost as xgb
from sklearn.ensemble import GradientBoostingRegressor
from catboost import CatBoostRegressor
from sklearn.ensemble import VotingRegressor
import joblib
import os
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, mean_absolute_percentage_error

# Load the saved base regressors from a single model directory.
# NOTE: joblib.load unpickles arbitrary Python objects, so it must only be
# pointed at model files that come from a trusted source.
MODEL_DIR = '/Users/hayakawakazue/Downloads/second_apt_2024_summer/model'
gbr_model = joblib.load(os.path.join(MODEL_DIR, 'gradient_boosting_0806_2.joblib'))
cat_model = joblib.load(os.path.join(MODEL_DIR, 'catboost_model_0806_2.joblib'))

# Explanatory variables used by the ensemble.
selected_features = [
    '長期価格トレンド',
    'エリア人気度スコア',
    '市区町村コード',
    '取引時点',
    '駅距離_面積比',
    '面積/築年数比',
    '建築年スコア',
    '築年数',
    '面積×容積率',
    '建築年×建ぺい率',
]

# Re-read the feature table and rebuild the feature/target split (repeated
# here just to be safe).
data_path = '/Users/hayakawakazue/Downloads/second_apt_2024_summer/train/train_41_42_43_44_45_46_47/train_41_42_43_44_45_46_47_scaled_features.csv'
data = pd.read_csv(data_path)

# The target column name ends in "_log"; presumably a log-transformed total
# price — confirm against the preprocessing step.
X = data[selected_features]
y = data['取引価格（総額）_log']

# Hold out 30% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=52,
)

# Define the ensemble: a voting regressor over the two loaded models.
# No weights are passed, so VotingRegressor's default weighting applies.
base_estimators = [
    ('gbr', gbr_model),
    ('cat', cat_model),
]
ensemble_model = VotingRegressor(estimators=base_estimators)

# Train the ensemble on the training split.
# NOTE(review): per the scikit-learn documentation, VotingRegressor.fit trains
# clones of the estimators it is given, so the fitted state of the loaded
# models is presumably not reused here — confirm this refit is intended.
ensemble_model.fit(X_train, y_train)

# Predict on the held-out split.
ensemble_pred = ensemble_model.predict(X_test)

# Compute evaluation metrics on the held-out predictions.
mse = mean_squared_error(y_test, ensemble_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, ensemble_pred)
r2 = r2_score(y_test, ensemble_pred)

# Mean of the actual target values, printed alongside the error metrics.
mean_actual_value = np.mean(y_test)

# Error expressed as a percentage.
# NOTE(review): y_test comes from the "_log" target column, so this percentage
# is relative to the values in that column; it is presumably not a percentage
# error on the untransformed price — confirm before reporting it as such.
mape = mean_absolute_percentage_error(y_test, ensemble_pred) * 100

# Report the results, one metric per line.
print(
    f'Mean Absolute Error (MAE): {mae:.4f}',
    f'Mean Squared Error (MSE): {mse:.4f}',
    f'R-squared (R2): {r2:.4f}',
    f'Root Mean Squared Error (RMSE): {rmse:.4f}',
    f'Mean Actual Value: {mean_actual_value}',
    f'Mean Absolute Percentage Error (MAPE): {mape:.2f}%',
    sep='\n',
)

# Save the fitted ensemble into the model directory and report the path.
ensemble_dir = '/Users/hayakawakazue/Downloads/second_apt_2024_summer/model'
ensemble_filename = 'ensemble_model_0806_2.joblib'
ensemble_model_path = os.path.join(ensemble_dir, ensemble_filename)
joblib.dump(ensemble_model, ensemble_model_path)
print(f"最適なアンサンブルモデルが {ensemble_model_path} に保存されました。")


Mean Absolute Error (MAE): 0.0905
Mean Squared Error (MSE): 0.0154
R-squared (R2): 0.8647
Root Mean Squared Error (RMSE): 0.1241
Mean Actual Value: 7.105449318868734
Mean Absolute Percentage Error (MAPE): 1.29%
最適なアンサンブルモデルが /Users/hayakawakazue/Downloads/second_apt_2024_summer/model/ensemble_model_0806_2.joblib に保存されました。
