In [1]:
import pandas as pd
import joblib

# Feature columns handed to the model, in the order they are passed to predict().
important_features = [
    'TotalArea', 'QualityScore', 'OverallQual', 'GrLivArea',
    'GarageScore', 'ExterQual', 'KitchenQual',
    'GarageCars', 'BsmtQual', 'GarageArea', 'TotalBsmtSF',
    'BsmtQualityIndex', '1stFlrSF', 'AvgQualityCondition', 'QualityCondition',
    'FullBath', 'YearBuilt', 'YearRemodAdd', 'TotalRooms', 'TotRmsAbvGrd',
]

# File locations: preprocessed test CSV (input), serialized model (input),
# and the CSV that receives the test data plus the prediction column (output).
test_data_path = '/Users/hayakawakazue/Downloads/house_price/test/test_preprocessed.csv'
model_path = '/Users/hayakawakazue/Downloads/house_price/model/random_forest_best.joblib'
predicted_test_path = '/Users/hayakawakazue/Downloads/house_price/test/test_data_predicted.csv'

# Read the test data (described by the author as numeric-only), then the model.
test_data = pd.read_csv(test_data_path)
# NOTE(review): joblib.load unpickles arbitrary objects; only load trusted files.
model = joblib.load(model_path)

# Restrict the test frame to the model's input columns and run the prediction.
X_test = test_data.loc[:, important_features]
# NOTE(review): the name says "log", yet the values are stored as SalePrice with no
# inverse transform — confirm which scale the model was trained on.
y_pred_log = model.predict(X_test)

# Attach the predictions as a new 'SalePrice' column and persist the whole frame.
test_data['SalePrice'] = y_pred_log
test_data.to_csv(predicted_test_path, index=False)

# Report the frame's dimensions, a preview of the first rows, and the output path.
n_rows, n_cols = test_data.shape
print(f"\nデータの行数: {n_rows}")
print(f"データの列数: {n_cols}")

print(test_data.head())
print(f"予測結果を保存しました: {predicted_test_path}")



データの行数: 1459
データの列数: 22
   TotalArea  QualityScore  OverallQual  GrLivArea  GarageScore  ExterQual  \
0  -0.592104     -1.062297    -0.795151  -1.262931    -0.349711  -0.689604   
1  -0.062643     -0.490728    -0.071836  -0.363117    -0.962773  -0.689604   
2   0.016432     -0.490728    -0.795151   0.260310    -0.006514  -0.689604   
3  -0.028262      0.080842    -0.071836   0.208358    -0.041714  -0.689604   
4   0.047375      1.223981     1.374795  -0.464944     0.063885   1.052302   

   KitchenQual  GarageCars  BsmtQual  GarageArea  ...  1stFlrSF  \
0    -0.771091   -1.030859 -0.833633    1.252588  ... -0.720132   
1     0.735994   -1.030859 -0.833633   -0.766396  ...  0.474487   
2    -0.771091    0.315946  0.641645    0.054722  ... -0.631846   
3     0.735994    0.315946 -0.833633   -0.003240  ... -0.637364   
4     0.735994    0.315946  0.641645    0.170644  ...  0.339299   

   AvgQualityCondition  QualityCondition  FullBath  YearBuilt  YearRemodAdd  \
0            -0.398511  

# 予測したSalePriceをIdをキーにしてsample_submission.csvのId列に結合し、提出用ファイルを作成

In [2]:
import pandas as pd

# Load the submission template; only its 'Id' column (and row order) is used below.
sample_submission_path = '/Users/hayakawakazue/Downloads/house_price/sample_submission.csv'
sample_submission = pd.read_csv(sample_submission_path)

# Load the predictions CSV; it must contain 'Id' and 'SalePrice' columns.
predicted_data_path = '/Users/hayakawakazue/Downloads/house_price/test/test_data_predicted.csv'
predicted_data = pd.read_csv(predicted_data_path)

# Left-join the predictions onto the template Ids so the template's row order is kept.
# validate='one_to_one' makes pandas raise MergeError when either side has duplicate
# Ids, which would otherwise silently duplicate rows in the submission.
submission = pd.merge(
    sample_submission[['Id']],
    predicted_data[['Id', 'SalePrice']],
    on='Id',
    how='left',
    validate='one_to_one',
)

# A left join leaves NaN for template Ids with no matching prediction (and NaN
# predictions pass straight through). Fail before writing an unusable file.
missing_mask = submission['SalePrice'].isna()
if missing_mask.any():
    missing_ids = submission.loc[missing_mask, 'Id'].tolist()
    raise ValueError(
        f"SalePrice is missing for {len(missing_ids)} Id(s) in the submission, "
        f"e.g. {missing_ids[:5]}"
    )

# Write the submission file.
submission_path = '/Users/hayakawakazue/Downloads/house_price/submission_2024_07_03.csv'
submission.to_csv(submission_path, index=False)

# Show a preview of the result and where it was saved.
print(submission.head())
print(f"予測結果を保存しました: {submission_path}")


     Id      SalePrice
0  1461  127893.449143
1  1462  154157.039617
2  1463  165214.304381
3  1464  178770.622678
4  1465  208825.434472
予測結果を保存しました: /Users/hayakawakazue/Downloads/house_price/submission_2024_07_03.csv
