In [1]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler

# ---------------------------------------------------------------------------
# File locations used by this cell: the encoded (numeric-only) test set, the
# persisted scaler, and the destination for the preprocessed output.
# ---------------------------------------------------------------------------
test_data_path = '/Users/hayakawakazue/Downloads/house_price/test/test_encoded.csv'
scaler_path = '/Users/hayakawakazue/Downloads/house_price/model/train_scaler.joblib'
preprocessed_test_path = '/Users/hayakawakazue/Downloads/house_price/test/test_preprocessed.csv'

# Features regarded as important. The order here is also the column order of
# the scaled frame that gets written out (with 'Id' appended last).
important_features = [
    'TotalArea',
    'QualityScore',
    'OverallQual',
    'GrLivArea',
    'GarageScore',
    'ExterQual',
    'KitchenQual',
    'GarageCars',
    'BsmtQual',
    'GarageArea',
    'TotalBsmtSF',
    'BsmtQualityIndex',
    '1stFlrSF',
    'AvgQualityCondition',
    'QualityCondition',
    'FullBath',
    'YearBuilt',
    'YearRemodAdd',
    'TotalRooms',
    'TotRmsAbvGrd',
]

# Read the test set and keep only the important feature columns.
test_data = pd.read_csv(test_data_path)
X_test = test_data[important_features]

# Load the saved scaler and apply it to the selected features.
# NOTE(review): presumably this scaler was fitted on the training set with the
# same feature columns in the same order -- confirm against the training notebook.
scaler = joblib.load(scaler_path)
X_test_scaled = scaler.transform(X_test)

# Wrap the scaled array back into a labelled DataFrame and attach the 'Id'
# column (taken positionally from the raw test data) so each row stays
# identifiable.
X_test_scaled_df = pd.DataFrame(
    X_test_scaled,
    columns=important_features,
).assign(Id=test_data['Id'].values)

# Persist the preprocessed data, then show a preview and a confirmation message.
X_test_scaled_df.to_csv(preprocessed_test_path, index=False)

print(X_test_scaled_df.head())

print(f"予測の準備が出来たデータセットを保存しました: {preprocessed_test_path}")


   TotalArea  QualityScore  OverallQual  GrLivArea  GarageScore  ExterQual  \
0  -0.592104     -1.062297    -0.795151  -1.262931    -0.349711  -0.689604   
1  -0.062643     -0.490728    -0.071836  -0.363117    -0.962773  -0.689604   
2   0.016432     -0.490728    -0.795151   0.260310    -0.006514  -0.689604   
3  -0.028262      0.080842    -0.071836   0.208358    -0.041714  -0.689604   
4   0.047375      1.223981     1.374795  -0.464944     0.063885   1.052302   

   KitchenQual  GarageCars  BsmtQual  GarageArea  ...  BsmtQualityIndex  \
0    -0.771091   -1.030859 -0.833633    1.252588  ...         -0.727880   
1     0.735994   -1.030859 -0.833633   -0.766396  ...         -0.727880   
2    -0.771091    0.315946  0.641645    0.054722  ...          0.502577   
3     0.735994    0.315946 -0.833633   -0.003240  ...         -0.727880   
4     0.735994    0.315946  0.641645    0.170644  ...          0.502577   

   1stFlrSF  AvgQualityCondition  QualityCondition  FullBath  YearBuilt  \
0 -0.