In [14]:
import pandas as pd
import joblib
from sklearn.preprocessing import StandardScaler

# Load the test set (per the original note, this file holds numeric data only).
test_data_path = "/Users/hayakawakazue/Downloads/house_price/test/test_encoded.csv"
test_data = pd.read_csv(filepath_or_buffer=test_data_path)

# Selected feature columns. The list order is the column order of X_test and
# is reused below as the column labels of the scaled frame.
important_features = [
    "MSSubClass", "LotFrontage", "LotArea",
    "OverallQual", "OverallCond", "YearBuilt",
    "YearRemodAdd", "ExterQual", "BsmtQual",
    "BsmtFinType1", "BsmtUnfSF", "TotalBsmtSF",
    "HeatingQC", "1stFlrSF", "GrLivArea",
    "FullBath", "KitchenQual", "TotRmsAbvGrd",
    "GarageYrBlt", "GarageFinish", "GarageCars",
    "GarageArea", "GrLivArea_OverallQual", "TotalBsmtSF_OverallQual",
    "TotalArea", "QualityScore", "Age",
    "RemodelAge", "OverallConditionArea", "TotalRmsAbvGrd_OverallCond",
    "Functional_OverallQual", "log_LotArea", "sqrt_GrLivArea",
    "OverallQual_GrLivArea", "log_1stFlrSF", "sqrt_TotalBsmtSF",
    "OverallQual_TotalArea", "BsmtQual_ExterQual", "Exterior2nd_VinylSd",
    "Foundation_CBlock", "Foundation_PConc", "GarageType_Detchd",
]

# Keep only the selected feature columns of the test data (all rows).
X_test = test_data.loc[:, important_features]

# Load the previously saved scaler object from disk.
# NOTE(review): joblib.load unpickles the file, so only point this at a
# trusted artifact.
scaler_path = "/Users/hayakawakazue/Downloads/house_price/model/scaler.joblib"
scaler = joblib.load(scaler_path)

# Apply the loaded scaler to the selected features.
X_test_scaled = scaler.transform(X_test)

# Wrap the scaled output in a DataFrame labelled with the feature names.
X_test_scaled_df = pd.DataFrame(data=X_test_scaled, columns=important_features)

# Append the Id column so each row stays identifiable. The raw array is
# assigned (not the Series) so values are placed positionally rather than
# aligned on index labels.
X_test_scaled_df["Id"] = test_data["Id"].to_numpy()

# Write the preprocessed frame to CSV, omitting the row index.
preprocessed_test_path = "/Users/hayakawakazue/Downloads/house_price/test/test_preprocessed.csv"
X_test_scaled_df.to_csv(path_or_buf=preprocessed_test_path, index=False)

# Preview the first five rows as a quick visual check.
print(X_test_scaled_df.head(5))

# Report where the prediction-ready dataset was saved.
print(f"予測の準備が出来たデータセットを保存しました: {preprocessed_test_path}")


   MSSubClass  LotFrontage   LotArea  OverallQual  OverallCond  YearBuilt  \
0   -0.872563     0.633321  0.549553    -0.795151     0.381743  -0.342192   
1   -0.872563     0.691373  1.285681    -0.071836     0.381743  -0.441866   
2    0.073375     0.285010  1.164060    -0.795151    -0.517200   0.853898   
3    0.073375     0.517217  0.092012    -0.071836     0.381743   0.887122   
4    1.492282    -1.514594 -1.292020     1.374795    -0.517200   0.687774   

   YearRemodAdd  ExterQual  BsmtQual  BsmtFinType1  ...  \
0     -1.156380  -0.689604 -0.833633     -0.275966  ...   
1     -1.301740  -0.689604 -0.833633      0.690246  ...   
2      0.636400  -0.689604  0.641645      1.173351  ...   
3      0.636400  -0.689604 -0.833633      1.173351  ...   
4      0.345679   1.052302  0.641645      0.690246  ...   

   OverallQual_GrLivArea  log_1stFlrSF  sqrt_TotalBsmtSF  \
0              -1.080550     -0.665286         -0.290137   
1              -0.338664      0.598927          0.699279   
2 