In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## lightGBMで予測する

In [6]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import os

# データの読み込み
test_file_path = '/content/drive/My Drive/hotel_service/data/test_0111_1.csv'
test = pd.read_csv(test_file_path, low_memory=False)

# 保存済みLightGBMモデルのロード
lgb_model_path = '/content/drive/My Drive/models/lgbm_final_model_0111_1.txt'
lgb_model = lgb.Booster(model_file=lgb_model_path)

# ✅ LightGBM用前処理（カテゴリ変数をcategory型に変換）
categorical_features = list(test.select_dtypes(include='object').columns)
for col in categorical_features:
    test[col] = test[col].astype('category')

# ✅ 'id'列を除いた特徴量で予測
X_test = test.drop(columns=['id']).copy()

# ✅ 予測の実行
y_pred = lgb_model.predict(X_test)

# ✅ 提出ファイルの作成（ヘッダーなしでCSV保存）
submission = pd.DataFrame({
    'id': test['id'],
    'y': y_pred
})

# ✅ ファイル保存（フォルダが存在しない場合は作成）
submission_file_path = '/content/drive/My Drive/hotel_service/submission/submission_0111_4.csv'
os.makedirs(os.path.dirname(submission_file_path), exist_ok=True)
submission.to_csv(submission_file_path, index=False, header=False)

print(f"✅ 提出ファイルが保存されました！場所: {submission_file_path}")


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.



✅ 提出ファイルが保存されました！場所: /content/drive/My Drive/hotel_service/submission/submission_0111_4.csv


In [7]:
submission.head()

Unnamed: 0,id,y
0,0,188.635686
1,1,129.684333
2,2,99.704552
3,3,131.254351
4,4,126.702345


In [2]:
!pip install catboost

Collecting catboost
  Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl.metadata (1.2 kB)
Downloading catboost-1.2.7-cp310-cp310-manylinux2014_x86_64.whl (98.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m98.7/98.7 MB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: catboost
Successfully installed catboost-1.2.7


### catboostで予測する

In [8]:
import pandas as pd
import numpy as np
from catboost import CatBoostRegressor, Pool
import os

# テストデータの読み込み
test_file_path = '/content/drive/My Drive/hotel_service/data/test_0111_1.csv'
test = pd.read_csv(test_file_path, low_memory=False)

# CatBoostモデルのロード
model_path = '/content/drive/My Drive/hotel_service/model/catboost_0111_4.cbm'
cat_model = CatBoostRegressor()
cat_model.load_model(model_path)

# カテゴリカル変数リストの作成（テストデータから確認）
categorical_features = list(test.select_dtypes(include='object').columns)

# NaNの処理（カテゴリ変数は'missing'に、数値は中央値で補完）
for col in categorical_features:
    test[col] = test[col].fillna("missing").astype(str)  # CatBoost用にNaNを文字列変換
for col in test.select_dtypes(include=[np.number]).columns:
    test[col] = test[col].fillna(test[col].median())  # 数値変数は中央値で補完

# 'id'と'targetエンコーディング列'を除いたデータで予測
X_test = test.drop(columns=['id']).copy()

# 予測の実行
y_pred = cat_model.predict(X_test)

# 提出ファイルの作成（ヘッダーなし）
submission = pd.DataFrame({
    'id': test['id'],
    'y': y_pred
})

# 提出ファイルの保存
submission_file_path = '/content/drive/My Drive/hotel_service/submission/submission_0111_5.csv'
os.makedirs(os.path.dirname(submission_file_path), exist_ok=True)  # フォルダが存在しない場合の作成
submission.to_csv(submission_file_path, index=False, header=False)

print(f"✅ 提出ファイルが保存されました！場所: {submission_file_path}")


✅ 提出ファイルが保存されました！場所: /content/drive/My Drive/hotel_service/submission/submission_0111_5.csv


In [9]:
submission.head()

Unnamed: 0,id,y
0,0,223.19186
1,1,142.575656
2,2,126.900222
3,3,141.887829
4,4,165.240886


In [None]:
submission.head()

Unnamed: 0,id,y
0,0,224.687688
1,1,130.025444
2,2,107.263757
3,3,133.796716
4,4,148.251916


In [None]:
# テストデータの読み込み
submission_file_path = '/content/drive/My Drive/hotel_service/submission/sample_submit.csv'
submission = pd.read_csv(submission_file_path, low_memory=False)

submission.head()

Unnamed: 0,0,10
0,1,10
1,2,10
2,3,10
3,4,10
4,5,10
