In [3]:
# Fetch the Titanic competition archive with the Kaggle CLI into ./data, then
# unpack it in place. `unzip -o` overwrites existing files without prompting,
# so this cell can be re-run safely.
!kaggle competitions download -c titanic --path ./data
!unzip -o ./data/titanic.zip -d ./data

titanic.zip: Skipping, found more recently modified local copy (use --force to force download)
Archive:  ./data/titanic.zip
  inflating: ./data/gender_submission.csv  
  inflating: ./data/test.csv         
  inflating: ./data/train.csv        


In [1]:
import polars as pl

# Load the Kaggle Titanic training and test splits as polars DataFrames.
train, test = pl.read_csv("./data/train.csv"), pl.read_csv("./data/test.csv")

In [4]:
# Add a boolean "IsMale" column derived from "Sex". The same expression is
# applied to both splits so the feature is encoded identically for training
# and for prediction.
train, test = (
    frame.with_columns((pl.col("Sex") == "male").alias("IsMale"))
    for frame in (train, test)
)

In [5]:
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Columns used as model features.
use_cols = ["Pclass", "IsMale", "Age", "SibSp", "Parch", "Fare"]
# Name of the label column in the training data.
target_col = "Survived"

# Separate the training frame into explanatory variables and the target.
x = train.select(use_cols)
y = train.get_column(target_col)

# Hold out 20% of the rows for validation. The polars frame and series are
# converted to pandas before splitting, and the fixed random_state keeps the
# split reproducible.
x_train, x_valid, y_train, y_valid = train_test_split(
    x.to_pandas(),
    y.to_pandas(),
    test_size=0.2,
    random_state=42,
)

# Wrap both splits as LightGBM datasets. The validation set names the training
# set as its reference, as the LightGBM Dataset API asks for validation data,
# so that both are built against the same feature binning.
train_data = lgb.Dataset(x_train, label=y_train)
valid_data = lgb.Dataset(x_valid, label=y_valid, reference=train_data)

# Training parameters (a deliberately simple example): binary objective,
# classification error as the evaluation metric, verbosity -1, fixed seed.
params = {
    "objective": "binary",
    "metric": "binary_error",
    "verbosity": -1,
    "seed": 42,
}

# Train for at most 100 boosting rounds, with an early-stopping callback that
# watches the validation set with a patience of 10 rounds.
model = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=100,
    valid_sets=[valid_data],
    callbacks=[lgb.early_stopping(stopping_rounds=10)],
)

# Predict on the held-out rows and convert the predicted scores to 0/1 labels
# using a 0.5 cut-off.
y_pred = model.predict(x_valid)
y_pred_label = (y_pred > 0.5).astype(int)

# Report validation accuracy.
# NOTE: this same split also drove early stopping during training, so the
# figure is likely somewhat optimistic.
print("Accuracy:", accuracy_score(y_true=y_valid, y_pred=y_pred_label))

Training until validation scores don't improve for 10 rounds
Early stopping, best iteration is:
[25]	valid_0's binary_error: 0.178771
Accuracy: 0.8212290502793296


In [6]:
# Build the test feature matrix from the same columns used for training.
test_x = test.select(use_cols).to_pandas()

# No missing-value imputation is applied here: the training features were
# passed to LightGBM without any imputation, so imputing only the test set
# would make the two inconsistent.

# Predict scores for the test rows and threshold them at 0.5 to get 0/1 labels.
test_pred = model.predict(test_x)
test_pred_label = (test_pred > 0.5).astype(int)

# Pair each PassengerId with its predicted label to form the submission table.
submission = (
    test.select("PassengerId")
    .to_pandas()
    .assign(**{target_col: test_pred_label})
)

# Save as CSV without the pandas index column.
submission.to_csv("./data/submission.csv", index=False)

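Submit the predictions with the Kaggle CLI, replacing `"Message"` with a short description of this submission:

`kaggle competitions submit -c titanic -f data/submission.csv -m "Message"`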