In [5]:
import pandas as pd
from sklearn.model_selection import train_test_split
from lightgbm import LGBMClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler



In [6]:
# データの読み込み
data = pd.read_csv('drive/MyDrive/心不全予測/train.csv')

# 特徴量とターゲット変数の分割
X = data.drop(columns=['id','target'], axis=1)
y = data['target']

# データの標準化
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 訓練データとテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# LightGBMモデルの定義とトレーニング
model = LGBMClassifier()
model.fit(X_train, y_train)

# テストデータでの予測
y_pred = model.predict(X_test)

# 精度の評価
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)


[LightGBM] [Info] Number of positive: 160, number of negative: 640
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000178 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 747
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 12
[LightGBM] [Info] [binary:BoostFromScore]: pavg=0.200000 -> initscore=-1.386294
[LightGBM] [Info] Start training from score -1.386294
Accuracy: 0.835


In [7]:
# テストデータのCSVファイルを読み込む
test_df = pd.read_csv('drive/MyDrive/心不全予測/test.csv')

# テストデータから特徴量を取得
X_test = test_df.drop('id', axis=1)
# テストデータでの予測
y_pred = model.predict(X_test)
y_pred = [1 if p >= 0.5 else 0 for p in y_pred]

# 予測結果を持つDataFrameを作成
submission_df = pd.DataFrame({
    'index': test_df['id'],  # テストデータのインデックス
    'prediction': y_pred        # 予測結果
})

# CSVファイルとして保存（ヘッダ無し）
submission_df.to_csv('drive/MyDrive/心不全予測/LightGBM_2.csv', header=False, index=False)


In [8]:
# テストデータのCSVファイルを読み込む
df = pd.read_csv('drive/MyDrive/心不全予測/LightGBM_2.csv')
print(df.head())

   1  0
0  2  0
1  4  0
2  5  0
3  6  0
4  8  0
