In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression

# データの読み込み
data = pd.read_csv('drive/MyDrive/心不全予測/train.csv')

# 特徴量とターゲット変数の分割
X = data.drop(columns=['id','target'],axis=1)
y = data['target']

# 訓練データとテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 基本学習器の定義
models = [
    RandomForestClassifier(n_estimators=100, random_state=42),
    KNeighborsClassifier(n_neighbors=5),
    SVC(kernel='rbf', probability=True)
]

# 基本学習器の予測結果を格納する配列
X_train_meta = np.zeros((len(X_train), len(models)))
X_test_meta = np.zeros((len(X_test), len(models)))

# 基本学習器の訓練と予測
for i, model in enumerate(models):
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    X_train_meta[:, i] = y_pred_train
    X_test_meta[:, i] = y_pred_test

# メタモデルの定義と訓練
meta_model = LogisticRegression()
meta_model.fit(X_train_meta, y_train)

# メタモデルの予測
y_pred_train_meta = meta_model.predict(X_train_meta)
y_pred_test_meta = meta_model.predict(X_test_meta)

# 精度の評価
accuracy_train = accuracy_score(y_train, y_pred_train_meta)
accuracy_test = accuracy_score(y_test, y_pred_test_meta)
print("Training Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)


Training Accuracy: 1.0
Test Accuracy: 0.87


In [10]:
# テストデータのCSVファイルを読み込む
test_df = pd.read_csv('drive/MyDrive/心不全予測/test.csv')

# テストデータから特徴量を取得
X_test = test_df.drop('id', axis=1)

# テストデータでの予測
y_pred = model.predict(X_test)
y_pred = [1 if p >= 0.5 else 0 for p in y_pred]

# 予測結果を持つDataFrameを作成
submission_df = pd.DataFrame({
    'index': test_df['id'],  # テストデータのインデックス
    'prediction': y_pred        # 予測結果
})

# CSVファイルとして保存（ヘッダ無し）
submission_df.to_csv('drive/MyDrive/心不全予測/スタッキング_1.csv',header=False, index=False)

In [11]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# データの読み込み
data = pd.read_csv('drive/MyDrive/心不全予測/train.csv')

# 特徴量とターゲット変数の分割
X = data.drop(columns=['id','target'], axis=1)
y = data['target']

# 訓練データとテストデータに分割
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 基本学習器の定義
models = [
    RandomForestClassifier(n_estimators=100, random_state=42),
    KNeighborsClassifier(n_neighbors=5),
    SVC(kernel='rbf', probability=True)
]

# 基本学習器の予測結果を格納する配列
X_train_meta = np.zeros((len(X_train), len(models)))
X_test_meta = np.zeros((len(X_test), len(models)))

# 基本学習器の訓練と予測
for i, model in enumerate(models):
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)
    X_train_meta[:, i] = y_pred_train
    X_test_meta[:, i] = y_pred_test

# メタモデルとしてニューラルネットワークを定義
meta_model = Sequential([
    Dense(128, activation='relu', input_shape=(len(models),)),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(1, activation='sigmoid')
])

# メタモデルのコンパイル
meta_model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 基本学習器の予測結果を標準化
scaler = StandardScaler()
X_train_meta_scaled = scaler.fit_transform(X_train_meta)
X_test_meta_scaled = scaler.transform(X_test_meta)

# メタモデルのトレーニング
meta_model.fit(X_train_meta_scaled, y_train, epochs=50, batch_size=32, verbose=1)

# メタモデルの予測
y_pred_train_meta = meta_model.predict(X_train_meta_scaled)
y_pred_test_meta = meta_model.predict(X_test_meta_scaled)

# 二値化（0.5以上を1、それ以下を0とする）
y_pred_train_meta_binary = np.where(y_pred_train_meta >= 0.5, 1, 0)
y_pred_test_meta_binary = np.where(y_pred_test_meta >= 0.5, 1, 0)

# 精度の評価
accuracy_train = accuracy_score(y_train, y_pred_train_meta_binary)
accuracy_test = accuracy_score(y_test, y_pred_test_meta_binary)
print("Training Accuracy:", accuracy_train)
print("Test Accuracy:", accuracy_test)


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Training Accuracy: 1.0
Test Accuracy: 0.87


In [12]:
# テストデータのCSVファイルを読み込む
test_df = pd.read_csv('drive/MyDrive/心不全予測/test.csv')

# テストデータから特徴量を取得
X_test = test_df.drop('id', axis=1)

# テストデータでの予測
y_pred = model.predict(X_test)
y_pred = [1 if p >= 0.5 else 0 for p in y_pred]

# 予測結果を持つDataFrameを作成
submission_df = pd.DataFrame({
    'index': test_df['id'],  # テストデータのインデックス
    'prediction': y_pred        # 予測結果
})

# CSVファイルとして保存（ヘッダ無し）
submission_df.to_csv('drive/MyDrive/心不全予測/スタッキング_2.csv',header=False, index=False)