In [9]:
from google.colab import drive

# Google Driveをマウント
drive.mount('/content/drive')

# マウント完了後のメッセージ
print("Google Driveがマウントされました！")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Google Driveがマウントされました！


In [10]:
!pip install -U scikit-learn tensorflow




In [11]:
import tensorflow as tf

# GPUメモリの動的割り当てを有効化
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        # セッション開始前にメモリの設定を変更
        tf.config.experimental.set_memory_growth(gpus[0], True)
        print("メモリ動的制御を有効化しました。")
    except RuntimeError as e:
        print(e)
else:
    print("GPUが見つかりませんでした。")


メモリ動的制御を有効化しました。


In [12]:
!pip install optuna



In [13]:
import optuna
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import log_loss
from sklearn.utils.class_weight import compute_class_weight
import pandas as pd
import numpy as np
import joblib
import os

# 保存先のパスを指定
save_path = '/content/drive/My Drive/signate/submission'
os.makedirs(save_path, exist_ok=True)

# 乱数シードの固定
tf.random.set_seed(42)
np.random.seed(42)

# 1. データの前処理関数
def load_and_preprocess_data(file_path):
    train = pd.read_csv(file_path, low_memory=False, sep='\t')

    # ✅ 新しい特徴量の作成 (ここに移動しました)
    train['C2_I11_interaction'] = train['C2'] * train['I11']
    train['I5_I12_I6_sum'] = train['I5'] + train['I12'] + train['I6']

    target_column = 'click'
    X = train.drop(columns=[target_column, 'id'])
    y = train[target_column]

    # データ分割
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # スケーリング
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    joblib.dump(scaler, os.path.join(save_path, 'scaler_0103.pkl'))

    return X_train_scaled, X_test_scaled, y_train, y_test

# データのロード
file_path = '/content/drive/My Drive/signate/train/train_0101.tsv'
X_train_scaled, X_test_scaled, y_train, y_test = load_and_preprocess_data(file_path)

# ===========================
# Optunaの目的関数の定義
# ===========================
def objective(trial):
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    batch_size = trial.suggest_categorical('batch_size', [512, 1024, 2048])
    units_1 = trial.suggest_int('units_1', 32, 128, step=32)
    units_2 = trial.suggest_int('units_2', 32, 128, step=32)
    dropout_rate = trial.suggest_float('dropout_rate', 0.1, 0.5)

    # モデルの構築
    model = Sequential([
        Dense(units_1, activation='relu'),
        Dropout(dropout_rate),
        Dense(units_2, activation='relu'),
        Dropout(dropout_rate),
        Dense(1, activation='sigmoid', kernel_regularizer=l2(0.001))
    ])

    # モデルのコンパイル
    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    # ✅ クラスの重み計算（修正済み）
    existing_classes = np.unique(y_train.values)
    class_weights_values = compute_class_weight(
        class_weight='balanced',
        classes=existing_classes,
        y=y_train.values
    )
    # class_weights_dict = {label: weight for label, weight in zip(existing_classes, class_weights_values)}

    # ✅ モデルのトレーニング（修正済み）
    early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
    history = model.fit(
        X_train_scaled, y_train.values,  # `y_train.values` を明示的に変換
        validation_split=0.2,
        batch_size=batch_size,
        epochs=50,
        #class_weight=class_weights_dict,
        callbacks=[early_stopping],
        verbose=0
    )

    # 6. モデルの評価
    y_pred_prob = model.predict(X_test_scaled).flatten()
    y_pred_prob = np.clip(y_pred_prob, 1e-7, 1-1e-7)
    loss = log_loss(y_test, y_pred_prob)

    return loss

# ===========================
# Optunaの実行
# ===========================
study = optuna.create_study(direction='minimize', sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=20, show_progress_bar=True)

# 最適なハイパーパラメータの表示
print("Best parameters:", study.best_params)
print("Best logloss:", study.best_value)


[I 2025-01-04 07:50:23,442] A new study created in memory with name: no-name-be67ef4f-312c-440c-b5bf-3fb7aabf8cac


  0%|          | 0/20 [00:00<?, ?it/s]

[1m7307/7307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step
[I 2025-01-04 07:52:41,184] Trial 0 finished with value: 0.21536789021748523 and parameters: {'learning_rate': 0.0001329291894316216, 'batch_size': 512, 'units_1': 32, 'units_2': 32, 'dropout_rate': 0.12323344486727979}. Best is trial 0 with value: 0.21536789021748523.
[1m7307/7307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step
[I 2025-01-04 07:53:46,533] Trial 1 finished with value: 0.21251690207788487 and parameters: {'learning_rate': 0.003967605077052989, 'batch_size': 1024, 'units_1': 128, 'units_2': 128, 'dropout_rate': 0.18493564427131048}. Best is trial 1 with value: 0.21251690207788487.
[1m7307/7307[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1ms/step
[I 2025-01-04 07:54:36,683] Trial 2 finished with value: 0.2212924077781052 and parameters: {'learning_rate': 3.511356313970405e-05, 'batch_size': 2048, 'units_1': 64, 'units_2': 64, 'dropout_rate': 0.34474115788895177}. Best i

In [14]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.regularizers import l2
from sklearn.metrics import log_loss
import joblib
import numpy as np
import os

# 保存先のパス設定
save_path = '/content/drive/My Drive/signate/submission'
os.makedirs(save_path, exist_ok=True)

# ✅ 保存済みのスケーラーをロード
scaler_path = os.path.join(save_path, 'scaler_0103.pkl')
scaler = joblib.load(scaler_path)
print(f"スケーラーをロードしました: {scaler_path}")

# ✅ トレーニングデータをそのまま使用
X_train_scaled, X_test_scaled, y_train, y_test = X_train_scaled, X_test_scaled, y_train, y_test

# ✅ 最適化されたパラメータを一つ一つ設定
learning_rate = 0.0012106581337174955
batch_size = 1024
units_1 = 128
units_2 = 96
dropout_rate = 0.23605961372266365

# ===========================
# 1. モデルの構築
# ===========================
model = Sequential([
    Dense(units_1, activation='relu'),
    Dropout(dropout_rate),
    Dense(units_2, activation='relu'),
    Dropout(dropout_rate),
    Dense(1, activation='sigmoid')
])

# ===========================
# 2. モデルのコンパイル
# ===========================
model.compile(
    optimizer=Adam(learning_rate=learning_rate),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

# ===========================
# 3. 早期終了の設定
# ===========================
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True
)

# ===========================
# 4. モデルのトレーニング
# ===========================
history = model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=100,
    callbacks=[early_stopping],
    verbose=1
)

# ===========================
# 5. モデルの評価
# ===========================
y_pred_prob = model.predict(X_test_scaled).flatten()
y_pred_prob = np.clip(y_pred_prob, 1e-7, 1-1e-7)
logloss = log_loss(y_test, y_pred_prob)
print(f"Final Log Loss with Optuna Parameters: {logloss}")

# ===========================
# 6. モデルの保存 (Keras形式)
# ===========================
model_save_path_keras = '/content/drive/My Drive/signate/submission/nn_model.keras'
model.save(model_save_path_keras)
print(f"モデルをKeras形式で保存しました: {model_save_path_keras}")



スケーラーをロードしました: /content/drive/My Drive/signate/submission/scaler_0103.pkl
Epoch 1/100
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 4ms/step - accuracy: 0.9218 - loss: 0.2499 - val_accuracy: 0.9286 - val_loss: 0.2188
Epoch 2/100
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9279 - loss: 0.2223 - val_accuracy: 0.9290 - val_loss: 0.2166
Epoch 3/100
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9282 - loss: 0.2202 - val_accuracy: 0.9291 - val_loss: 0.2156
Epoch 4/100
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9284 - loss: 0.2190 - val_accuracy: 0.9291 - val_loss: 0.2151
Epoch 5/100
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9283 - loss: 0.2181 - val_accuracy: 0.9292 - val_loss: 0.2146
Epoch 6/100
[1m731/731[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.9285 - los