<a href="https://colab.research.google.com/github/entanglement-nak/portfolio-nak/blob/main/CNN_memory_profiler.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install memory_profiler

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0


In [None]:
# Register memory_profiler's IPython extension so the %memit line magic used in the
# measurement loop at the end of the notebook is available.
%load_ext memory_profiler

In [None]:
import time

import numpy as np
import pandas as pd
from keras import backend as keras_backend
from keras.layers import Dense, Conv1D, Flatten, MaxPooling1D
from keras.models import Sequential
from keras.optimizers import Adam
from keras.utils import to_categorical
from memory_profiler import profile
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Mount Google Drive at /content/drive (Colab-only API); the Excel paths used by
# main() point below this mount point, so this cell must run first.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Data loading and preprocessing
# NOTE(review): the bare "profile" marker that stood here looks like a disabled
# memory_profiler @profile decorator -- confirm before re-enabling it.
def load_and_preprocess_data(train_path, test_path):
    """Read two Excel sheets, stack them, and return scaled features plus labels.

    Args:
        train_path: Path handed to ``pd.read_excel`` for the first sheet.
        test_path: Path handed to ``pd.read_excel`` for the second sheet.

    Returns:
        A ``(features, labels)`` tuple. ``features`` is the result of
        ``StandardScaler().fit_transform`` on the cleaned feature columns;
        ``labels`` is the first column of the stacked frame, keeping the row
        labels each sheet was read with.
    """
    # Stack both sheets row-wise (first sheet on top); row labels are not reset.
    sheets = [pd.read_excel(path) for path in (train_path, test_path)]
    stacked = pd.concat(sheets)

    # Column 0 is returned as the labels; every column to its right is a feature.
    labels = stacked.iloc[:, 0]
    features = stacked.iloc[:, 1:]

    # Cast the feature column labels to str before the frame reaches the scaler.
    features = features.set_axis(features.columns.astype(str), axis=1)

    # Coerce every feature to a number; unparseable cells become NaN and are
    # then replaced by 0 together with any genuinely missing values.
    numeric = features.apply(pd.to_numeric, errors='coerce')
    filled = numeric.fillna(0)

    # NOTE(review): the scaler is fitted on the rows of BOTH sheets combined --
    # confirm this is intended, since callers split the data afterwards.
    return StandardScaler().fit_transform(filled), labels

In [None]:
# CNN model construction
# NOTE(review): the bare "profile" marker that stood here looks like a disabled
# memory_profiler @profile decorator -- confirm before re-enabling it.
def build_cnn_model(input_shape):
    """Assemble and compile a small 1D-convolutional binary classifier.

    Args:
        input_shape: Forwarded unchanged as ``input_shape`` to the first
            ``Conv1D`` layer.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, binary
        cross-entropy loss, accuracy metric).
    """
    # Layers in forward order: convolution -> pooling -> flatten -> two dense
    # layers, the last one a single sigmoid unit.
    layer_stack = [
        Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=input_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1, activation='sigmoid'),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer=Adam(), loss='binary_crossentropy', metrics=['accuracy'])
    return network

In [None]:
# Main routine
def main(train_path=r"/content/drive/MyDrive/Wafer/Wafer_TRAIN.xlsx",
         test_path=r"/content/drive/MyDrive/Wafer/Wafer_TEST.xlsx",
         epochs=10):
    """Train and evaluate the CNN once, printing metrics and timings.

    Args:
        train_path: Excel file passed to ``load_and_preprocess_data`` as the
            first sheet. Defaults to the Wafer training file on the mounted Drive.
        test_path: Excel file passed to ``load_and_preprocess_data`` as the
            second sheet. Defaults to the Wafer test file on the mounted Drive.
        epochs: Number of epochs passed to ``model.fit``.

    Returns:
        None. Accuracy, macro/weighted F1, MCC, training time and prediction
        time are written to stdout.
    """
    # Release Keras' global state from any earlier run. This function is meant
    # to be called repeatedly (e.g. in a %memit loop); without this, models
    # built by previous calls stay referenced and inflate later measurements.
    keras_backend.clear_session()

    # Load and preprocess the data
    X, y = load_and_preprocess_data(train_path, test_path)

    # Re-split the combined rows 70/30 with a fixed seed
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

    # Add a trailing channel axis: (samples, features) -> (samples, features, 1)
    X_train_reshaped = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
    X_test_reshaped = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

    # Label encoding: maps -1 -> 0 and 1 -> 1.
    # NOTE(review): presumes the label column only contains -1 and 1 -- verify against the data.
    y_train_encoded = (y_train + 1) / 2
    y_test_encoded = (y_test + 1) / 2

    # Build the CNN model
    model = build_cnn_model((X_train.shape[1], 1))

    # Time the training. perf_counter is monotonic, so the measured interval
    # cannot be distorted by system clock adjustments the way time.time() can.
    start_train_time = time.perf_counter()
    model.fit(X_train_reshaped, y_train_encoded, epochs=epochs,
              validation_data=(X_test_reshaped, y_test_encoded))
    train_elapsed_time = time.perf_counter() - start_train_time

    # Time the prediction
    start_predict_time = time.perf_counter()
    y_pred = model.predict(X_test_reshaped)
    predict_elapsed_time = time.perf_counter() - start_predict_time

    # Threshold the sigmoid outputs at 0.5 and flatten to a 1-D array of 0/1
    y_pred = (y_pred > 0.5).astype(int).flatten()

    # Compute the evaluation metrics
    accuracy = accuracy_score(y_test_encoded, y_pred)
    f1_macro = f1_score(y_test_encoded, y_pred, average='macro')
    f1_weighted = f1_score(y_test_encoded, y_pred, average='weighted')
    mcc = matthews_corrcoef(y_test_encoded, y_pred)

    # Print the results (the training time was previously measured but never reported)
    print(f'Accuracy: {accuracy}')
    print(f'Macro F1 Score: {f1_macro}')
    print(f'Weighted F1 Score: {f1_weighted}')
    print(f'MCC: {mcc}')
    print(f"訓練にかかった時間: {train_elapsed_time} 秒")
    print(f"予測にかかった時間: {predict_elapsed_time} 秒")

In [None]:
# Memory measurement
# Run main() ten times; before each run print a 1-based run counter, then let the
# %memit line magic report that run's peak memory and increment.
for i in range(10):
    print(f"{i+1}回目の計測")
    %memit main()

1回目の計測
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.9986040018613308
Macro F1 Score: 0.9964099585644512
Weighted F1 Score: 0.9986053078394838
MCC: 0.9928227646601666
予測にかかった時間: 0.27057743072509766 秒
peak memory: 979.52 MiB, increment: 54.25 MiB
2回目の計測
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.9916240111679852
Macro F1 Score: 0.9777484019714772
Weighted F1 Score: 0.9914931863317187
MCC: 0.9562516322654538
予測にかかった時間: 0.2935771942138672 秒
peak memory: 997.50 MiB, increment: 53.61 MiB
3回目の計測
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Accuracy: 0.9990693345742205
Macro F1 Score: 0.9976021512575037
Weighted F1 Score: 0.9990693345742205
MCC: 0.9952043025150075
予測にかかった時間: 0.3854804039001465 秒
peak memory: 997.73 MiB, increment: 34.86 MiB
4回目の計測
Epoch 1/10
Epoch 2/10
Epoc