<a href="https://colab.research.google.com/github/entanglement-nak/portfolio-nak/blob/main/RF_memory_profiler%E8%A8%82%EF%BC%91.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef, classification_report
from sklearn.utils import resample
!pip install memory_profiler
from memory_profiler import profile

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0


In [None]:
# Load the memory_profiler IPython extension (it provides the %memit magic
# used by the measurement loop later in this notebook).
%load_ext memory_profiler

In [None]:
# Mount Google Drive at /content/drive; the dataset paths used in main()
# live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# データの読み込み
# profile
def load_data(train_path, test_path):
    """Read the training and test Excel workbooks into DataFrames.

    Both workbooks are parsed with ``header=None``, i.e. the first
    spreadsheet row is treated as data rather than as column names.

    Args:
        train_path: Location of the training workbook.
        test_path: Location of the test workbook.

    Returns:
        A ``(train_df, test_df)`` tuple of DataFrames, in that order.
    """
    # Training file is read first, then the test file.
    frames = [pd.read_excel(path, header=None) for path in (train_path, test_path)]
    return frames[0], frames[1]

In [None]:
# データの前処理
# profile
def preprocess_data(train_df, test_df):
    """Merge the train/test frames, impute bad values, and split off labels.

    The two frames are stacked row-wise. Infinite values are treated as
    missing, and every missing value is replaced by the mean of its column.
    The first column is returned as the target and the rest as features.

    Args:
        train_df: DataFrame whose first column holds the labels.
        test_df: DataFrame with the same column layout as ``train_df``.

    Returns:
        A ``(X, y)`` tuple: ``X`` is the DataFrame of feature columns and
        ``y`` is the Series taken from the first column.
    """
    combined_data = pd.concat([train_df, test_df], axis=0)

    # Turn +/-inf into NaN *before* computing the column means. Computing the
    # mean while infinities are still present yields an inf (or NaN) mean, and
    # filling with it would put the infinities straight back into the data.
    combined_data_finite = combined_data.replace([np.inf, -np.inf], np.nan)
    combined_data_cleaned = combined_data_finite.fillna(combined_data_finite.mean())

    # NOTE(review): the imputation also applies to the label column, and the
    # means are computed over train and test rows together - confirm both are
    # intended.
    label_column = combined_data_cleaned.columns[0]
    X = combined_data_cleaned.drop(label_column, axis=1)
    y = combined_data_cleaned[label_column]
    return X, y

In [None]:
# データの分割とアップサンプリング
# profile
def split_and_upsample(X, y):
    """Hold out a test split and balance the training split by resampling.

    30% of the rows are held out (``random_state=42``). Within the remaining
    training rows, those labelled ``-1`` are resampled with replacement
    (``random_state=123``) until they match the number of rows labelled ``1``.
    The held-out rows are returned untouched.

    Args:
        X: Feature DataFrame.
        y: Label Series aligned with ``X``.

    Returns:
        ``(X_train_upsampled, y_train_upsampled, X_test, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Glue the labels on as the last column so rows and labels are resampled together.
    train_frame = pd.concat([X_train, y_train], axis=1)
    label_col = train_frame.columns[-1]
    labels = train_frame[label_col]

    normal_rows = train_frame[labels == 1]
    anomaly_rows = train_frame[labels == -1]

    # Draw from the -1 rows with replacement until both classes have equal size.
    anomaly_resampled = resample(
        anomaly_rows,
        replace=True,
        n_samples=len(normal_rows),
        random_state=123,
    )

    balanced = pd.concat([normal_rows, anomaly_resampled])
    return balanced.drop(label_col, axis=1), balanced[label_col], X_test, y_test

In [None]:
# モデルの訓練（ランダムフォレストへ変更）
def train_model(X_train_upsampled, y_train_upsampled, random_state=42):
    """Fit a random forest classifier on the (balanced) training data.

    Args:
        X_train_upsampled: Training features.
        y_train_upsampled: Training labels aligned with the features.
        random_state: Seed passed to the forest so that repeated runs build
            the same model, matching the seeded split and resampling steps.
            Pass ``None`` to get an unseeded (non-reproducible) forest.

    Returns:
        The fitted ``RandomForestClassifier``.
    """
    random_forest = RandomForestClassifier(
        n_estimators=500,
        max_depth=11,
        min_samples_split=12,
        min_samples_leaf=1,
        random_state=random_state,
    )
    random_forest.fit(X_train_upsampled, y_train_upsampled)
    return random_forest

In [None]:
import time

In [None]:
# 予測と評価（時間計測付き）
def predict_and_evaluate(model, X_test, y_test):
    """Predict on the test set, time the prediction, and compute metrics.

    Only the ``model.predict`` call is timed; metric computation is excluded.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True test labels.

    Returns:
        ``(accuracy, mcc, f1, report, elapsed_time)`` where ``report`` is the
        text classification report and ``elapsed_time`` is the prediction
        duration in seconds.
    """
    # perf_counter is a monotonic, high-resolution clock meant for measuring
    # intervals; time.time() can jump if the system clock is adjusted.
    start_time = time.perf_counter()
    y_pred = model.predict(X_test)
    elapsed_time = time.perf_counter() - start_time

    # Evaluation metrics.
    accuracy = accuracy_score(y_test, y_pred)
    mcc = matthews_corrcoef(y_test, y_pred)
    # NOTE(review): average='binary' scores a single positive class; with the
    # library default this is presumably label 1 - confirm that is the class
    # of interest rather than -1.
    f1 = f1_score(y_test, y_pred, average='binary')
    report = classification_report(y_test, y_pred)

    return accuracy, mcc, f1, report, elapsed_time

In [None]:
# メイン関数
# profile
def main(
    train_path=r"/content/drive/MyDrive/Wafer/Wafer_TRAIN.xlsx",
    test_path=r"/content/drive/MyDrive/Wafer/Wafer_TEST.xlsx",
):
    """Run the full pipeline once and print the evaluation results.

    Steps: load the two workbooks, preprocess them, split and upsample the
    training data, train the model, then predict and evaluate on the held-out
    rows.

    Args:
        train_path: Path of the training workbook. Defaults to the Wafer
            training file on the mounted Google Drive.
        test_path: Path of the test workbook. Defaults to the Wafer test
            file on the mounted Google Drive.

    Returns:
        None. The metrics, the classification report, and the prediction
        time are printed.
    """
    # Load the data.
    train_df, test_df = load_data(train_path, test_path)

    # Preprocess.
    X, y = preprocess_data(train_df, test_df)

    # Split and upsample.
    X_train_upsampled, y_train_upsampled, X_test, y_test = split_and_upsample(X, y)

    # Train the model.
    model = train_model(X_train_upsampled, y_train_upsampled)

    # Predict and evaluate.
    accuracy, mcc, f1, report, elapsed_time = predict_and_evaluate(model, X_test, y_test)

    # Report the results.
    print(f"Accuracy: {accuracy}, MCC: {mcc}, F1 Score: {f1}")
    print(report)
    print(f"予測にかかった時間: {elapsed_time} 秒")

In [None]:
# Run the whole pipeline 10 times, measuring memory usage on each run.
for i in range(10):
    # Header line identifying which run the following output belongs to.
    print(f"{i+1}回目の計測")
    # %memit executes main() and reports peak memory and the increment.
    %memit main()
    print("\n")

1回目の計測
Accuracy: 0.9990697674418605, MCC: 0.9948874555978666, F1 Score: 0.999482669425763
              precision    recall  f1-score   support

          -1       0.99      1.00      1.00       216
           1       1.00      1.00      1.00      1934

    accuracy                           1.00      2150
   macro avg       1.00      1.00      1.00      2150
weighted avg       1.00      1.00      1.00      2150

予測にかかった時間: 0.11068916320800781 秒
peak memory: 306.88 MiB, increment: 113.08 MiB


2回目の計測
Accuracy: 0.9990697674418605, MCC: 0.9948874555978666, F1 Score: 0.999482669425763
              precision    recall  f1-score   support

          -1       0.99      1.00      1.00       216
           1       1.00      1.00      1.00      1934

    accuracy                           1.00      2150
   macro avg       1.00      1.00      1.00      2150
weighted avg       1.00      1.00      1.00      2150

予測にかかった時間: 0.1037590503692627 秒
peak memory: 316.29 MiB, increment: 49.11 MiB


3回目の