<a href="https://colab.research.google.com/github/entanglement-nak/portfolio-nak/blob/main/LogiReg_memory_profiler_%E5%BF%85%E8%A6%81%E7%AE%87%E6%89%80%E3%81%AE%E3%81%BF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, matthews_corrcoef
from sklearn.utils import resample
!pip install memory_profiler
from memory_profiler import profile
from memory_profiler import memory_usage

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0


In [None]:
# Load the memory_profiler IPython extension (it provides the %memit magic)
%load_ext memory_profiler

In [None]:
# Mount Google Drive at /content/drive so that files stored on the drive
# become readable through the local file system of the Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Load the data
# profile
def load_data(train_path, test_path):
    """Read the training and test Excel workbooks into DataFrames.

    Args:
        train_path: Path of the workbook holding the training split.
        test_path: Path of the workbook holding the test split.

    Returns:
        tuple: ``(train_df, test_df)``. Both files are read with
        ``header=None``, i.e. the first spreadsheet row is treated as data
        and the columns receive integer labels.
    """
    # Training workbook first, test workbook second -- same order as the
    # returned tuple.
    frames = [
        pd.read_excel(workbook, header=None)
        for workbook in (train_path, test_path)
    ]
    return frames[0], frames[1]

In [None]:
# Preprocess the data
# profile
def preprocess_data(train_df, test_df):
    """Stack both splits, impute missing / non-finite values, split off labels.

    The two frames are concatenated row-wise (row labels are kept as they
    are, so the resulting index may contain duplicates). Every ``+inf`` /
    ``-inf`` entry is turned into NaN, and every NaN is then replaced by the
    mean of the remaining values of its column.

    Note that the imputation is applied to the whole frame, so the label
    column (column 0) goes through it as well. A column without a single
    finite value has no usable mean and therefore stays NaN.

    Args:
        train_df: DataFrame whose first column holds the label.
        test_df: DataFrame with the same column layout as ``train_df``.

    Returns:
        tuple: ``(X, y)`` where ``y`` is the first column of the cleaned
        frame and ``X`` is the cleaned frame without that column.
    """
    combined_data = pd.concat([train_df, test_df], axis=0)

    # Infinities must become NaN *before* the column means are computed:
    # a mean taken over a column that still contains +/-inf is itself inf
    # (or NaN), and imputing with it would put non-finite values right back
    # into the data.
    combined_data_finite = combined_data.replace([np.inf, -np.inf], np.nan)

    # DataFrame.mean() skips NaN, so this is the mean of the finite entries.
    combined_data_cleaned = combined_data_finite.fillna(combined_data_finite.mean())

    label_column = combined_data_cleaned.columns[0]
    X = combined_data_cleaned.drop(label_column, axis=1)
    y = combined_data_cleaned[label_column]
    return X, y

In [None]:
# Split the data and upsample
# profile
def split_and_upsample(X, y):
    """Hold out a test split and balance the training split by upsampling.

    30 % of the rows are held out for testing (``random_state=42``). Inside
    the training part, rows labelled ``1`` are treated as normal and rows
    labelled ``-1`` as anomalies; the anomaly rows are resampled with
    replacement (``random_state=123``) until there are as many of them as
    normal rows. Training rows carrying any other label value are not part
    of the returned training set. The test split is returned untouched.

    Args:
        X: Feature DataFrame.
        y: Label Series aligned with ``X``.

    Returns:
        tuple: ``(X_train_upsampled, y_train_upsampled, X_test, y_test)``.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.3, random_state=42
    )

    # Re-attach the labels as the last column so that features and labels
    # are filtered and resampled together.
    train_frame = pd.concat([X_train, y_train], axis=1)
    label_col = train_frame.columns[-1]

    normal_rows = train_frame[train_frame[label_col] == 1]
    anomaly_rows = train_frame[train_frame[label_col] == -1]

    # Draw anomaly rows with replacement up to the number of normal rows.
    anomaly_rows_upsampled = resample(
        anomaly_rows,
        replace=True,
        n_samples=len(normal_rows),
        random_state=123,
    )

    balanced = pd.concat([normal_rows, anomaly_rows_upsampled])
    X_train_upsampled = balanced.drop(label_col, axis=1)
    y_train_upsampled = balanced[label_col]
    return X_train_upsampled, y_train_upsampled, X_test, y_test

In [None]:
# Train the model
# profile
def train_model(X_train_upsampled, y_train_upsampled):
    """Fit a logistic-regression classifier on the balanced training data.

    The estimator is created with ``max_iter=1000``; every other
    hyper-parameter is left at the library default.

    Args:
        X_train_upsampled: Training features.
        y_train_upsampled: Training labels aligned with the features.

    Returns:
        The fitted ``LogisticRegression`` instance.
    """
    classifier = LogisticRegression(max_iter=1000)
    # fit() returns the estimator itself, so it can be returned directly.
    return classifier.fit(X_train_upsampled, y_train_upsampled)

In [None]:
# Predict and evaluate
# profile
def predict_and_evaluate(model, X_test, y_test):
    """Predict on the test split and score the predictions.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True labels for ``X_test``.

    Returns:
        tuple: ``(accuracy, macro_f1, weighted_f1, mcc)`` -- accuracy,
        macro-averaged F1, weighted-average F1 and Matthews correlation
        coefficient, in that order.
    """
    predicted = model.predict(X_test)
    return (
        accuracy_score(y_test, predicted),
        f1_score(y_test, predicted, average='macro'),
        f1_score(y_test, predicted, average='weighted'),
        matthews_corrcoef(y_test, predicted),
    )

In [None]:
# Main function
def main(
    train_path=r"/content/drive/MyDrive/Wafer/Wafer_TRAIN.xlsx",
    test_path=r"/content/drive/MyDrive/Wafer/Wafer_TEST.xlsx",
):
    """Run the whole pipeline once and print the evaluation metrics.

    Steps: load both workbooks, preprocess, split and upsample, train the
    model, predict on the held-out split and print the scores.

    Args:
        train_path: Excel workbook with the training data. Defaults to the
            Wafer training file on the mounted Google Drive.
        test_path: Excel workbook with the test data. Defaults to the Wafer
            test file on the mounted Google Drive.

    Returns:
        None. The metrics are written to standard output.
    """
    # Load the data
    train_df, test_df = load_data(train_path, test_path)

    # Preprocess the data
    X, y = preprocess_data(train_df, test_df)

    # Split the data and upsample
    X_train_upsampled, y_train_upsampled, X_test, y_test = split_and_upsample(X, y)

    # Train the model
    model = train_model(X_train_upsampled, y_train_upsampled)

    # Predict and evaluate
    accuracy, macro_f1, weighted_f1, mcc = predict_and_evaluate(model, X_test, y_test)

    # Print the results
    print(f"Accuracy: {accuracy}, Macro F1: {macro_f1}, Weighted F1: {weighted_f1}, MCC: {mcc}")

In [None]:
# Measure 10 times: every iteration runs the complete pipeline under %memit,
# which prints the peak memory and the memory increment of the call.
for i in range(10):
  print(f"{i+1}回目の計測")
  %memit main()
  print("\n")

1回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
peak memory: 300.92 MiB, increment: 113.12 MiB


2回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
peak memory: 310.87 MiB, increment: 64.38 MiB


3回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
peak memory: 310.79 MiB, increment: 62.97 MiB


4回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
peak memory: 314.48 MiB, increment: 66.57 MiB


5回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
peak memory: 310.93 MiB, increment: 62.70 MiB


6回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
peak memory: 31

In [None]:
import time

# Predict and evaluate
def predict_and_evaluate(model, X_test, y_test):
    """Predict on the test split, time the prediction and score the result.

    Only the ``model.predict`` call is timed; computing the metrics is not
    part of the measured interval.

    Args:
        model: Fitted estimator exposing ``predict``.
        X_test: Test features.
        y_test: True labels for ``X_test``.

    Returns:
        tuple: ``(accuracy, macro_f1, weighted_f1, mcc, elapsed_time)`` where
        ``elapsed_time`` is the duration of the prediction in seconds.
    """
    # perf_counter() is a monotonic, high-resolution clock meant for timing
    # short intervals. time.time() follows the system wall clock, which can
    # be adjusted while the code runs and may be too coarse for a call that
    # finishes within a few milliseconds.
    start_time = time.perf_counter()
    predictions = model.predict(X_test)
    elapsed_time = time.perf_counter() - start_time

    # Compute the evaluation metrics
    accuracy = accuracy_score(y_test, predictions)
    macro_f1 = f1_score(y_test, predictions, average='macro')
    weighted_f1 = f1_score(y_test, predictions, average='weighted')
    mcc = matthews_corrcoef(y_test, predictions)

    return accuracy, macro_f1, weighted_f1, mcc, elapsed_time

def main(
    train_path=r"/content/drive/MyDrive/Wafer/Wafer_TRAIN.xlsx",
    test_path=r"/content/drive/MyDrive/Wafer/Wafer_TEST.xlsx",
):
    """Run the whole pipeline once; print the metrics and the prediction time.

    Steps: load both workbooks, preprocess, split and upsample, train the
    model, predict on the held-out split and print the scores together with
    the time the prediction took.

    Args:
        train_path: Excel workbook with the training data. Defaults to the
            Wafer training file on the mounted Google Drive.
        test_path: Excel workbook with the test data. Defaults to the Wafer
            test file on the mounted Google Drive.

    Returns:
        None. The results are written to standard output.
    """
    # Load the data
    train_df, test_df = load_data(train_path, test_path)

    # Preprocess the data
    X, y = preprocess_data(train_df, test_df)

    # Split the data and upsample
    X_train_upsampled, y_train_upsampled, X_test, y_test = split_and_upsample(X, y)

    # Train the model (this is where `model` gets defined)
    model = train_model(X_train_upsampled, y_train_upsampled)

    # Predict and evaluate
    accuracy, macro_f1, weighted_f1, mcc, elapsed_time = predict_and_evaluate(model, X_test, y_test)

    # Print the results
    print(f"Accuracy: {accuracy}, Macro F1: {macro_f1}, Weighted F1: {weighted_f1}, MCC: {mcc}")
    print(f"予測にかかった時間: {elapsed_time} 秒")

# Measure 10 times: every iteration runs the complete pipeline, which prints
# the metrics and the time spent in the prediction step.
for i in range(10):
    print(f"{i+1}回目の計測")
    main()
    print("\n")

1回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
予測にかかった時間: 0.004404306411743164 秒


2回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
予測にかかった時間: 0.004326820373535156 秒


3回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
予測にかかった時間: 0.004365205764770508 秒


4回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
予測にかかった時間: 0.005676746368408203 秒


5回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
予測にかかった時間: 0.004373073577880859 秒


6回目の計測
Accuracy: 0.8944186046511627, Macro F1: 0.7880780956425417, Weighted F1: 0.9080340998952081, MCC: 0.6244731316185205
予測にかかった時間: 0.004389762878417969 秒


7回目の計測
Accuracy: 0.8944186046511627, Mac