<h2 style='color:pink'>데이터 로딩 부분<h2>

In [45]:
import os
import pandas as pd

def load_motor_data(base_path, motor_numbers):
    data = {'normal': [], 'abnormal': []}

    for motor in motor_numbers:
        # 정상 데이터 경로
        normal_path = os.path.join(base_path, "normal", str(motor), "time_vib")
        if os.path.exists(normal_path):
            normal_files = [os.path.join(normal_path, file) for file in os.listdir(normal_path) if file.endswith('.csv')]
            for file in normal_files:
                df = pd.read_csv(file, header=None, names=['time', 'vibration'])
                data['normal'].append(df['vibration'].values)

        # 비정상 데이터 경로
        abnormal_path = os.path.join(base_path, "abnormal", str(motor), "time_vib")
        if os.path.exists(abnormal_path):
            abnormal_files = [os.path.join(abnormal_path, file) for file in os.listdir(abnormal_path) if file.endswith('.csv')]
            for file in abnormal_files:
                df = pd.read_csv(file, header=None, names=['time', 'vibration'])
                data['abnormal'].append(df['vibration'].values)

    return data

# Train/Test Split
train_motors = range(1, 17)  # 모터 1~16번: Train
test_motors = range(17, 21)  # 모터 17~20번: Test

base_path = "./motor_data"

# Train 데이터 로드
train_data = load_motor_data(base_path, train_motors)
print(train_data)
# Test 데이터 로드
test_data = load_motor_data(base_path, test_motors)



{'normal': [array([-0.06803,  0.20064,  0.27741, ...,  0.12388,  0.46932,  0.27741]), array([ 0.31684,  0.24008,  0.00978, ..., -0.25889, -0.06698,  0.20169]), array([-0.1825 ,  0.20132, -0.48956, ..., -0.10574,  0.43161,  0.43161]), array([-0.18194, -0.41223, -0.14356, ...,  0.43217,  0.00997,  0.43217]), array([ 0.08527,  0.35395,  0.27718, ...,  0.31557,  0.2388 , -0.14502]), array([-0.29641, -0.37318, -0.06612, ..., -0.21965,  0.35608, -0.02774]), array([ 0.04832,  0.27861, -0.1436 , ...,  0.16346,  0.35537,  0.24023]), array([ 0.08614,  0.20128, -0.18254, ...,  0.20128,  0.04775,  0.31643]), array([-0.4499 ,  0.12583,  0.27936, ..., -0.64181, -0.18123, -0.10446]), array([-0.25874, -0.25874,  0.16346, ..., -0.18198, -0.02845,  0.24023]), array([ 0.0473 , -0.02946,  0.43112, ...,  0.20083, -0.3749 , -0.22137]), array([-0.10551,  0.00963,  0.12478, ...,  0.35507,  0.23993,  0.0864 ]), array([-0.06691,  0.47044,  0.16339, ..., -0.37396, -0.14367, -0.06691]), array([ 0.08606, -0.02909,

<h2 style='color:pink'>정상, 비정상 데이터 끼리 묶기<h2>

In [46]:
import numpy as np

def combine_csv_data(data):
    combined_data = {}
    combined_data['normal'] = np.concatenate(data['normal'])  # 정상 데이터 결합
    combined_data['abnormal'] = np.concatenate(data['abnormal'])  # 비정상 데이터 결합
    return combined_data

# Train/Test 데이터 결합
train_combined = combine_csv_data(train_data)
test_combined = combine_csv_data(test_data)

# 출력 테스트
print("Train Normal Data Shape:", train_combined['normal'].shape)
print("Train Abnormal Data Shape:", train_combined['abnormal'].shape)
print("Test Normal Data Shape:", test_combined['normal'].shape)
print("Test Abnormal Data Shape:", test_combined['abnormal'].shape)


Train Normal Data Shape: (1700864,)
Train Abnormal Data Shape: (1705984,)
Test Normal Data Shape: (425984,)
Test Abnormal Data Shape: (425984,)


<h2 style='color:pink'>데이터 reshape<h2>

In [47]:
from sklearn.preprocessing import MinMaxScaler
"""
# Train/Test 데이터를 정규화
scaler = MinMaxScaler()
"""
train_normal = train_combined['normal'].reshape(-1)
test_normal = test_combined['normal'].reshape(-1)

train_abnormal = train_combined['abnormal'].reshape(-1)
test_abnormal = test_combined['abnormal'].reshape(-1)

train_normal

array([-0.06803,  0.20064,  0.27741, ...,  0.00967, -0.14386, -0.259  ])

<h2 style='color:pink'>데이터 슬라이싱 후 특징 추출<h2>

In [48]:
import numpy as np

# 데이터 슬라이싱 및 통계 feature 추출 함수
def extract_statistical_features(data, slice_size=256):
    features = []
    for i in range(0, len(data), slice_size):
        slice_data = data[i:i + slice_size]
        
        # 슬라이스 크기가 정확히 slice_size인지 확인
        if len(slice_data) == slice_size:
            # 통계적 특징 추출
            mean = np.mean(slice_data)
            variance = np.var(slice_data)
            rms = np.sqrt(np.mean(slice_data**2))  # Root Mean Square
            skewness = np.mean((slice_data - mean)**3) / (np.std(slice_data)**3)  # 왜도
            kurtosis = np.mean((slice_data - mean)**4) / (np.std(slice_data)**4)  # 첨도
            
            # feature 추가
            features.append([mean, variance, rms, skewness, kurtosis])
    
    return np.array(features)

# 데이터 슬라이싱 및 feature 추출
train_normal_features = extract_statistical_features(train_normal, slice_size=256)
test_normal_features = extract_statistical_features(test_normal, slice_size=256)
train_abnormal_features = extract_statistical_features(train_abnormal, slice_size=256)
test_abnormal_features = extract_statistical_features(test_abnormal, slice_size=256)

# 결과 확인
print("Train Normal Features Shape:", train_normal_features.shape)
print("Test Normal Features Shape:", test_normal_features.shape)
print("Train Abnormal Features Shape:", train_abnormal_features.shape)
print("Test Abnormal Features Shape:", test_abnormal_features.shape)


Train Normal Features Shape: (6644, 5)
Test Normal Features Shape: (1664, 5)
Train Abnormal Features Shape: (6664, 5)
Test Abnormal Features Shape: (1664, 5)


<h2 style='color:pink'>모델 설정 및 학습<h2>

In [42]:
import tensorflow as tf
from tensorflow.keras import layers, models

# 데이터 준비
X_train = train_normal_features
X_test = test_normal_features

# Autoencoder 모델 정의
input_dim = X_train.shape[1]  # feature 개수
encoding_dim = 2  # 압축된 차원 (조정 가능)

# Encoder
input_layer = layers.Input(shape=(input_dim,))
encoded = layers.Dense(encoding_dim, activation="relu")(input_layer)

# Decoder
decoded = layers.Dense(input_dim, activation="sigmoid")(encoded)

# Autoencoder 모델 생성
autoencoder = models.Model(input_layer, decoded)

# 모델 컴파일
autoencoder.compile(optimizer="adam", loss="mse")

# 모델 학습
history = autoencoder.fit(
    X_train, X_train,  # 입력과 출력이 동일
    epochs=50,
    batch_size=32,
    shuffle=True,
    validation_data=(X_test, X_test),
    verbose=1   
)

# 모델 평가
print("Training Complete!")


Epoch 1/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 936us/step - loss: 0.6524 - val_loss: 0.5473
Epoch 2/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 641us/step - loss: 0.5937 - val_loss: 0.4929
Epoch 3/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.5379 - val_loss: 0.4473
Epoch 4/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 920us/step - loss: 0.4911 - val_loss: 0.4094
Epoch 5/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4583 - val_loss: 0.3782
Epoch 6/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.4226 - val_loss: 0.3526
Epoch 7/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.3998 - val_loss: 0.3315
Epoch 8/50
[1m208/208[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3761 - val_loss: 0.3140
Epoch 9/50
[1m208/208[0m [32m━━

In [50]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

# 데이터 준비
X_train = train_normal_features
X_test = test_normal_features

# Autoencoder 모델 정의
input_dim = X_train.shape[1]  # feature 개수
encoding_dim = 4  # Latent Space 크기 조정 가능

# Encoder
input_layer = layers.Input(shape=(input_dim,))
encoded = layers.Dense(64, activation="relu")(input_layer)  # 중간 레이어 추가
encoded = layers.BatchNormalization()(encoded)  # Batch Normalization
encoded = layers.Dropout(0.2)(encoded)  # Dropout 추가
encoded = layers.Dense(encoding_dim, activation="relu")(encoded)

# Decoder
decoded = layers.Dense(64, activation="relu")(encoded)  # 중간 레이어 추가
decoded = layers.BatchNormalization()(decoded)  # Batch Normalization
decoded = layers.Dropout(0.2)(decoded)  # Dropout 추가
decoded = layers.Dense(input_dim, activation="sigmoid")(decoded)

# Autoencoder 모델 생성
autoencoder = models.Model(input_layer, decoded)

# 모델 컴파일
autoencoder.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001), loss="mse")

# Early Stopping과 Learning Rate Scheduler 콜백 정의
early_stopping = EarlyStopping(
    monitor="val_loss",
    patience=5,
    restore_best_weights=True
)

reduce_lr = ReduceLROnPlateau(
    monitor="val_loss",
    factor=0.5,  # 학습률 감소 비율
    patience=3,  # 3번의 에포크 동안 개선이 없으면 학습률 감소
    min_lr=1e-6  # 최소 학습률
)

# 모델 학습
history = autoencoder.fit(
    X_train, X_train,  # 입력과 출력이 동일
    epochs=100,  # 최대 100 에포크까지 실행
    batch_size=64,  # 배치 크기 증가
    shuffle=True,
    validation_data=(X_test, X_test),
    verbose=1,
    callbacks=[early_stopping, reduce_lr]  # 콜백 추가
)

# 학습 완료
print("Training Complete!")


Epoch 1/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - loss: 0.6176 - val_loss: 0.3333 - learning_rate: 0.0010
Epoch 2/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3513 - val_loss: 0.2257 - learning_rate: 0.0010
Epoch 3/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.2694 - val_loss: 0.2129 - learning_rate: 0.0010
Epoch 4/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.2585 - val_loss: 0.2095 - learning_rate: 0.0010
Epoch 5/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2548 - val_loss: 0.2081 - learning_rate: 0.0010
Epoch 6/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2548 - val_loss: 0.2072 - learning_rate: 0.0010
Epoch 7/100
[1m104/104[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2540 - val_loss: 0.2066 - learnin

<h2 style='color:pink'>모델 predict<h2>

In [None]:
# 테스트 데이터 복원, 정상 데이터 predict
reconstructed_test = autoencoder.predict(X_test)
print(reconstructed_test)
# 비정상 데이터 복원, 비정상 데이터 predict
reconstructed_abnormal = autoencoder.predict(test_abnormal_features)

# 재구성 오차 계산 => 복원값과 원래 값의 오차 계산
test_reconstruction_error = np.mean((X_test - reconstructed_test)**2, axis=1)
abnormal_reconstruction_error = np.mean((test_abnormal_features - reconstructed_abnormal)**2, axis=1)

# 임계값 설정 (평균 + 2 * 표준편차)
threshold = np.mean(test_reconstruction_error) + 2 * np.std(test_reconstruction_error)
print("Anomaly Detection Threshold:", threshold)

# 비정상 데이터 탐지
anomalies = abnormal_reconstruction_error > threshold
print("Number of Anomalies Detected:", np.sum(anomalies))


[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step
[[0.00562102 0.03055853 0.18477488 0.07543717 0.9997627 ]
 [0.00520115 0.03024077 0.18430085 0.09988247 0.9997798 ]
 [0.00543737 0.03109953 0.18503712 0.0642519  0.9997669 ]
 ...
 [0.00434339 0.0326921  0.20073302 0.00580141 0.9998931 ]
 [0.00527208 0.03138538 0.18512967 0.06065068 0.9997718 ]
 [0.00467092 0.03253074 0.18549296 0.04828071 0.9997899 ]]
[1m52/52[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 638us/step
Anomaly Detection Threshold: 0.3110938381614809
Number of Anomalies Detected: 1571


<h2 style='color:pink'>f1score<h2>

In [55]:
from sklearn.metrics import f1_score, precision_score, recall_score, classification_report

# 임계값 설정
threshold = np.mean(test_reconstruction_error) + 1.* np.std(test_reconstruction_error)

# 예측 라벨 생성
# test_reconstruction_error는 정상 데이터
# abnormal_reconstruction_error는 비정상 데이터
test_labels = np.zeros(len(test_reconstruction_error))  # 정상 라벨 (0)
abnormal_labels = np.ones(len(abnormal_reconstruction_error))  # 비정상 라벨 (1)

# Combine true labels and reconstruction errors
true_labels = np.concatenate([test_labels, abnormal_labels])
all_reconstruction_errors = np.concatenate([test_reconstruction_error, abnormal_reconstruction_error])

# 예측 라벨 생성: 재구성 오차가 임계값보다 크면 비정상(1), 작으면 정상(0)
predicted_labels = (all_reconstruction_errors > threshold).astype(int)

# F1-score, Precision, Recall 계산
f1 = f1_score(true_labels, predicted_labels)
precision = precision_score(true_labels, predicted_labels)
recall = recall_score(true_labels, predicted_labels)

print("F1-score:", f1)
print("Precision:", precision)
print("Recall:", recall)

# Classification Report
print("\nClassification Report:")
print(classification_report(true_labels, predicted_labels))


F1-score: 0.9352477477477478
Precision: 0.8797669491525424
Recall: 0.9981971153846154

Classification Report:
              precision    recall  f1-score   support

         0.0       1.00      0.86      0.93      1664
         1.0       0.88      1.00      0.94      1664

    accuracy                           0.93      3328
   macro avg       0.94      0.93      0.93      3328
weighted avg       0.94      0.93      0.93      3328

