https://huggingface.co/keras-io/timeseries-anomaly-detection

In [1]:
## 버전 확인 (hugging face 모델: keras<3.x)
import tensorflow as tf
import keras
print(tf.__version__)
print(keras.__version__)

2024-12-03 15:09:28.769918: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-12-03 15:09:28.797196: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


2.15.0
2.15.0


In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from keras.models import load_model  # 모델 불러오기
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score

In [4]:
data = pd.read_csv('./preprocessed_data0912(symbolic).csv')
data

Unnamed: 0,test_id,date,time,temp,rh,label
0,61,2024-09-11,4:27:10 오후,0.000000,0.000000,1
1,62,2024-09-11,4:27:16 오후,0.000000,0.000000,1
2,63,2024-09-11,4:28:16 오후,0.000000,0.000000,1
3,64,2024-09-11,4:30:56 오후,26.010921,67.549190,0
4,65,2024-09-11,4:30:57 오후,26.018158,67.339410,0
...,...,...,...,...,...,...
1682,1311,2024-09-12,9:49:19 오전,25.884330,65.530960,0
1683,1312,2024-09-12,9:49:20 오전,25.896990,65.491170,0
1684,1313,2024-09-12,9:49:21 오전,25.860823,65.505640,0
1685,1314,2024-09-12,9:49:22 오전,25.938585,65.469475,0


In [5]:
# '오후', '오전'을 AM/PM으로 변환하는 함수
def convert_korean_time_to_ampm(time_str):
    if '오전' in time_str:
        return time_str.replace('오전', 'AM')
    elif '오후' in time_str:
        return time_str.replace('오후', 'PM')
    else:
        return time_str

# 'time' 컬럼에 있는 '오전', '오후'를 AM/PM으로 변환
data['time'] = data['time'].apply(convert_korean_time_to_ampm)

# 날짜 및 시간 합치기
data['datetime'] = pd.to_datetime(data['date'] + ' ' + data['time'], format='%Y-%m-%d %I:%M:%S %p')

# 불필요한 칼럼 제거
data = data.drop(columns=['test_id', 'date', 'time'])

# 칼럼순서 변경
data = data[['datetime', 'temp', 'rh', 'label']]

data['datetime'] = pd.to_datetime(data['datetime'])
data = data.set_index('datetime')

In [6]:
# 데이터 정규화
scaler = MinMaxScaler()
data[['temp', 'rh']] = scaler.fit_transform(data[['temp', 'rh']])

# 시계열 데이터로 변환
def create_sequences(df, sequence_length):
    xs, ys = [], []
    for i in range(len(df) - sequence_length):
        x = df.iloc[i:i+sequence_length][['temp', 'rh']].values
        y = df.iloc[i+sequence_length]['label']
        xs.append(x)
        ys.append(y)
    return np.array(xs), np.array(ys)

sequence_length = 288  # 모델이 기대하는 시퀀스 길이
X, y = create_sequences(data, sequence_length)

# 현재 X shape 확인
print("X shape before reshape:", X.shape)

X shape before reshape: (1399, 288, 2)


In [7]:
# 데이터 차원 조정 (마지막 차원을 1로 변경)
X = X[:, :, 0:1]

# 현재 X shape 확인
print("X shape after reshape:", X.shape)

# 훈련 세트와 테스트 세트로 나누기
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

X shape after reshape: (1399, 288, 1)


In [8]:
# 모델 로드

# Note: 'keras<3.x' or 'tf_keras' must be installed (legacy)
# See https://github.com/keras-team/tf-keras for more details.
from huggingface_hub import from_pretrained_keras
model = from_pretrained_keras("keras-io/timeseries-anomaly-detection")
model.summary()

  from .autonotebook import tqdm as notebook_tqdm
Fetching 36 files: 100%|█████████████████████████████████| 36/36 [00:00<00:00, 130168.06it/s]






Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 144, 32)           256       
                                                                 
 dropout (Dropout)           (None, 144, 32)           0         
                                                                 
 conv1d_1 (Conv1D)           (None, 72, 16)            3600      
                                                                 
 conv1d_transpose (Conv1DTr  (None, 144, 16)           1808      
 anspose)                                                        
                                                                 
 dropout_1 (Dropout)         (None, 144, 16)           0         
                                                                 
 conv1d_transpose_1 (Conv1D  (None, 288, 32)           3616      
 Transpose)                                             

In [14]:
# 모델 컴파일
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 모델 학습
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.1)

# 예측
y_pred = model.predict(X_test)

# 모델 평가
loss, accuracy = model.evaluate(X_test, y_test)
print(" ")
print(" ")
print("==========================")
print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
 
 
Test Loss: 0.4407126307487488
Test Accuracy: 0.9714285731315613
