In [1]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import train_test_split

In [2]:
# Load the dataset
def load_and_prepare_data(base_dir, max_sequence_length=None):
    """Read every labelled ``.txt`` file under ``base_dir`` and pad to one length.

    ``base_dir`` must contain the sub-directories ``true`` (label 1) and
    ``false`` (label 0). Each ``.txt`` file inside them is parsed with
    ``pd.read_csv`` using a comma delimiter and pandas' default header
    handling; the resulting value matrix becomes one sequence.

    Args:
        base_dir: Directory holding the ``true`` and ``false`` sub-directories.
        max_sequence_length: Length every sequence is padded to. When ``None``
            the length of the longest loaded sequence is used.

    Returns:
        A tuple ``(padded_sequences, labels, max_sequence_length)`` where
        ``padded_sequences`` is the float32 array produced by
        ``pad_sequences`` (padding appended at the end of each sequence),
        ``labels`` is an array with one 0/1 entry per file, and
        ``max_sequence_length`` is the length that was actually used.

    Raises:
        ValueError: If no ``.txt`` file was found and ``max_sequence_length``
            was not given, so the padding length cannot be inferred.
    """
    sequences = []
    labels = []

    # (directory name, class label): the `true` directory is 1, `false` is 0.
    for label_dir, label in (('true', 1), ('false', 0)):
        label_path = os.path.join(base_dir, label_dir)

        # os.listdir returns names in arbitrary order; sorting makes the
        # sample order reproducible from run to run.
        for file_name in sorted(os.listdir(label_path)):
            if not file_name.endswith('.txt'):
                continue
            data = pd.read_csv(os.path.join(label_path, file_name), delimiter=',')
            sequences.append(data.values)
            labels.append(label)

    # Equalise sequence lengths (add padding).
    if max_sequence_length is None:
        if not sequences:
            raise ValueError(
                f"No .txt files found under {base_dir!r}; "
                "cannot infer max_sequence_length"
            )
        max_sequence_length = max(len(seq) for seq in sequences)

    padded_sequences = pad_sequences(
        sequences, maxlen=max_sequence_length, padding='post', dtype='float32'
    )

    # pad_sequences already returns an ndarray; asarray avoids a second copy.
    return np.asarray(padded_sequences), np.array(labels), max_sequence_length

In [3]:
# Sliding windows
def create_sliding_windows(data, labels, window_size, stride=1):
    """Cut every sequence in ``data`` into fixed-length, possibly overlapping windows.

    Args:
        data: Array of shape ``(num_samples, num_timesteps, num_features)``.
        labels: One label per sample; every window inherits the label of the
            sample it was cut from.
        window_size: Number of timesteps in each window (must be >= 1).
        stride: Step between consecutive window start indices (must be >= 1).

    Returns:
        A tuple ``(windows, window_labels)``. ``windows`` has shape
        ``(num_samples * windows_per_sample, window_size, num_features)`` and
        is ordered sample by sample and, within a sample, by increasing start
        index. ``window_labels`` holds one label per window in the same order.
        When ``window_size`` exceeds ``num_timesteps`` no window fits and both
        arrays are empty, with ``windows`` keeping the trailing
        ``(window_size, num_features)`` dimensions.

    Raises:
        ValueError: If ``window_size`` or ``stride`` is smaller than 1, or if
            ``data`` has fewer than three dimensions.
        IndexError: If there are fewer labels than samples.
    """
    data = np.asarray(data)
    if data.ndim < 3:
        raise ValueError(
            f"data must be (samples, timesteps, features); got shape {data.shape}"
        )
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    if stride < 1:
        raise ValueError(f"stride must be >= 1, got {stride}")

    num_samples, num_timesteps = data.shape[:2]
    feature_shape = data.shape[2:]

    labels = np.asarray(labels)
    if len(labels) < num_samples:
        raise IndexError(
            f"got {len(labels)} labels for {num_samples} samples"
        )
    labels = labels[:num_samples]

    # No window fits: return empty arrays that still carry the window/feature
    # dimensions so callers can keep indexing ``.shape[1]`` / ``.shape[2]``.
    if num_samples == 0 or window_size > num_timesteps:
        empty = np.empty((0, window_size) + feature_shape, dtype=data.dtype)
        return empty, labels[:0]

    # The view has shape (samples, starts, *features, window_size); it shares
    # memory with `data`, so nothing is copied yet.
    view = np.lib.stride_tricks.sliding_window_view(data, window_size, axis=1)
    # Keep every `stride`-th start and move the window axis next to the start
    # axis -> (samples, windows_per_sample, window_size, *features).
    view = np.moveaxis(view[:, ::stride], -1, 2)
    windows_per_sample = view.shape[1]

    # Materialise one writable, contiguous copy, then flatten sample/start axes.
    windows = np.array(view, order='C').reshape(
        (num_samples * windows_per_sample, window_size) + feature_shape
    )
    window_labels = np.repeat(labels, windows_per_sample, axis=0)

    return windows, window_labels

In [None]:
# Load the padded sequences and their labels from disk.
base_dir = './training_data'
X, y, max_len = load_and_prepare_data(base_dir=base_dir)

# Apply the sliding window: every padded sequence is cut into 10-step windows
# (default stride of 1).
window_size = 10
X_windows, y_windows = create_sliding_windows(
    data=X,
    labels=y,
    window_size=window_size,
)

# Report the array shapes before and after windowing.
for _caption, _array in (
    ("Original:", X),
    ("after x_windows:", X_windows),
    ("after y_windows:", y_windows),
):
    print(_caption, _array.shape)

In [None]:
# Shuffle
assert len(X_windows) == len(y_windows)

# Fixed seed so the shuffle and the split below give the same partition after
# a kernel restart.
SEED = 42
indices = np.random.default_rng(SEED).permutation(len(y_windows))

X_shuffled = X_windows[indices]
y_shuffled = y_windows[indices]

# Train/validation split
# NOTE(review): the split is done per window, and the windows were created
# with overlap, so windows cut from the same source file can end up in both
# sets. Validation metrics may therefore be optimistic -- consider splitting
# by file before windowing.
X_train, X_val, y_train, y_val = train_test_split(
    X_shuffled, y_shuffled, test_size=0.2, random_state=SEED
)

# Show shapes instead of dumping array contents (the original printed the
# undefined name `X_trains`, which raised NameError).
print("X_train:", X_train.shape, "y_train:", y_train.shape)
print("X_val:", X_val.shape, "y_val:", y_val.shape)

In [7]:
# Build the model: three stacked LSTM layers followed by a sigmoid unit for
# binary classification.
model = Sequential()
# Declare the input shape with an explicit Input layer rather than passing
# `input_shape` to the first LSTM, which Keras 3 discourages for Sequential
# models. The shape is (timesteps per window, features per timestep).
model.add(Input(shape=(window_size, X_windows.shape[2])))
# Intermediate LSTM layers return the full sequence so that the next LSTM
# still receives one vector per timestep.
model.add(LSTM(units=16, return_sequences=True))
model.add(LSTM(units=32, return_sequences=True))
model.add(Dropout(0.1))
# The last LSTM returns only its final output, which feeds the classifier.
model.add(LSTM(units=16, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(units=1, activation='sigmoid'))

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Train
# Stop once the validation loss has failed to improve for 5 epochs in a row
# and restore the weights from the best epoch.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)

history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=15,
    batch_size=256,
    callbacks=[early_stopping],
)

Epoch 1/15
[1m36214/36214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m636s[0m 17ms/step - accuracy: 0.7293 - loss: 0.5726 - val_accuracy: 0.7343 - val_loss: 0.5610
Epoch 2/15
[1m36214/36214[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 16ms/step - accuracy: 0.7356 - loss: 0.5590