In [1]:
import os
import numpy as np
import pandas as pd
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping

In [2]:
# 데이터셋 불러오기
def load_and_prepare_data(base_dir, max_sequence_length=None):
    sequences = []
    labels = []

    for label_dir in ['true', 'false']:
        label_path = os.path.join(base_dir, label_dir)
        label = 1 if label_dir == 'true' else 0  # true 디렉토리는 1로, false 디렉토리는 0으로 라벨링

        for file_name in os.listdir(label_path):
            file_path = os.path.join(label_path, file_name)
            if file_name.endswith('.txt'):  # 확장자가 .txt인 파일만 처리
                data = pd.read_csv(file_path, delimiter=',')
                
                sequences.append(data.values)
                labels.append(label)

    # 시퀀스 길이 맞추기 (패딩추가)
    if max_sequence_length is None:
        max_sequence_length = max(len(seq) for seq in sequences)
    
    padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length, padding='post', dtype='float32')

    return np.array(padded_sequences), np.array(labels), max_sequence_length

In [3]:
# 슬라이딩 윈도우
def create_sliding_windows(data, window_size, stride=1):
    num_samples = data.shape[0]
    num_timesteps = data.shape[1]
    num_features = data.shape[2]
    
    windows = []
    
    for start in range(0, num_timesteps - window_size + 1, stride):
        end = start + window_size
        window = data[:, start:end, :]
        windows.append(window)
    
    return np.array(windows).reshape(-1, window_size, num_features)

In [4]:
# 데이터셋 불러오기
base_dir = './training_data'
X, y, max_len = load_and_prepare_data(base_dir)

window_size = 10
X_windows = create_sliding_windows(X, window_size)
y_windows = np.repeat(y, X_windows.shape[1], axis=0)

print("Original shape:", X.shape)
print("x_windows:", X_windows.size)
print("y_windows:", y_windows.size)
print("New shape after sliding window:", X_windows.shape)

Original shape: (322, 35998, 6)
x_windows: 695307480
y_windows: 3220
New shape after sliding window: (11588458, 10, 6)


In [5]:
# 셔플
assert len(X_windows) == len(y_windows)

indices = np.arange(len(y_windows))
np.random.shuffle(indices)

X_shuffled = X_windows[indices]
y_shuffled = y_windows[indices]

X_train, X_val, y_train, y_val = train_test_split(X_shuffled, y_shuffled, test_size=0.2)

AssertionError: 

In [None]:
# 모델 구성
model = Sequential()

model.add(LSTM(16, input_shape=(window_size, X_windows.shape[2])))
model.add(LSTM(units=32, return_sequences=True))
model.add(Dropout(0.1))
model.add(LSTM(units=16, return_sequences=False))
model.add(Dropout(0.1))
model.add(Dense(units=1, activation='sigmoid'))
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# 학습
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
history = model.fit(X, y, epochs=20, batch_size=256, validation_split=0.2, callbacks=[early_stopping])