In [1]:
import pandas as pd
import numpy as np
import re

fixed_time_steps = 15

# CSV 파일 불러오기
data_df = pd.read_csv('/home/heechun/final_ws/PoseEstimate/1114_final_pose_check/data_frame/data_sequences.csv')
labels_df = pd.read_csv('/home/heechun/final_ws/PoseEstimate/1114_final_pose_check/data_frame/label_sequences.csv')

# 숫자 부분만 추출하는 함수 정의
def extract_numeric(value):
    # tensor(숫자) 형식에서 숫자만 추출
    match = re.search(r"tensor\(([\d.]+)", str(value))
    return float(match.group(1)) if match else np.nan

# 데이터 프레임 전체에 함수 적용
data_df = data_df.applymap(extract_numeric)

# 필요한 형식으로 변환
data_sequences = data_df.values.reshape(-1, fixed_time_steps, 34)  # 시퀀스의 개수 x 시퀀스 길이 x 특징 수
label_sequences = labels_df['label'].values  # 라벨 데이터

# 확인
print("Data shape:", data_sequences.shape)
print("Labels shape:", label_sequences.shape)


  data_df = data_df.applymap(extract_numeric)


Data shape: (1010, 15, 34)
Labels shape: (1010,)


In [2]:
data_sequences.shape, label_sequences.shape

((1010, 15, 34), (1010,))

In [3]:
data_df.shape, labels_df.shape

((15150, 34), (1010, 1))

In [4]:
data_df.head()
data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 15150 entries, 0 to 15149
Data columns (total 34 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   0       15148 non-null  float64
 1   1       15148 non-null  float64
 2   2       15148 non-null  float64
 3   3       15148 non-null  float64
 4   4       15148 non-null  float64
 5   5       15148 non-null  float64
 6   6       15148 non-null  float64
 7   7       15148 non-null  float64
 8   8       15148 non-null  float64
 9   9       15148 non-null  float64
 10  10      15148 non-null  float64
 11  11      15148 non-null  float64
 12  12      15148 non-null  float64
 13  13      15148 non-null  float64
 14  14      15148 non-null  float64
 15  15      15148 non-null  float64
 16  16      15148 non-null  float64
 17  17      15148 non-null  float64
 18  18      15148 non-null  float64
 19  19      15148 non-null  float64
 20  20      15148 non-null  float64
 21  21      15148 non-null  float64
 22

In [5]:
# NaN 값이 있는지 확인
print(np.isnan(data_sequences).any())


True


In [6]:
data_sequences = np.nan_to_num(data_sequences)


In [7]:
print(np.isnan(data_sequences).any())

False


In [8]:
data_sequences.shape

(1010, 15, 34)

# 1. 데이터 로딩 및 증강 처리

In [14]:
import pandas as pd
import numpy as np
import re
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

fixed_time_steps = 15

# CSV 파일 불러오기
data_df = pd.read_csv('/home/heechun/final_ws/PoseEstimate/1114_final_pose_check/data_frame/data_sequences.csv')
labels_df = pd.read_csv('/home/heechun/final_ws/PoseEstimate/1114_final_pose_check/data_frame/label_sequences.csv')

# 숫자 부분만 추출하는 함수 정의
def extract_numeric(value):
    # tensor(숫자) 형식에서 숫자만 추출
    match = re.search(r"tensor\(([\d.]+)", str(value))
    return float(match.group(1)) if match else np.nan

# 데이터 프레임 전체에 함수 적용
data_df = data_df.applymap(extract_numeric)

# 필요한 형식으로 변환
data_sequences = data_df.values.reshape(-1, fixed_time_steps, 34)  # 시퀀스의 개수 x 시퀀스 길이 x 특징 수
label_sequences = labels_df['label'].values  # 라벨 데이터

# NaN 값 처리
data_sequences = np.nan_to_num(data_sequences)

# 데이터 증강 함수 정의
def add_noise(data, noise_factor=0.1):
    noise = noise_factor * np.random.randn(*data.shape)
    return data + noise

def scale(data, scale_factor=1.1):
    return data * scale_factor

def shift(data, shift_max=0.1):
    shift = np.random.uniform(-shift_max, shift_max, data.shape)
    return data + shift

# 데이터 증강 적용
augmented_data_sequences = []
augmented_labels = []

for i in range(len(data_sequences)):
    original = data_sequences[i]
    
    # 원본 추가
    augmented_data_sequences.append(original)
    augmented_labels.append(label_sequences[i])
    
    # Noise 추가
    augmented_data_sequences.append(add_noise(original))
    augmented_labels.append(label_sequences[i])

    # Scaling 추가
    augmented_data_sequences.append(scale(original))
    augmented_labels.append(label_sequences[i])

    # Shifting 추가
    augmented_data_sequences.append(shift(original))
    augmented_labels.append(label_sequences[i])

# 증강된 데이터 변환
augmented_data_sequences = np.array(augmented_data_sequences)
augmented_labels = np.array(augmented_labels)

# 레이블 인코딩
label_encoder = LabelEncoder()
encoded_labels = label_encoder.fit_transform(augmented_labels)

# 데이터 분할 (학습/검증 데이터)
X_train, X_val, y_train, y_val = train_test_split(augmented_data_sequences, encoded_labels, test_size=0.2, random_state=42)


  data_df = data_df.applymap(extract_numeric)


# 2. 모델 정의 및 학습 설정

In [15]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Bidirectional, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, ReduceLROnPlateau
import os

# 양방향 LSTM 모델 정의 (LSTM 유닛 수 증가, 추가 LSTM 층 추가)
model = Sequential([
    Bidirectional(LSTM(256, return_sequences=True), input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.5),
    BatchNormalization(),
    Bidirectional(LSTM(128, return_sequences=True)),  # 추가된 LSTM 층
    Dropout(0.5),
    BatchNormalization(),
    Bidirectional(LSTM(64, return_sequences=True)),
    Dropout(0.5),
    BatchNormalization(),
    Bidirectional(LSTM(32, return_sequences=False)),
    Dropout(0.5),
    Dense(64, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.03)),
    BatchNormalization(),
    Dropout(0.5),
    Dense(32, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(0.03)),
    BatchNormalization(),
    Dense(len(set(encoded_labels)), activation='softmax')
])

# 모델 컴파일 (초기 학습률 증가)
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4), loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# 로그 디렉토리 설정
log_dir = "/home/heechun/final_ws/PoseEstimate/1114_final_pose_check/logs_final_data_change"
os.makedirs(log_dir, exist_ok=True)

# TensorBoard 콜백 설정
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

# ModelCheckpoint 콜백 설정
checkpoint_best = ModelCheckpoint(
    '/home/heechun/final_ws/PoseEstimate/1114_final_pose_check/best_model_final_data_change.keras',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1
)

# Early Stopping 설정
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=15,
    restore_best_weights=True,
    verbose=1
)

# 학습률 감소 스케줄러 설정
lr_scheduler = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    verbose=1
)

# 모델 학습
history = model.fit(
    X_train, y_train,
    epochs=100,
    batch_size=32,
    validation_data=(X_val, y_val),
    callbacks=[checkpoint_best, tensorboard_callback, early_stopping, lr_scheduler]
)


  super().__init__(**kwargs)


Epoch 1/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step - accuracy: 0.2658 - loss: 4.8641
Epoch 1: val_accuracy improved from -inf to 0.24010, saving model to /home/heechun/final_ws/PoseEstimate/1114_final_pose_check/best_model_final_data_change.keras
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 26ms/step - accuracy: 0.2659 - loss: 4.8634 - val_accuracy: 0.2401 - val_loss: 4.5547 - learning_rate: 1.0000e-04
Epoch 2/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.3002 - loss: 4.6515
Epoch 2: val_accuracy improved from 0.24010 to 0.37252, saving model to /home/heechun/final_ws/PoseEstimate/1114_final_pose_check/best_model_final_data_change.keras
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 20ms/step - accuracy: 0.3002 - loss: 4.6509 - val_accuracy: 0.3725 - val_loss: 4.3976 - learning_rate: 1.0000e-04
Epoch 3/100
[1m101/101[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [