In [None]:
import os
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization, Flatten, Conv1D, GlobalAveragePooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import mixed_precision
from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import Callback, ModelCheckpoint, EarlyStopping
from tensorflow.keras.regularizers import l2
from tqdm import tqdm

# Enable mixed precision globally (optimization for A100 GPUs).
mixed_precision.set_global_policy('mixed_float16')

# Callback that reports training progress with tqdm.
class TQDMProgressBar(Callback):
    """Show a single tqdm bar that advances once per completed epoch.

    One bar is created for the whole training run, its description is
    refreshed at the start of each epoch, it is advanced by one step when an
    epoch finishes, and it is closed when training ends.

    Args:
        total_epochs: Number of epochs used as the bar's total. It should
            match the ``epochs`` value passed to ``model.fit``.
    """

    def __init__(self, total_epochs):
        super().__init__()
        self.total_epochs = total_epochs
        # Created lazily so the same callback instance can be reused by fit().
        self.epoch_bar = None

    def _open_bar(self):
        """Create the bar for a new training run, closing any stale one."""
        self._close_bar()
        self.epoch_bar = tqdm(
            total=self.total_epochs,
            desc=f"Epoch 0/{self.total_epochs}",
            position=0,
            leave=False,
        )

    def _close_bar(self):
        """Close the bar if one is open; safe to call repeatedly."""
        if self.epoch_bar is not None:
            self.epoch_bar.close()
            self.epoch_bar = None

    def on_train_begin(self, logs=None):
        self._open_bar()

    def on_epoch_begin(self, epoch, logs=None):
        # Guard in case the framework did not invoke on_train_begin first.
        if self.epoch_bar is None:
            self._open_bar()
        # `epoch` is zero-based, so display it one-based.
        self.epoch_bar.set_description(f"Epoch {epoch + 1}/{self.total_epochs}")

    def on_epoch_end(self, epoch, logs=None):
        if self.epoch_bar is not None:
            self.epoch_bar.update(1)

    def on_train_end(self, logs=None):
        self._close_bar()

# Load features and labels from a JSON file.
def load_data_from_json(json_file):
    """Read a JSON sequence of frames and split each into features and label.

    Args:
        json_file: Path of the JSON file to read. Each frame must be an
            indexable sequence whose last entry is the label.

    Returns:
        A ``(X, y)`` tuple of NumPy arrays: ``X`` holds every frame without
        its last entry, ``y`` holds the last entry of every frame.
    """
    with open(json_file, 'r') as handle:
        frames = json.load(handle)

    # All entries but the last are key-point data; the last entry is the label.
    features = [frame[:-1] for frame in frames]
    labels = [frame[-1] for frame in frames]
    return np.array(features), np.array(labels)

# Path of the JSON data file.
json_file = "/content/data_val.json"

# Load features and labels from the JSON file.
X, y = load_data_from_json(json_file)

# Preprocessing: one-hot encode the labels.
y = to_categorical(y)

# Split the data: 30% is held out for testing, then 33% of the remainder is
# used for validation. The fixed random_state keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.33, random_state=42, shuffle=True)

# Build the tf.data pipelines.
# Training: the shuffle buffer spans the whole training set so every pass is a
# uniform shuffle regardless of dataset size; repeat() makes the stream endless
# (model.fit bounds each epoch with steps_per_epoch); prefetch is the final
# stage so that ready-made batches are what gets buffered.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(len(X_train))
    .batch(64)
    .repeat()
    .prefetch(tf.data.AUTOTUNE)
)
# Validation / test: aggregated loss and accuracy do not depend on sample
# order, so these sets are not shuffled; this also keeps batch order stable.
val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(64).prefetch(tf.data.AUTOTUNE)
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(64).prefetch(tf.data.AUTOTUNE)

# Load a saved model, or build a new one when no saved model exists.
def load_model_with_path(model_path, input_shape=(12, 3), num_classes=None):
    """Return the model stored at ``model_path`` or a freshly compiled CNN.

    Args:
        model_path: File path of a previously saved Keras model. When the path
            exists the model is loaded from it and the other arguments are
            ignored.
        input_shape: Shape of one input sample, ``(steps, channels)``. The
            four kernel-size-3 convolutions use the default padding, so
            ``steps`` presumably has to be at least 9 -- TODO confirm if a
            shorter input is ever needed.
        num_classes: Size of the softmax output. When ``None`` it falls back
            to ``y_train.shape[1]`` from the notebook's global scope, which is
            what this function always did before the parameter existed.

    Returns:
        A Keras model: either the loaded one, or a new Sequential Conv1D
        network compiled with Adam (lr 0.001), categorical cross-entropy and
        the accuracy metric.
    """
    if os.path.exists(model_path):
        return load_model(model_path)

    if num_classes is None:
        # Backward-compatible default: infer the class count from the one-hot
        # training labels defined at notebook level.
        num_classes = y_train.shape[1]

    # 1D CNN: four Conv1D -> BatchNorm -> Dropout stages with shrinking width.
    # An explicit Input object replaces the `input_shape=` layer argument.
    stack = [tf.keras.Input(shape=input_shape)]
    for filters in (128, 64, 32, 16):
        stack.extend([
            Conv1D(filters, kernel_size=3, activation='relu', kernel_regularizer=l2(0.0004)),
            BatchNormalization(),
            Dropout(0.3),
        ])
    stack.append(GlobalAveragePooling1D())
    # The output layer is pinned to float32 even though the notebook sets a
    # mixed_float16 global policy, so the softmax is computed in full precision.
    stack.append(Dense(num_classes, activation='softmax', dtype='float32'))

    model = Sequential(stack)

    # Compile the model.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# Shared training settings, defined once so the progress bar, the checkpoint
# and model.fit cannot drift apart.
EPOCHS = 10
# Same file is used for resuming and for checkpointing; the relative name used
# before only resolved to this file when the working directory was /content.
MODEL_PATH = '/content/best_model.keras'

# Checkpoint callback.
checkpoint = ModelCheckpoint(
    MODEL_PATH,  # file to save to (.keras format)
    monitor='val_accuracy',  # metric to monitor
    mode='max',  # save when val_accuracy reaches a new maximum
    save_best_only=True,  # keep only the best model
    verbose=1  # log when a model is saved
)

# Progress display; its total matches the number of epochs passed to fit().
progress_bar = TQDMProgressBar(total_epochs=EPOCHS)

# Resume from the saved model if it exists, otherwise build a new one.
model = load_model_with_path(MODEL_PATH)

# EarlyStopping callback.
early_stopping = EarlyStopping(
    monitor='val_loss',  # metric to monitor (e.g. 'val_loss' or 'val_accuracy')
    patience=5,          # stop after 5 consecutive epochs without improvement
    restore_best_weights=True,  # restore the best weights
    verbose=1            # log when training is stopped
)

# Train the model.
model.fit(
    train_dataset,
    steps_per_epoch=100,  # train on a subset of the data per epoch, not all of it
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=[progress_bar, checkpoint, early_stopping],  # includes early stopping
)

# Evaluate the model on the held-out test set.
loss, accuracy = model.evaluate(test_dataset)
print(f"Test Loss: {loss:.4f}, Test Accuracy: {accuracy:.4f}")


Epoch 1/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 1/10
[1m 85/100[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 2ms/step - accuracy: 0.4915 - loss: 1.2771

                                                          


Epoch 1: val_accuracy improved from -inf to 0.20087, saving model to best_model.keras
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 26ms/step - accuracy: 0.5245 - loss: 1.2156 - val_accuracy: 0.2009 - val_loss: 1.8462


Epoch 2/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 2/10
[1m 84/100[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m1s[0m 69ms/step - accuracy: 0.8983 - loss: 0.4529

                                                          


Epoch 2: val_accuracy did not improve from 0.20087
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 60ms/step - accuracy: 0.8997 - loss: 0.4458 - val_accuracy: 0.2009 - val_loss: 2.1103


Epoch 3/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 3/10
[1m 96/100[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9335 - loss: 0.2961

                                                          


Epoch 3: val_accuracy improved from 0.20087 to 0.53558, saving model to best_model.keras
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9338 - loss: 0.2952 - val_accuracy: 0.5356 - val_loss: 1.6612


Epoch 4/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 4/10
[1m 92/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 2ms/step - accuracy: 0.9435 - loss: 0.2531

                                                          


Epoch 4: val_accuracy improved from 0.53558 to 0.68866, saving model to best_model.keras
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9436 - loss: 0.2526 - val_accuracy: 0.6887 - val_loss: 0.8816


Epoch 5/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 5/10
[1m 95/100[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 2ms/step - accuracy: 0.9569 - loss: 0.2105

                                                          


Epoch 5: val_accuracy improved from 0.68866 to 0.94355, saving model to best_model.keras
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9567 - loss: 0.2104 - val_accuracy: 0.9435 - val_loss: 0.2927


Epoch 6/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 6/10
[1m 90/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 2ms/step - accuracy: 0.9543 - loss: 0.1978

                                                          


Epoch 6: val_accuracy did not improve from 0.94355
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9544 - loss: 0.1978 - val_accuracy: 0.9427 - val_loss: 0.2466


Epoch 7/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 7/10
[1m 94/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 2ms/step - accuracy: 0.9623 - loss: 0.1805

                                                          


Epoch 7: val_accuracy improved from 0.94355 to 0.96416, saving model to best_model.keras
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9621 - loss: 0.1809 - val_accuracy: 0.9642 - val_loss: 0.1647


Epoch 8/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 8/10
[1m 87/100[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 2ms/step - accuracy: 0.9593 - loss: 0.1779

                                                          


Epoch 8: val_accuracy improved from 0.96416 to 0.97905, saving model to best_model.keras
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.9599 - loss: 0.1763 - val_accuracy: 0.9790 - val_loss: 0.1157


Epoch 9/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 9/10
[1m 92/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 2ms/step - accuracy: 0.9668 - loss: 0.1635

                                                          


Epoch 9: val_accuracy did not improve from 0.97905
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9667 - loss: 0.1633 - val_accuracy: 0.9683 - val_loss: 0.1332


Epoch 10/20:   0%|          | 0/20 [00:00<?, ?it/s]

Epoch 10/10
[1m 94/100[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 2ms/step - accuracy: 0.9700 - loss: 0.1535

                                                           


Epoch 10: val_accuracy did not improve from 0.97905
[1m100/100[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.9700 - loss: 0.1532 - val_accuracy: 0.9771 - val_loss: 0.1323
Restoring model weights from the end of the best epoch: 8.
[1m103/118[0m [32m━━━━━━━━━━━━━━━━━[0m[37m━━━[0m [1m0s[0m 1ms/step - accuracy: 0.9805 - loss: 0.1104



[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.9805 - loss: 0.1103
Test Loss: 0.1103, Test Accuracy: 0.9803


In [None]:
# Print how many samples of each class ended up in the validation and test
# splits; the one-hot rows are turned back into class ids before counting.
label_sets = (
    ("Validation data distribution:", y_val),
    ("Test data distribution:", y_test),
)
for heading, one_hot_labels in label_sets:
    class_ids = np.argmax(one_hot_labels, axis=1)
    print(heading)
    print(np.bincount(class_ids))

Validation data distribution:
[455 448 431 459 457]
Test data distribution:
[535 511 474 473 507]


In [None]:
# Inspect the first 10 validation samples together with their labels.
for row in range(10):
    features, label = X_val[row], y_val[row]
    print(f"Features: {features}, Label: {label}")

In [None]:
def group_into_triplets(data):
    """Regroup each flat sample into consecutive triplets followed by its label.

    The last element of every sample is treated as the label and is split off
    before grouping, so it can never end up inside a triplet. The remaining
    values are chunked three at a time; trailing values that do not fill a
    complete triplet are dropped.

    Args:
        data: Iterable of samples, each a non-empty sliceable sequence of the
            form ``[v0, v1, ..., label]``.

    Returns:
        A list with one entry per sample: ``[[v0, v1, v2], ..., label]``.

    Raises:
        IndexError: If a sample is empty (it has no label element).
    """
    transformed_data = []
    for sample in data:
        label = sample[-1]
        values = sample[:-1]
        # Only iterate over complete groups of three values.
        usable = len(values) - len(values) % 3
        grouped = [values[start:start + 3] for start in range(0, usable, 3)]
        grouped.append(label)
        transformed_data.append(grouped)
    return transformed_data

# Read the source samples and regroup each one into triplets.
with open('/content/data_val.json', 'r') as source_handle:
    data = json.loads(source_handle.read())
transformed_data = group_into_triplets(data)

# Save the regrouped samples as an indented JSON file.
output_file = "transformed_data.json"
with open(output_file, "w") as target_handle:
    target_handle.write(json.dumps(transformed_data, indent=4))