In [1]:
import os
import json
import numpy as np
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array, load_img
import cv2

def process_dataset(root_folder):
    image_paths = []
    label_data = []

    for roots, dirs, files in os.walk(root_folder):
        for file in files:
            if file.endswith('.jpg'):
                # 파일 이름 분석을 위해 숫자만 추출
                prefix = file[0:3]

                # 접두사에 따른 레이블 할당
                label = prefix_to_label.get(prefix)

                # 유효한 레이블이 있는 경우에만 리스트에 추가
                if label is not None:
                    image_paths.append(os.path.join(roots, file))
                    label_data.append(label)

    return image_paths, label_data


def resize_img(image_paths):
    images_resized = []  # 리사이즈된 이미지를 저장할 리스트
    for image_path in image_paths:
        image = cv2.imread(image_path)  # 각 이미지 경로로부터 이미지를 읽음
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # BGR에서 RGB로 색상 변환
        image_resized = cv2.resize(image, (128, 128))  # 이미지 리사이즈
        images_resized.append(image_resized)  # 결과 리스트에 추가
    images_resized = np.array(images_resized) / 255.0  # numpy 배열로 변환 및 정규화
    return images_resized


def shuffle_data(image_paths, label_data):
    # 데이터와 레이블을 같이 섞어줍니다.
    indices = np.arange(len(image_paths))
    np.random.shuffle(indices)
    shuffled_image_paths = np.array(image_paths)[indices]
    shuffled_label_data = np.array(label_data)[indices]
    return shuffled_image_paths, shuffled_label_data


def multilabel_train_generator(image_paths, label_data, batch_size):
    num_samples = len(image_paths)
    while True:
        # 데이터 셔플
        image_paths, label_data = shuffle_data(image_paths, label_data)
        for offset in range(0, num_samples, batch_size):
            batch_images = []
            batch_labels = []

            # 배치 크기만큼 이미지와 레이블 데이터 로드 및 전처리
            batch_image_paths = image_paths[offset:offset + batch_size]
            batch_image_labels = label_data[offset:offset + batch_size]

            batch_images = resize_img(batch_image_paths)
            
            for labels in batch_image_labels:
                batch_labels.append(labels)

            # 배치 데이터 반환
            yield np.array(batch_images), np.array(batch_labels)


def multilabel_test_generator(image_paths, label_data, batch_size):
    num_samples = len(image_paths)
    while True:
        for offset in range(0, num_samples, batch_size):
            batch_images = []
            batch_labels = []

            # 배치 크기만큼 이미지와 레이블 데이터 로드 및 전처리
            batch_image_paths = image_paths[offset:offset + batch_size]
            batch_image_labels = label_data[offset:offset + batch_size]

            batch_images = resize_img(batch_image_paths)
            
            for labels in batch_image_labels:
                batch_labels.append(labels)

            # 배치 데이터 반환
            yield np.array(batch_images), np.array(batch_labels)


def load_json_file(file_path):
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)


def get_prefix_to_label(file_path, exercise_name):
    data = load_json_file(file_path)
    result = (data.get(exercise_name))
    prefix = data.get(exercise_name)['prefix']
    label = data.get(exercise_name)['last_path']
    prefix_to_label = dict(zip(prefix, label))
    return prefix_to_label



In [2]:
json_path = 'E:/AInotes/자세교정/모델학습/label_data.json'
exercise_name = 'HipThrust'
train_folder = r'E:/AI/dataset_skeleton_sep/face/' + exercise_name + '/training'
valid_folder = r'E:/AI/dataset_skeleton_sep/face/' + exercise_name + '/validation'

prefix_to_label = get_prefix_to_label(json_path, exercise_name)
train_image_paths, train_label_data = process_dataset(train_folder)
valid_image_paths, valid_label_data = process_dataset(valid_folder)
print(len(train_image_paths), len(train_label_data))
print(len(valid_image_paths), len(valid_label_data))

get_label_nums = lambda x: len(next(iter(x.values())))
print(get_label_nums(prefix_to_label))

batch_size = 32

train_generator = multilabel_train_generator(train_image_paths, train_label_data, batch_size)
validation_generator = multilabel_test_generator(valid_image_paths, valid_label_data, batch_size)

# 모델 구성
base_model = ResNet50(weights='imagenet', include_top=False, input_shape=(128, 128, 3))
for layer in base_model.layers:
    base_model.trainable = False
for layer in base_model.layers[-9:]:
    base_model.trainable = True

model = models.Sequential()
model.add(base_model)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Flatten())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(get_label_nums(prefix_to_label), activation='sigmoid'))

earlystopping = EarlyStopping(monitor='val_loss', patience=5, mode='min', verbose=1)

8409 8409
1807 1807
3


In [3]:
model.compile(optimizer=optimizers.Adam(learning_rate=0.0002),
              loss=['binary_crossentropy'],
              metrics=['accuracy'])

# 모델 훈련
history = model.fit(train_generator,
                    steps_per_epoch=len(train_image_paths) // batch_size,
                    epochs=25,
                    validation_data=validation_generator,
                    validation_steps=len(valid_image_paths) // batch_size,
                    callbacks=[earlystopping]
                    )


Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 14: early stopping


In [4]:
#model.save(r'E:\AImodel\models\Multilabel\Face-ResNet-' + exercise_name + '-multilabel-model')
model.save(r'E:\AImodel\models\Multilabel\Face-ResNet-' + exercise_name + '-multilabel-model.h5')
print('model saved')


model saved


In [5]:
# test_image_resized = resize_img(test_image_paths)
# # 모델 예측
# predictions = model.predict(test_image_resized)

# # 임계값 설정 (예: 0.5)
# threshold = 0.5
# predictions_binary = (predictions > threshold).astype(int)

# # 각 레이블에 대한 정확도 계산
# accuracy_per_label = np.mean(predictions_binary == test_label_data, axis=0)

# # 각 레이블별 정확도 출력
# for i, accuracy in enumerate(accuracy_per_label):
#     print(f"레이블 {i}의 정확도: {accuracy}")

# # 전체 정확도도 여전히 중요할 수 있으므로, 이를 계산합니다.
# overall_accuracy = np.mean(predictions_binary == test_label_data)
# print(f"전체 정확도: {overall_accuracy}")
