In [22]:
import json
import os

import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras.layers import Input, Dense, LSTM, Embedding, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from tqdm import tqdm

In [1]:
# Mount Google Drive at /content/drive (Colab-only helper); this prompts for authorization.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [23]:
# Image and caption folder paths.
# Every split lives under the same dataset root on the mounted Drive:
# "원천데이터" holds the source images, "라벨링데이터" holds the label (caption) files.
_dataset_root = '/content/drive/MyDrive/이미지 설명문 추출 및 생성용 한국형 비전 데이터'
train_images_folder = f'{_dataset_root}/Training/원천데이터'
train_captions_folder = f'{_dataset_root}/Training/라벨링데이터'
val_images_folder = f'{_dataset_root}/Validation/원천데이터'
val_captions_folder = f'{_dataset_root}/Validation/라벨링데이터'

In [24]:
# Loader for caption files stored as JSON
def load_captions_from_json(folder_path):
    """Collect caption entries from every ``.json`` file directly inside a folder.

    Files are visited in sorted file-name order. ``os.listdir`` returns entries
    in arbitrary order, and callers pair the returned list with image files
    positionally, so a reproducible order is required.

    Args:
        folder_path: Directory containing the caption ``.json`` files. Entries
            whose name does not end in ``.json`` are ignored.

    Returns:
        A flat list holding the values of each file's top-level JSON object,
        file by file in sorted order and in key order within a file.

    Raises:
        AttributeError: If a file's top-level JSON value is not an object
            (it has no ``values()``).
    """
    captions = []
    for file_name in sorted(os.listdir(folder_path)):
        if not file_name.endswith('.json'):
            continue
        file_path = os.path.join(folder_path, file_name)
        # Parse while the file is open, then release the handle before
        # touching the accumulated list.
        with open(file_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        captions.extend(data.values())
    return captions


In [25]:
# Load the caption entries for the training and validation splits
train_captions = load_captions_from_json(folder_path=train_captions_folder)
val_captions = load_captions_from_json(folder_path=val_captions_folder)

In [26]:
# Load InceptionV3 (ImageNet weights) and turn it into an image feature extractor.
# The extractor shares the pretrained model's input and stops at its
# second-to-last layer, i.e. it omits the model's final layer.
image_model = tf.keras.applications.InceptionV3(weights='imagenet')
new_input = image_model.input
hidden_layer = image_model.layers[-2].output
image_features_extract_model = tf.keras.Model(inputs=new_input, outputs=hidden_layer)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5


In [27]:
# Image preprocessing helper

def load_image(image_path):
    """Read one image file and prepare it as InceptionV3 input.

    Args:
        image_path: Path of the image file to read.

    Returns:
        A ``(img, image_path)`` tuple: the image loaded at 299x299, converted
        to an array, resized to 299x299 and passed through the InceptionV3
        ``preprocess_input`` function, together with the unchanged path.
    """
    target_size = (299, 299)
    pil_image = image.load_img(image_path, target_size=target_size)
    # The explicit resize repeats the size already requested from load_img;
    # it is kept so the returned object is the same as before.
    pixels = tf.image.resize(image.img_to_array(pil_image), target_size)
    return tf.keras.applications.inception_v3.preprocess_input(pixels), image_path

In [40]:
# Build the dataset that pairs image features with their captions
def create_dataset(image_folder, captions, image_features_extract_model, batch_size=32):
    """Extract image features batch by batch and pair them with captions.

    Images are taken from ``image_folder`` in sorted file-name order and
    paired positionally with ``captions`` via ``zip``; surplus items on either
    side are silently dropped.
    NOTE(review): this assumes the i-th caption describes the i-th image in
    sorted order -- confirm against the dataset layout.

    Features of *all* batches are accumulated and concatenated, and *all*
    paired captions are returned. The earlier version overwrote the features
    and cleared the caption list after every full batch, so only the last
    partial batch survived.

    Args:
        image_folder: Directory whose entries are the image files.
        captions: Iterable of caption entries, one per image.
        image_features_extract_model: Callable applied to a batch tensor of
            preprocessed images; returns that batch's feature tensor.
        batch_size: Number of images sent through the extractor at once.

    Returns:
        ``(image_features, sequences)``: the per-batch feature tensors
        concatenated along axis 0, and the list of captions in the same order.

    Raises:
        ValueError: If no image/caption pair could be formed.
    """
    image_paths = sorted(
        os.path.join(image_folder, img_name) for img_name in os.listdir(image_folder)
    )
    feature_batches = []  # one feature tensor per processed batch
    sequences = []        # every paired caption, in processing order
    pending_images = []   # preprocessed images waiting for the extractor

    for img_path, caption in tqdm(zip(image_paths, captions), desc=f'Loading captions from {image_folder}'):
        img, _ = load_image(img_path)
        pending_images.append(img)
        sequences.append(caption)

        # Run the extractor once a full batch has accumulated
        if len(pending_images) == batch_size:
            batch = tf.convert_to_tensor(pending_images)
            feature_batches.append(image_features_extract_model(batch))
            pending_images = []

    # Process whatever is left over after the last full batch
    if pending_images:
        batch = tf.convert_to_tensor(pending_images)
        feature_batches.append(image_features_extract_model(batch))

    if not feature_batches:
        raise ValueError(f'No image/caption pairs found for {image_folder}')

    image_features = tf.concat(feature_batches, axis=0)
    return image_features, sequences

In [45]:
# Load the raw caption entries for both splits.
# They get their own names so that re-running this cell never feeds an
# already-paired caption list back into create_dataset.
train_captions_raw = load_captions_from_json(train_captions_folder)
val_captions_raw = load_captions_from_json(val_captions_folder)

# Build the training dataset
train_image_features, train_captions = create_dataset(train_images_folder, train_captions_raw, image_features_extract_model)

# Build the validation dataset
val_image_features, val_captions = create_dataset(val_images_folder, val_captions_raw, image_features_extract_model)

# Each feature row must have exactly one caption, otherwise later cells
# would work with misaligned inputs and targets.
assert train_image_features.shape[0] == len(train_captions), 'training features/captions are misaligned'
assert val_image_features.shape[0] == len(val_captions), 'validation features/captions are misaligned'

# Merge all captions into a single list
all_captions = train_captions + val_captions

# Memory cleanup (K is imported in the notebook's top import cell)
K.clear_session()

Loading captions from /content/drive/MyDrive/이미지 설명문 추출 및 생성용 한국형 비전 데이터/Training/원천데이터: 4844it [02:24, 33.41it/s]
Loading captions from /content/drive/MyDrive/이미지 설명문 추출 및 생성용 한국형 비전 데이터/Validation/원천데이터: 605it [00:20, 30.22it/s]


In [48]:
# Tokenizer used for tokenization and padding
tokenizer = Tokenizer(oov_token="<OOV>")

# Keep only the entries that carry a 'text' field, then drop blank texts
captions_with_text = (entry for entry in all_captions if 'text' in entry)
all_captions_texts = [entry['text'] for entry in captions_with_text if entry['text'].strip()]
tokenizer.fit_on_texts(all_captions_texts)

# One more than the number of indexed words
# (presumably so index 0 stays free for padding -- confirm)
vocab_size = 1 + len(tokenizer.word_index)


In [50]:
# Extract the caption texts of each split and tokenize them
def _caption_texts(captions):
    """Return the non-blank 'text' values of the entries that have a 'text' field."""
    return [entry['text'] for entry in captions if 'text' in entry and entry['text'].strip()]

# NOTE(review): entries filtered out by _caption_texts are not removed from the
# image feature tensors -- confirm features and sequences still line up.
train_captions_seqs = tokenizer.texts_to_sequences(_caption_texts(train_captions))
val_captions_seqs = tokenizer.texts_to_sequences(_caption_texts(val_captions))

# Stop early when either split produced no sequences
if not (train_captions_seqs and val_captions_seqs):
    raise ValueError("훈련 데이터셋 또는 검증 데이터셋이 비어있습니다.")

# Pad every sequence (at the end) to the longest length found in either split
max_length = max(map(len, train_captions_seqs + val_captions_seqs))
train_captions_padded = pad_sequences(train_captions_seqs, padding='post', maxlen=max_length)
val_captions_padded = pad_sequences(val_captions_seqs, padding='post', maxlen=max_length)

ValueError: ignored

In [11]:
# Hyperparameters of the model that combines images with caption sequences:
# embed_dim is the token embedding size, units is the LSTM size.
embed_dim, units = 256, 512

In [11]:
# Requires train_image_features, max_length and vocab_size from the earlier
# cells -- run those first on a fresh kernel.

# Image branch: project the extracted feature vector to the LSTM width so it
# can be added to the LSTM output (embed_dim-wide output cannot be merged with
# a units-wide sequence).
image_input = Input(shape=(train_image_features.shape[1],))
image_dense = Dense(units, activation='relu')(image_input)
# Repeat the image vector for every time step: (batch, units) -> (batch, max_length, units)
image_repeated = tf.keras.layers.RepeatVector(max_length)(image_dense)

# Sequence branch: embed the token ids and run them through an LSTM that
# returns one output per time step.
caption_input = Input(shape=(max_length,))
caption_embed = Embedding(vocab_size, embed_dim, input_length=max_length)(caption_input)
caption_lstm = LSTM(units, return_sequences=True)(caption_embed)

# Merge the image and caption branches; both are (batch, max_length, units) here
combined = tf.keras.layers.Add()([image_repeated, caption_lstm])
combined = Dense(units, activation='relu')(combined)
# Per-time-step distribution over the vocabulary
outputs = Dense(vocab_size, activation='softmax')(combined)

NameError: ignored

In [None]:
# Assemble the final two-input model (image features + caption token ids)
model = Model(outputs=outputs, inputs=[image_input, caption_input])

# Compile: Adam optimizer, sparse categorical cross-entropy loss, accuracy metric
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Print the layer-by-layer summary
model.summary()