## 데이터 불러오기

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.layers import Conv2D, MaxPooling2D, Input, Concatenate
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D

from tensorflow.keras.models import Model

# Directory that holds the train/val/test CSV files.
DATA_PATH = './csv_data/colorinfo'

# Read the three splits in one pass; each CSV sits directly under DATA_PATH.
train_df, val_df, test_df = (
    pd.read_csv(DATA_PATH + csv_name)
    for csv_name in ('/train_color.csv', '/val_color.csv', '/test_color.csv')
)

# When running on Colab, uncomment the lines below and run them so the
# backslash-separated image paths are converted to forward slashes.
# train_df['image'] = train_df['image'].apply(lambda x: str(x).replace('\\', '/'))
# val_df['image'] = val_df['image'].apply(lambda x: str(x).replace('\\', '/'))
# test_df['image'] = test_df['image'].apply(lambda x: str(x).replace('\\', '/'))

# Notebook display of the first rows of the training split.
train_df.head()

Unnamed: 0,image,black,blue,brown,green,red,white,dress,shirt,pants,shorts,shoes,color
0,./clothes_dataset\green_shoes\f1f33bed259f4b38...,0,0,0,1,0,0,0,0,0,0,1,3
1,./clothes_dataset\brown_pants\8a797ffb710eefe3...,0,0,1,0,0,0,0,0,1,0,0,2
2,./clothes_dataset\white_dress\ef86bf5eee72dbe8...,0,0,0,0,0,1,1,0,0,0,0,5
3,./clothes_dataset\black_shoes\ff7f558959757ab7...,1,0,0,0,0,0,0,0,0,0,1,0
4,./clothes_dataset\blue_pants\b354ab5371b90d5eb...,0,1,0,0,0,0,0,0,1,0,0,1


## 제네레이터 정의하기

In [2]:
def get_steps(num_samples, batch_size):
    """Return how many batches are needed to cover ``num_samples`` items.

    A trailing partial batch counts as one extra step.
    """
    full_batches, leftover = divmod(num_samples, batch_size)
    return full_batches + 1 if leftover > 0 else full_batches

class DataGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that yields ``([images, colors], labels)`` batches.

    Images, the multi-hot labels and the ``color`` value are all read through
    a single ``flow_from_dataframe`` iterator, so the color in row ``i`` of a
    batch always belongs to the image in row ``i``.

    Args:
        df: DataFrame with an ``image`` path column, the label columns listed
            in ``class_col`` and a ``color`` column.
        batch_size: Number of samples per batch.
        target_size: ``(height, width)`` every image is resized to.
        shuffle: Whether the sample order is shuffled (and reshuffled at the
            end of every epoch).
    """

    def __init__(self, df, batch_size = 32, target_size = (112, 112), shuffle = True):
        super().__init__()
        self.len_df = len(df)
        self.batch_size = batch_size
        self.target_size = target_size
        self.shuffle = shuffle
        self.class_col = ['black', 'blue', 'brown', 'green', 'red', 'white', 
             'dress', 'shirt', 'pants', 'shorts', 'shoes']

        # Load the images through a Keras image generator. The 'color' column
        # is appended to y_col so that it travels with its image through any
        # shuffling or filename filtering done by the iterator; __getitem__
        # splits it off again.
        self.generator = ImageDataGenerator(rescale = 1./255)
        self.df_generator = self.generator.flow_from_dataframe(
            dataframe=df,
            directory='',
            x_col='image',
            y_col=self.class_col + ['color'],
            target_size=self.target_size,
            color_mode='rgb',
            class_mode='raw',
            batch_size=self.batch_size,
            shuffle=self.shuffle,
            seed=42,
        )
        # Kept for backward compatibility; batches no longer read from it.
        self.colors_df = df['color']
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return self.len_df // self.batch_size

    def on_epoch_end(self):
        """Let the wrapped iterator rebuild its sample order for the next epoch.

        The wrapped iterator owns the ordering (shuffled only when ``shuffle``
        is true), which keeps images, labels and colors in step.
        """
        self.df_generator.on_epoch_end()

    def __getitem__(self, index):
        """Return batch ``index`` as ``([images, colors], labels)``.

        ``labels`` holds the ``class_col`` columns and ``colors`` the
        ``color`` column of the same rows.
        """
        images, targets = self.df_generator[index]

        # targets columns are class_col followed by 'color' (see __init__).
        num_labels = len(self.class_col)
        labels = targets[:, :num_labels]
        colors = targets[:, num_labels]

        # return multi-input and output
        return [images, colors], labels

In [3]:
# Build the training and validation batch generators with the constructor
# defaults (batch_size=32, target_size=(112, 112), shuffle=True).
train_datagen = DataGenerator(train_df)
val_datagen = DataGenerator(val_df)

Found 5578 validated image filenames.
Found 2391 validated image filenames.


## 모델 구성하기

In [4]:
def get_model():
    """Build and compile the two-input (image + color) multi-label classifier.

    The image branch is three Conv2D/MaxPooling2D stages followed by global
    average pooling. The pooled features are concatenated with the scalar
    color input and passed through a Dense(64) layer to an 11-unit sigmoid
    output.

    Returns:
        The compiled Keras ``Model`` (adam, binary cross-entropy, 'acc').
    """
    # Two inputs: the RGB image and a single color value.
    image_in = Input(shape=(112, 112, 3))
    color_in = Input(shape=[1])

    # Convolutional feature extractor: one conv + pool stage per filter count.
    features = image_in
    for num_filters in (32, 64, 64):
        features = Conv2D(num_filters, (3, 3), padding='same', activation='relu')(features)
        features = MaxPooling2D((3, 3), strides=2)(features)
    features = GlobalAveragePooling2D()(features)

    # Merge the color value into the pooled image features.
    merged = Concatenate()([features, color_in])

    hidden = Dense(64, activation='relu')(merged)
    outputs = Dense(11, activation='sigmoid')(hidden)

    # With multiple inputs, the input tensors are passed as a list.
    model = Model(inputs=[image_in, color_in], outputs=outputs)

    model.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['acc'],
    )

    return model

# Build and compile the two-input model once for the cells that follow.
model = get_model()
print('model ready~')

model ready~


## 제네레이터를 통해 모델 학습시키기

In [None]:
# Batch size used by the test generator and the prediction step count in the
# later cells; the DataGenerator instances take their own batch_size argument.
batch_size = 32

# Train for 10 epochs, validating on val_datagen, with batch loading spread
# over 8 worker processes.
model.fit(
    train_datagen,
    validation_data=val_datagen,
    epochs=10,
    use_multiprocessing=True,
    workers=8,
)

## 테스트 데이터 예측하기

In [None]:
# Image generator for the test split; pixel values are multiplied by 1/255.
test_datagen = ImageDataGenerator(rescale=1./255)

# No labels are requested (y_col and class_mode are None) and shuffling is
# off, so batches contain images only.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory='',
    x_col='image',
    y_col=None,
    target_size=(112, 112),
    color_mode='rgb',
    class_mode=None,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# The model takes [image, color] inputs, but test_generator yields image
# batches only, so each image batch is paired with the matching slice of the
# 'color' column before predicting.
# NOTE(review): assumes test_df has the same 'color' column as train_df and
# that no test rows were dropped as invalid filenames, so that batch k of the
# unshuffled test_generator covers rows [k * batch_size, (k + 1) * batch_size).
test_steps = get_steps(len(test_df), batch_size)
test_colors = test_df['color'].to_numpy()

preds = np.concatenate([
    model.predict_on_batch([
        test_generator[step],
        test_colors[step * batch_size:(step + 1) * batch_size],
    ])
    for step in range(test_steps)
])

In [None]:
import matplotlib.pyplot as plt
import cv2

# Only the first 8 predictions are visualized.
do_preds = preds[:8]

# Label names in the order of the model's 11 outputs: the label column order
# used by the training generator. (Previously `class_col` was referenced
# without being defined at module level.)
class_col = train_datagen.class_col

for i, pred in enumerate(do_preds):
    plt.subplot(2, 4, i + 1)
    # Keep the two highest-scoring labels for the subplot title.
    prob = sorted(zip(class_col, list(pred)), key = lambda z: z[1], reverse = True)[:2]

    # preds follow the row order of test_df, so look the path up by position.
    image = cv2.imread(test_df['image'].iloc[i])
    # OpenCV loads BGR; matplotlib expects RGB.
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    plt.imshow(image)
    plt.title(f'{prob[0][0]}: {round(prob[0][1] * 100, 2)}% \n {prob[1][0]}: {round(prob[1][1] * 100, 2)}%')

plt.tight_layout()