In [1]:
import os.path
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import seaborn as sns

from pathlib import Path
from tqdm import tqdm
from time import perf_counter

from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report,accuracy_score
from IPython.display import Markdown, display
from glob import glob
from keras.preprocessing.image import ImageDataGenerator


In [2]:
base_dir = 'C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/'
train_folder = glob(base_dir + 'train_img증강/*')

train_path = []
for folder in train_folder:
    tmp = glob(folder + '/*')
    train_path += tmp

In [3]:
train_df = pd.DataFrame(train_path, columns=['path'])
train_df['label'] = train_df['path'].apply(lambda x: x.split('\\')[-2])
train_df

Unnamed: 0,path,label
0,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,가구수정
1,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,가구수정
2,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,가구수정
3,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,가구수정
4,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,가구수정
...,...,...
15855,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,훼손
15856,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,훼손
15857,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,훼손
15858,C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img...,훼손


In [4]:
train_datagen = ImageDataGenerator(rescale = 1./255,
                                   validation_split=0.2)



train_gen = train_datagen.flow_from_directory('C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img증강',
                                                 target_size = (150, 150),
                                                 batch_size = 32,
                                                 class_mode = 'categorical',subset='training')

val_gen  = train_datagen.flow_from_directory('C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/train_img증강',
                                                 target_size = (150, 150),
                                                 batch_size = 32,
                                                 class_mode = 'categorical',subset='validation')

Found 12695 images belonging to 19 classes.
Found 3165 images belonging to 19 classes.


In [5]:
#Initialising the CNN
cnn = tf.keras.models.Sequential()

# Step 1 - Convolution
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[150, 150, 3]))

# Step 2 - Pooling
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

# Adding convolutional layer
cnn.add(tf.keras.layers.Conv2D(filters=32, kernel_size=3, activation='relu'))
cnn.add(tf.keras.layers.MaxPool2D(pool_size=2, strides=2))

# Step 3 - Flattening
cnn.add(tf.keras.layers.Flatten())

# Step 4 - Full Connection
cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))
cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))
cnn.add(tf.keras.layers.Dense(units=128, activation='relu'))

# Step 5 - Output Layer
cnn.add(tf.keras.layers.Dense(units=19, activation='softmax'))

# Compiling the CNN
cnn.compile(optimizer = 'adam', 
            loss = 'categorical_crossentropy', 
            metrics = ['accuracy'])
cnn.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 148, 148, 32)      896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 74, 74, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 72, 72, 32)        9248      
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 36, 36, 32)       0         
 2D)                                                             
                                                                 
 flatten (Flatten)           (None, 41472)             0         
                                                                 
 dense (Dense)               (None, 128)               5

In [6]:
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping()

In [7]:
cnn.fit(x = train_gen, validation_data = val_gen, epochs = 50,callbacks = [early_stopping])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


<keras.callbacks.History at 0x196c75d1a10>

##### 전이학습 시키기 전에 train/test 내부 증강시키는 코드 추가

In [42]:
def create_gen():
    # 생성기 및 데이터 증강으로 이미지 로드
    train_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input,
        validation_split=0.1
    )

    test_generator = tf.keras.preprocessing.image.ImageDataGenerator(
        preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
    )

    train_images = train_generator.flow_from_dataframe(
        dataframe=train_df,
        x_col='path', # 파일위치 열이름
        y_col='label', # 클래스 열이름
        target_size=(224, 224), # 이미지 사이즈
        color_mode='rgb', # 이미지 채널수
        class_mode='categorical', # Y값(Label값)
        batch_size=32,
        shuffle=True, # 데이터를 섞을지 여부
        seed=0,
        subset='training', # train 인지 val인지 설정
        rotation_range=30, # 회전제한 각도 30도
        zoom_range=0.15, # 확대 축소 15%
        width_shift_range=0.2, # 좌우이동 20%
        height_shift_range=0.2, # 상하이동 20%
        shear_range=0.15, # 반시계방햐의 각도
        horizontal_flip=True, # 좌우 반전 True
        fill_mode="nearest"
        # 이미지 변경시 보완 방법 (constant, nearest, reflect, wrap) 4개 존재
    )

    val_images = train_generator.flow_from_dataframe(
        dataframe=train_df,
        x_col='path',
        y_col='label',
        target_size=(224, 224),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=32,
        shuffle=True,
        seed=0,
        subset='validation',
        rotation_range=30,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest"
    )

    test_images = test_generator.flow_from_directory(
        'C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/open',
        classes=['test'],
        target_size=(224, 224),
        color_mode='rgb',
        class_mode='categorical',
        batch_size=32,
        shuffle=False
    )
    
    return train_generator,test_generator,train_images,val_images,test_images

#### 전이학습

In [43]:
models = {
    "DenseNet121": {"model":tf.keras.applications.DenseNet121, "perf":0},
    "MobileNetV2": {"model":tf.keras.applications.MobileNetV2, "perf":0},
    "DenseNet201": {"model":tf.keras.applications.DenseNet201, "perf":0},
    "EfficientNetB0": {"model":tf.keras.applications.EfficientNetB0, "perf":0},
    "EfficientNetB1": {"model":tf.keras.applications.EfficientNetB1, "perf":0},
    "InceptionV3": {"model":tf.keras.applications.InceptionV3, "perf":0},
    "MobileNetV2": {"model":tf.keras.applications.MobileNetV2, "perf":0},
    "MobileNetV3Large": {"model":tf.keras.applications.MobileNetV3Large, "perf":0},
    "ResNet152V2": {"model":tf.keras.applications.ResNet152V2, "perf":0},
    "ResNet50": {"model":tf.keras.applications.ResNet50, "perf":0},
    "ResNet50V2": {"model":tf.keras.applications.ResNet50V2, "perf":0},
    "VGG19": {"model":tf.keras.applications.VGG19, "perf":0},
    "VGG16": {"model":tf.keras.applications.VGG16, "perf":0},
    "Xception": {"model":tf.keras.applications.Xception, "perf":0}
}
# Create the generators
train_generator,test_generator,train_images,val_images,test_images= create_gen()
print('\n')

Found 14274 validated image filenames belonging to 19 classes.
Found 1586 validated image filenames belonging to 19 classes.
Found 792 images belonging to 1 classes.




In [44]:
def get_model(model):
    # Load the pretained model
    kwargs =    {'input_shape':(224, 224, 3),
                'include_top':False,
                'weights':'imagenet',
                'pooling':'avg'}
    
    pretrained_model = model(**kwargs)
    pretrained_model.trainable = False # 레이어를 동결 시켜서 훈련중 손실을 최소화 한다.
    
    inputs = pretrained_model.input

    x = tf.keras.layers.Dense(128, activation='relu')(pretrained_model.output)
    x = tf.keras.layers.Dense(128, activation='relu')(x)

    outputs = tf.keras.layers.Dense(19, activation='softmax')(x)
    # 라벨 개수가 8개이기 때문에 Dencs도 8로 설정
    model = tf.keras.Model(inputs=inputs, outputs=outputs)

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )
    
    return model

In [45]:
from keras.callbacks import EarlyStopping
early_stopping = EarlyStopping()

In [46]:
# Train모델 학습
for name, model in models.items():
    
    # 전이 학습 모델 가져오기
    m = get_model(model['model'])
    models[name]['model'] = m
    
    start = perf_counter()
    
    # 모델 학습
    history = m.fit(train_images,validation_data=val_images,epochs=1,verbose=0)
    
    # 학습시간과 val_accuracy 저장
    duration = perf_counter() - start
    duration = round(duration,2)
    models[name]['perf'] = duration
    print(f"{name:20} trained in {duration} sec")
    
    val_acc = history.history['val_accuracy']
    models[name]['val_acc'] = [round(v,4) for v in val_acc]

DenseNet121          trained in 1281.97 sec
MobileNetV2          trained in 302.82 sec
DenseNet201          trained in 1477.66 sec
EfficientNetB0       trained in 432.92 sec
EfficientNetB1       trained in 559.62 sec
InceptionV3          trained in 491.58 sec
MobileNetV3Large     trained in 278.84 sec
ResNet152V2          trained in 1781.57 sec
ResNet50             trained in 734.83 sec
ResNet50V2           trained in 660.3 sec
VGG19                trained in 2160.18 sec
VGG16                trained in 1732.13 sec
Xception             trained in 852.02 sec


In [55]:
# test데이터로 모델 성능 예측
for name, model in models.items():
    
    # Predict the label of the test_images
    pred = models[name]['model'].predict(test_images)
    pred = np.argmax(pred,axis=1)

    # Map the label
    labels = (train_images.class_indices)
    labels = dict((v,k) for k,v in labels.items())
    pred = [labels[k] for k in pred]

print(pred)


['터짐', '오염', '곰팡이', '창틀,문틀수정', '오염', '오염', '오염', '몰딩수정', '창틀,문틀수정', '오염', '몰딩수정', '오타공', '오염', '오염', '오염', '터짐', '피스', '곰팡이', '오염', '오염', '창틀,문틀수정', '울음', '오염', '오염', '몰딩수정', '오염', '면불량', '곰팡이', '터짐', '훼손', '오염', '오염', '오염', '오염', '곰팡이', '오염', '오염', '오염', '터짐', '몰딩수정', '오염', '꼬임', '터짐', '면불량', '오염', '오염', '꼬임', '울음', '오염', '오염', '오염', '창틀,문틀수정', '울음', '면불량', '훼손', '오염', '오염', '곰팡이', '오염', '오염', '오타공', '몰딩수정', '터짐', '걸레받이수정', '훼손', '오염', '터짐', '오염', '오염', '오염', '오염', '곰팡이', '오염', '오염', '훼손', '오염', '곰팡이', '오염', '울음', '곰팡이', '오염', '오염', '몰딩수정', '오타공', '오염', '오염', '들뜸', '오염', '꼬임', '오염', '오염', '틈새과다', '오염', '오염', '오염', '오염', '석고수정', '오염', '몰딩수정', '오염', '피스', '이음부불량', '오염', '터짐', '오타공', '오염', '꼬임', '울음', '오염', '오염', '곰팡이', '오염', '오염', '오염', '오염', '오염', '오염', '면불량', '면불량', '곰팡이', '면불량', '면불량', '오염', '곰팡이', '창틀,문틀수정', '훼손', '오염', '꼬임', '오염', '터짐', '몰딩수정', '오염', '터짐', '오염', '오타공', '오염', '오염', '오염', '오염', '터짐', '오타공', '피스', '오염', '오염', '오염', '훼손', '오타공', '면불량', '훼손', '터짐', '오염', '곰팡이', '오염', '오

In [48]:
#test
base_dir = 'C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/'
test = pd.read_csv(base_dir + 'open/test.csv')
test

Unnamed: 0,id,img_path
0,TEST_000,./test/000.png
1,TEST_001,./test/001.png
2,TEST_002,./test/002.png
3,TEST_003,./test/003.png
4,TEST_004,./test/004.png
...,...,...
787,TEST_787,./test/787.png
788,TEST_788,./test/788.png
789,TEST_789,./test/789.png
790,TEST_790,./test/790.png


In [49]:
submit = pd.read_csv('C:/Users/pc/OneDrive/바탕 화면/4-1/도배_하자/open/sample_submission.csv')
submit

Unnamed: 0,id,label
0,TEST_000,훼손
1,TEST_001,훼손
2,TEST_002,훼손
3,TEST_003,훼손
4,TEST_004,훼손
...,...,...
787,TEST_787,훼손
788,TEST_788,훼손
789,TEST_789,훼손
790,TEST_790,훼손


In [50]:
submit['label'] = pred

In [51]:
submit.head(15)

Unnamed: 0,id,label
0,TEST_000,터짐
1,TEST_001,오염
2,TEST_002,곰팡이
3,TEST_003,"창틀,문틀수정"
4,TEST_004,오염
5,TEST_005,오염
6,TEST_006,오염
7,TEST_007,몰딩수정
8,TEST_008,"창틀,문틀수정"
9,TEST_009,오염


In [56]:
submit['label'].value_counts()

오염         367
터짐          60
곰팡이         55
훼손          50
몰딩수정        37
꼬임          36
면불량         33
오타공         31
울음          23
창틀,문틀수정     21
들뜸          17
피스          15
이음부불량       15
녹오염         12
석고수정        11
걸레받이수정       8
틈새과다         1
Name: label, dtype: int64

In [52]:
submit.to_csv('./증강_submit.csv', index=False)