In [1]:
import tensorflow as tf
import numpy as np
import random, os, time
import tensorflow.contrib.slim as slim  # 기존 tensorflow를 사용하기 쉽게 만들어 놓은 high-level API

from custom_op import conv2d, conv2d_t, atrous_conv2d, relu, bn, max_pool
from utils import read_data_path, next_batch, read_image, read_annotation, draw_plot_segmentation

The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.



In [2]:
class DeepLab_v3(object):
    """DeepLab-v3 style semantic segmentation network (TensorFlow 1.x graph mode).

    The network is a ResNet50-like bottleneck encoder whose last two stages use
    atrous (dilated) convolutions, followed by an ASPP head that produces
    per-pixel class logits.

    Typical use::

        model = DeepLab_v3()
        model.build_model()   # creates placeholders, loss and train op
        model.train_model()   # runs the training loop and writes checkpoints

    The layer primitives (`conv2d`, `atrous_conv2d`, `bn`, `relu`, `max_pool`)
    and the data helpers (`read_data_path`, `next_batch`, `read_image`,
    `read_annotation`, `draw_plot_segmentation`) come from the project modules
    `custom_op` and `utils`.
    """

    # Number of validation samples predicted and plotted after every epoch.
    N_VALID_PREVIEW = 4

    def __init__(self):
        """Set hyper-parameters, output directories and dataset locations."""

        RESULT_MODEL_PATH = '/src/hyebin/modellos/DEEPLAB-V3-CRACK-11/'

        # Maximum number of training epochs.
        self.N_EPOCH = 20
        # Mini-batch size; iterations per epoch = number of images // N_BATCH.
        self.N_BATCH = 5
        # Learning rate of the optimizer.
        self.LEARNING_RATE = 0.00001

        self.LOGS_DIR = os.path.join(RESULT_MODEL_PATH, 'logs')
        self.CKPT_DIR = os.path.join(RESULT_MODEL_PATH, 'ckpt')
        self.OUTPUT_DIR = os.path.join(RESULT_MODEL_PATH, 'output')

        # Number of output channels (classes) of the final logits convolution.
        # NOTE(review): 151 looks like a leftover from the ADEChallenge dataset
        # paths listed below -- confirm it is intended for the DeepCrack labels.
        self.N_CLASS = 151
        # Images and annotations are resized to RESIZE x RESIZE.
        self.RESIZE = 191

        # Alternative dataset (ADEChallenge), kept for reference:
        #   images      : '/src/hyebin/deep-crack/ADEChallengeData2016/images/training/'
        #   annotations : '/src/hyebin/deep-crack/ADEChallengeData2016/annotations/training/'
        self.TRAIN_IMAGE_PATH = '/src/data/DeepCrack/train/train_img/'
        self.TRAIN_LABEL_PATH = '/src/data/DeepCrack/train/train_lab/'

        # Alternative dataset (ADEChallenge), kept for reference:
        #   images      : '/src/hyebin/deep-crack/ADEChallengeData2016/images/validation/'
        #   annotations : '/src/hyebin/deep-crack/ADEChallengeData2016/annotations/validation/'
        self.VALID_IMAGE_PATH = '/src/data/DeepCrack/train/test_img/'
        # NOTE(review): 'tlest_lab' looks like a typo of 'test_lab'
        # ('/src/data/DeepCrack/train/test_lab/'). It is left untouched because
        # the directory on disk may really carry this name -- please verify.
        self.VALID_LABEL_PATH = '/src/data/DeepCrack/train/tlest_lab/'

    ## Model construction
    def make_model(self, inputs, is_training):
        """Build the encoder + ASPP graph.

        Args:
            inputs: image batch tensor fed to the first convolution.
            is_training: boolean tensor/flag forwarded to every `bn` call.

        Returns:
            A tuple `(logits, pred)`:
              * logits -- per-pixel class scores, bilinearly resized to
                `[RESIZE, RESIZE]` with `N_CLASS` channels.
              * pred -- `argmax` of `logits` over the channel axis with a
                trailing axis of size 1 added.
        """

        ## Feature extraction with a ResNet50-style encoder.
        # NOTE: the creation order of the ops and the `name`/stage strings are
        # kept exactly as before so that variable names stay compatible with
        # checkpoints that were already written.
        with tf.variable_scope('ResNet50'):
            # strides: step with which the filter is moved over the input.
            x = conv2d(inputs, 64, [7,7], strides=[1,2,2,1], name='conv1')   # size 1/2
            # Batch normalization stabilises training of the network.
            x = bn(x, is_training)
            x = relu(x)
            x = max_pool(x, ksize=[1,3,3,1], name='pool1')   # size 1/4
            print('x : {}'.format(x))

            x = self.conv_block(x, [64, 64, 256], '2_1', is_training, s=1)
            for idx in (2, 3):
                x = self.identity_block(x, [64, 64, 256], '2_%d' % idx, is_training)
            print('x1 : {}'.format(x))

            x = self.conv_block(x, [128, 128, 512], '3_1', is_training)
            for idx in (2, 3):
                x = self.identity_block(x, [128, 128, 512], '3_%d' % idx, is_training)
            print('x2 : {}'.format(x))

            # From here on the blocks use atrous convolutions (rate 2, then 4)
            # and a shortcut stride of 1.
            x = self.atrous_conv_block(x, [256, 256, 1024], '4_1', 2, is_training, s=1)
            for idx in range(2, 7):
                x = self.atrous_identity_block(x, [256, 256, 1024], '4_%d' % idx, 2, is_training)
            print('x3 : {}'.format(x))

            x = self.atrous_conv_block(x, [512, 512, 2048], '5_1', 4, is_training, s=1)
            for idx in (2, 3):
                x = self.atrous_identity_block(x, [512, 512, 2048], '5_%d' % idx, 4, is_training)
            print('x4 : {}'.format(x))

            ## Atrous Spatial Pyramid Pooling (ASPP) head.
            # Several dilation rates are applied in parallel so that the fused
            # feature map covers receptive fields of different sizes.
            with tf.variable_scope('ASPP'):
                feature_map_shape = x.get_shape().as_list()

                # Global average pooling over height and width; keepdims keeps
                # the reduced axes so the result can be resized back.
                feature_map = tf.reduce_mean(x, [1,2], keepdims=True)
                feature_map = conv2d(feature_map, 256, [1,1], name='gap_feature_map')
                feature_map = tf.image.resize_bilinear(
                    feature_map, [feature_map_shape[1], feature_map_shape[2]])
                print('feature_map : {}'.format(feature_map))

                # rate: spacing between the sampled pixels of the filter.
                rate1 = conv2d(x, 256, [1,1], name='rate1')
                print('rate1 : {}'.format(rate1))
                rate6 = atrous_conv2d(x, 256, [3,3], rate=6, name='rate6')
                print('rate6 : {}'.format(rate6))
                rate12 = atrous_conv2d(x, 256, [3,3], rate=12, name='rate12')
                print('rate12 : {}'.format(rate12))
                rate18 = atrous_conv2d(x, 256, [3,3], rate=18, name='rate18')
                print('rate18 : {}'.format(rate18))

                # Join all branches along the channel axis.
                concated = tf.concat([feature_map, rate1, rate6, rate12, rate18], axis=3)
                print('concated : {}'.format(concated))

                net = conv2d(concated, 256, [1,1], name='net')
                print('net : {}'.format(net))

                # Per-pixel class scores, upsampled to the input resolution.
                logits = conv2d(net, self.N_CLASS, [1,1], name='logits')
                logits = tf.image.resize_bilinear(logits, size=[self.RESIZE, self.RESIZE], name='out')
                print('logits : {}'.format(logits))

                # Predicted class index per pixel. `axis` replaces the
                # deprecated `dim` keyword of tf.expand_dims.
                pred = tf.argmax(logits, axis=3)
                pred = tf.expand_dims(pred, axis=3)
                print('pred : {}'.format(pred))

                return logits, pred

    ## Graph definition used for training
    def build_model(self):
        """Create placeholders, the model, the loss, the train op and summaries.

        Sets `input_x`, `label_y`, `is_train`, `logits`, `pred`, `loss`,
        `optimizer` and `loss_summary` on the instance and prints an overview
        of the trainable variables.
        """

        self.input_x = tf.placeholder(dtype=tf.float32, shape=[None, self.RESIZE, self.RESIZE, 3])
        self.label_y = tf.placeholder(dtype=tf.int32, shape=[None, self.RESIZE, self.RESIZE, 1])

        self.is_train = tf.placeholder(dtype=tf.bool)

        self.logits, self.pred = self.make_model(self.input_x, self.is_train)

        # Loss: mean sparse softmax cross entropy over all pixels.
        self.loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                logits=self.logits, labels=tf.squeeze(self.label_y, [3])))

        # Batch-norm layers register the updates of their moving statistics in
        # the UPDATE_OPS collection. They only run if the train op depends on
        # them; without this the statistics used when is_train=False are never
        # learned. The dependency is a no-op when the collection is empty.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            self.optimizer = tf.train.AdamOptimizer(self.LEARNING_RATE).minimize(self.loss)

        self.loss_summary = tf.summary.merge([tf.summary.scalar('loss', self.loss)])

        model_vars = tf.trainable_variables()
        slim.model_analyzer.analyze_vars(model_vars, print_info=True)

    ## Training loop and checkpointing
    def train_model(self):
        """Train the model built by `build_model` and save one checkpoint per epoch.

        After every epoch `N_VALID_PREVIEW` validation samples are predicted
        and plotted to `OUTPUT_DIR`, the average loss is printed and a
        checkpoint is written to `CKPT_DIR`.

        Raises:
            ValueError: if the training set holds fewer than `N_BATCH` samples,
                i.e. not a single training step could be executed.
        """

        # makedirs also creates missing parent directories (os.mkdir would fail).
        for directory in (self.LOGS_DIR, self.CKPT_DIR, self.OUTPUT_DIR):
            if not os.path.exists(directory):
                os.makedirs(directory)

        train_set_path = read_data_path(self.TRAIN_IMAGE_PATH, self.TRAIN_LABEL_PATH)
        valid_set_path = read_data_path(self.VALID_IMAGE_PATH, self.VALID_LABEL_PATH)

        total_batch = len(train_set_path) // self.N_BATCH
        if total_batch == 0:
            # Previously this surfaced as a ZeroDivisionError after a full
            # validation pass; fail early with an explicit message instead.
            raise ValueError(
                'Training set has {} sample(s), fewer than N_BATCH={}'.format(
                    len(train_set_path), self.N_BATCH))

        ckpt_save_path = os.path.join(
            self.CKPT_DIR, 'DEEPLAB-V3_' + str(self.N_BATCH) + '_' + str(self.LEARNING_RATE))
        image_size = [self.RESIZE, self.RESIZE]

        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())

            counter = 0

            self.saver = tf.train.Saver()
            self.writer = tf.summary.FileWriter(self.LOGS_DIR, sess.graph)

            try:
                for epoch in range(self.N_EPOCH):
                    total_loss = 0
                    # Reshuffle both sets at the start of every epoch.
                    random.shuffle(train_set_path)
                    random.shuffle(valid_set_path)

                    for i in range(total_batch):
                        batch_xs_path, batch_ys_path = next_batch(train_set_path, self.N_BATCH, i)

                        batch_xs = read_image(batch_xs_path, image_size)
                        batch_ys = read_annotation(batch_ys_path, image_size)

                        feed_dict = {self.input_x: batch_xs, self.label_y: batch_ys, self.is_train: True}

                        # One optimisation step.
                        _, _loss_summary, _loss = sess.run(
                            [self.optimizer, self.loss_summary, self.loss], feed_dict=feed_dict)

                        self.writer.add_summary(_loss_summary, counter)
                        counter += 1
                        total_loss += _loss

                    ## Validation preview
                    valid_xs_path, valid_ys_path = next_batch(valid_set_path, self.N_VALID_PREVIEW, 0)
                    # Input images.
                    valid_xs = read_image(valid_xs_path, image_size)
                    # Ground-truth label images.
                    valid_ys = read_annotation(valid_ys_path, image_size)

                    # Predicted label images.
                    valid_pred = sess.run(
                        self.pred,
                        feed_dict={self.input_x: valid_xs, self.label_y: valid_ys, self.is_train: False})
                    # Drop the trailing size-1 axis for plotting.
                    valid_pred = np.squeeze(valid_pred, axis=3)
                    valid_ys = np.squeeze(valid_ys, axis=3)

                    ## Plot and save the figure
                    img_save_path = os.path.join(self.OUTPUT_DIR, str(epoch).zfill(3) + '.png')
                    draw_plot_segmentation(img_save_path, valid_xs, valid_pred, valid_ys)

                    print('Epoch:', '%03d' % (epoch + 1), 'Avg Loss: {:.6}\t'.format(total_loss / total_batch))
                    self.saver.save(sess, ckpt_save_path + '_' + str(epoch) + '.model', global_step=counter)
            finally:
                # Flush pending summaries and release the event file, also when
                # training is interrupted.
                self.writer.close()

            # The checkpoint of the last epoch has already been written inside
            # the loop; the former extra save after the loop rewrote the same
            # file and raised NameError when N_EPOCH was 0.
            print('Finish save model')

    ## Bottleneck residual blocks
    ## Every convolution uses a kernel of 3x3 or smaller. From ResNet50 on, a
    ## residual block stacks 1x1, 3x3 and 1x1 convolutions to save computation:
    ## the first 1x1 reduces the number of feature maps, the 3x3 works on the
    ## reduced maps and the last 1x1 expands them again (bottleneck layer).

    def identity_block(self, inputs, filters, stage, is_training):
        """Bottleneck residual block with an identity shortcut.

        Used when input and output have the same shape: `inputs` is added
        unchanged to the output of the 1x1 -> 3x3 -> 1x1 path.

        Args:
            inputs: input feature map.
            filters: three output channel counts, one per convolution; the
                last one has to be compatible with `inputs` for the addition.
            stage: prefix used to name the convolutions.
            is_training: forwarded to `bn`.

        Returns:
            `relu(path(inputs) + inputs)`.
        """
        filter1, filter2, filter3 = filters
        layer1 = relu(bn(conv2d(inputs, filter1, [1,1], name=stage+'_a_identity', padding='VALID'), is_training))
        layer2 = relu(bn(conv2d(layer1, filter2, [3,3], name=stage+'_b_identity'), is_training))
        layer3 = bn(conv2d(layer2, filter3, [1,1], name=stage+'_c_identity', padding='VALID'), is_training)
        layer4 = relu(tf.add(layer3, inputs))
        return layer4

    def conv_block(self, inputs, depths, stage, is_training, s=2):
        """Bottleneck residual block with a projection shortcut.

        Used when the shapes of input and output differ: instead of adding
        `inputs` directly, a strided 1x1 convolution (plus batch norm) first
        matches size and number of feature maps.

        Args:
            inputs: input feature map.
            depths: three output channel counts, one per convolution.
            stage: prefix used to name the convolutions.
            is_training: forwarded to `bn`.
            s: stride of the first 1x1 convolution and of the shortcut.

        Returns:
            `relu(path(inputs) + shortcut(inputs))`.
        """
        depth1, depth2, depth3 = depths
        layer1 = relu(bn(conv2d(inputs, depth1, [1,1], name=stage+'_a_conv', strides=[1, s, s, 1], padding='VALID'), is_training))
        layer2 = relu(bn(conv2d(layer1, depth2, [3,3], name=stage+'_b_conv'), is_training))
        layer3 = bn(conv2d(layer2, depth3, [1,1], name=stage+'_c_conv', padding='VALID'), is_training)
        shortcut = bn(conv2d(inputs, depth3, [1,1], name=stage+'_shortcut', strides=[1, s, s, 1], padding='VALID'), is_training)
        layer4 = relu(tf.add(layer3, shortcut))
        return layer4

    def atrous_identity_block(self, inputs, depths, stage, rate, is_training):
        """Identity-shortcut bottleneck block built from atrous convolutions.

        Same layout as `identity_block`, but all three convolutions are
        `atrous_conv2d` calls with dilation `rate`.

        Args:
            inputs: input feature map.
            depths: three output channel counts, one per convolution.
            stage: prefix used to name the convolutions.
            rate: dilation rate passed to every atrous convolution.
            is_training: forwarded to `bn`.

        Returns:
            `relu(path(inputs) + inputs)`.
        """
        depth1, depth2, depth3 = depths
        layer1 = relu(bn(atrous_conv2d(inputs, depth1, [1,1], rate, name=stage+'_a_identity'), is_training))
        layer2 = relu(bn(atrous_conv2d(layer1, depth2, [3,3], rate, name=stage+'_b_identity'), is_training))
        layer3 = bn(atrous_conv2d(layer2, depth3, [1,1], rate, name=stage+'_c_identity'), is_training)
        layer4 = relu(tf.add(layer3, inputs))
        return layer4

    def atrous_conv_block(self, inputs, depths, stage, rate, is_training, s=2):
        """Projection-shortcut bottleneck block built from atrous convolutions.

        Args:
            inputs: input feature map.
            depths: three output channel counts, one per convolution.
            stage: prefix used to name the convolutions.
            rate: dilation rate passed to every atrous convolution.
            is_training: forwarded to `bn`.
            s: stride of the shortcut convolution only; the atrous path is not
                given a stride. Every call in this class passes `s=1`.
                NOTE(review): other values presumably make the two branches
                differ in spatial size -- confirm before relying on the default.

        Returns:
            `relu(path(inputs) + shortcut(inputs))`.
        """
        depth1, depth2, depth3 = depths
        layer1 = relu(bn(atrous_conv2d(inputs, depth1, [1,1], rate, name=stage+'_a_conv'), is_training))
        layer2 = relu(bn(atrous_conv2d(layer1, depth2, [3,3], rate, name=stage+'_b_conv'), is_training))
        layer3 = bn(atrous_conv2d(layer2, depth3, [1,1], rate, name=stage+'_c_conv'), is_training)
        shortcut = bn(conv2d(inputs, depth3, [1,1], name=stage+'_shortcut', strides=[1, s, s, 1], padding='VALID'), is_training)
        layer4 = relu(tf.add(layer3, shortcut))
        return layer4

In [3]:
# Notebook driver: build the graph, wait briefly, then run the training loop.
if __name__ == '__main__':
    
    # Instantiating only sets hyper-parameters and paths; no graph is built yet.
    model = DeepLab_v3()
    
    # Create placeholders, network, loss and train op.
    model.build_model()
    print('\nStart training after 5sec....\n')
    # Pause for five seconds before the training loop starts.
    time.sleep(5)
    
    # Run the training loop defined in DeepLab_v3.train_model.
    model.train_model()
    print('\nFinish training')



x : Tensor("ResNet50/pool1:0", shape=(?, 48, 48, 64), dtype=float32)
x1 : Tensor("ResNet50/Relu_9:0", shape=(?, 48, 48, 256), dtype=float32)
x2 : Tensor("ResNet50/Relu_18:0", shape=(?, 24, 24, 512), dtype=float32)
x3 : Tensor("ResNet50/Relu_36:0", shape=(?, 24, 24, 1024), dtype=float32)
x4 : Tensor("ResNet50/Relu_45:0", shape=(?, 24, 24, 2048), dtype=float32)
feature_map : Tensor("ResNet50/ASPP/ResizeBilinear:0", shape=(?, 24, 24, 256), dtype=float32)
rate1 : Tensor("ResNet50/ASPP/rate1/BiasAdd:0", shape=(?, 24, 24, 256), dtype=float32)
rate6 : Tensor("ResNet50/ASPP/rate6/BiasAdd:0", shape=(?, 24, 24, 256), dtype=float32)
rate12 : Tensor("ResNet50/ASPP/rate12/BiasAdd:0", shape=(?, 24, 24, 256), dtype=float32)
rate18 : Tensor("ResNet50/ASPP/rate18/BiasAdd:0", shape=(?, 24, 24, 256), dtype=float32)
concated : Tensor("ResNet50/ASPP/concat:0", shape=(?, 24, 24, 1280), dtype=float32)
net : Tensor("ResNet50/ASPP/net/BiasAdd:0", shape=(?, 24, 24, 256), dtype=float32)
logits : Tensor("ResNet

inputs_path_len : 300
labels_path_len : 300
inputs_path_len : 237
labels_path_len : 237


KeyboardInterrupt: 