In [None]:
'''
프롬프트 환경에서 설치
conda create -n iai python=3.6
conda activate iai
conda install -c conda-forge tensorflow
conda install -c conda-forge opencv
conda install -c conda-forge matplotlib
conda install -c conda-forge tqdm
conda install -c contango tflearn
conda install jupyter notebook

conda activate iai
conda install jupyter notebook
'''

In [1]:
import cv2                 # working with, mainly resizing, images
import numpy as np         # dealing with arrays
import os                  # dealing with directories
from random import shuffle # mixing up our currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel Bühler for this suggestion

# Dataset directory paths (absolute paths; adjust them for the local machine)
TRAIN_DIR = 'C:/Users/UN/dog_cat_img/dogs-vs-cats-redux-kernels-edition/train/train'
TEST_DIR = 'C:/Users/UN/dog_cat_img/dogs-vs-cats-redux-kernels-edition/test/test'

# Side length, in pixels, that every image is resized to (IMG_SIZE x IMG_SIZE)
IMG_SIZE = 50

# Learning rate passed to the optimizer when the network is defined
LR = 1e-3

# Name used as the training run id; encodes the learning rate and an architecture tag
MODEL_NAME = 'dogsvscats-{}-{}.model'.format(LR, '2conv-basic') 

In [7]:
# image 이름으로 라벨링
def label_img(img):
    """Derive a one-hot class label from an image file name.

    The class word is the third dot-separated field counted from the end of
    the name, e.g. ``cat`` in ``cat.0.jpg``.

    Returns:
        ``[1, 0]`` for a cat, ``[0, 1]`` for a dog, and ``None`` for any
        other word.

    Raises:
        IndexError: if the name has fewer than three dot-separated fields.
    """
    fields = img.split('.')
    animal = fields[-3]
    # One-hot layout is [cat, dog]. The table is rebuilt on every call so the
    # returned list is never shared between callers.
    one_hot_by_animal = {'cat': [1, 0], 'dog': [0, 1]}
    return one_hot_by_animal.get(animal)

In [8]:
#train data set 생성 함수
def create_train_data():
    """Build, shuffle, cache and return the labelled training set.

    Every entry of ``TRAIN_DIR`` is labelled from its file name with
    ``label_img``, read as a grayscale image and resized to
    ``IMG_SIZE`` x ``IMG_SIZE``.

    Returns:
        list: shuffled ``[image_array, label_array]`` pairs. The same data is
        also written to ``train_data.npy`` in the current working directory.
    """
    training_data = []
    for img in tqdm(os.listdir(TRAIN_DIR)):
        # The label comes from the file name, so compute it before `img` is
        # rebound to the pixel data below.
        label = label_img(img)
        # Full path of the image file
        path = os.path.join(TRAIN_DIR, img)
        # Read as a single-channel grayscale image
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        # Resize to the fixed network input size
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(img), np.array(label)])
    # Fix: the original shuffled the misspelled name `trainig_data`, which
    # raised NameError only after the whole directory had been processed.
    shuffle(training_data)
    # Saved next to the notebook. Each pair holds arrays of different shapes,
    # so wrap the list in an explicit object array: recent NumPy versions
    # refuse to build one implicitly from a ragged list. Reading the file back
    # needs np.load(..., allow_pickle=True).
    np.save('train_data.npy', np.array(training_data, dtype=object))
    return training_data

In [9]:
def process_test_data():
    """Build, shuffle, cache and return the unlabelled test set.

    Every entry of ``TEST_DIR`` is read as a grayscale image and resized to
    ``IMG_SIZE`` x ``IMG_SIZE``. The image id is the part of the file name
    before the first dot.

    Returns:
        list: shuffled ``[image_array, image_id]`` pairs, where ``image_id``
        is a string. The same data is also written to ``test_data.npy`` in
        the current working directory.
    """
    testing_data = []
    for img in tqdm(os.listdir(TEST_DIR)):
        path = os.path.join(TEST_DIR, img)
        # Take the id from the file name before `img` is rebound to pixel data.
        img_num = img.split('.')[0]
        img = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(img), img_num])
    shuffle(testing_data)
    # Each pair mixes an image array with a string id, so wrap the list in an
    # explicit object array: recent NumPy versions refuse to build one
    # implicitly from a ragged list. Reading the file back needs
    # np.load(..., allow_pickle=True).
    np.save('test_data.npy', np.array(testing_data, dtype=object))
    return testing_data

In [10]:
# Build the datasets. Each call reads and resizes every image in its
# directory and writes a .npy copy to the working directory.
train_data = create_train_data()
test_data = process_test_data()

100%|██████████████████████████████████████████████████████████████████████████| 25000/25000 [00:17<00:00, 1408.06it/s]
100%|██████████████████████████████████████████████████████████████████████████| 12500/12500 [00:08<00:00, 1394.14it/s]


In [12]:
#학습을 위한 모델 생성 라이브러리 import
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

W1017 11:06:43.610268 11188 deprecation_wrapper.py:119] From C:\Users\UN\Anaconda3\lib\site-packages\tflearn\helpers\summarizer.py:9: The name tf.summary.merge is deprecated. Please use tf.compat.v1.summary.merge instead.

W1017 11:06:43.611235 11188 deprecation_wrapper.py:119] From C:\Users\UN\Anaconda3\lib\site-packages\tflearn\helpers\trainer.py:25: The name tf.summary.FileWriter is deprecated. Please use tf.compat.v1.summary.FileWriter instead.

W1017 11:06:43.616221 11188 deprecation_wrapper.py:119] From C:\Users\UN\Anaconda3\lib\site-packages\tflearn\collections.py:13: The name tf.GraphKeys is deprecated. Please use tf.compat.v1.GraphKeys instead.

W1017 11:06:43.619735 11188 deprecation_wrapper.py:119] From C:\Users\UN\Anaconda3\lib\site-packages\tflearn\config.py:123: The name tf.get_collection is deprecated. Please use tf.compat.v1.get_collection instead.

W1017 11:06:43.624700 11188 deprecation_wrapper.py:119] From C:\Users\UN\Anaconda3\lib\site-packages\tflearn\config.py:129

In [None]:
# Build the simple CNN model
# Input: batches of single-channel IMG_SIZE x IMG_SIZE images
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

# First convolution + max-pooling stage
convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

# Second convolution + max-pooling stage
convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

# Fully connected layer followed by dropout
# NOTE(review): tflearn's dropout appears to take a keep probability, so 0.8
# would keep 80% of the units -- confirm against the tflearn docs.
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

# Two-way softmax output; labels are one-hot in [cat, dog] order
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

# Wrap the network in a trainable model; event logs go to the 'log' directory
model = tflearn.DNN(convnet, tensorboard_dir='log')

# Hold out the last 500 samples of the shuffled training data for validation
train = train_data[:-500]
test = train_data[-500:]

# Training images reshaped to (N, IMG_SIZE, IMG_SIZE, 1) and their labels
X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y = [i[1] for i in train]

# Validation images and labels, prepared the same way
test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]

In [None]:
# Train the model: 3 epochs on X/Y, validating on the held-out test_x/test_y
# samples; the run is identified by MODEL_NAME.
model.fit({'input': X}, {'targets': Y}, n_epoch=3, validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

In [13]:
# Build the deeper CNN model
import tensorflow as tf

# Start from an empty default graph. Without this, layers and the
# 'input'/'targets' placeholders left behind by a previously built network
# (the simple CNN cell, or an earlier run of this cell) share the graph with
# the ones created here and tflearn can feed the wrong placeholders in fit().
tf.reset_default_graph()

# Input: batches of single-channel IMG_SIZE x IMG_SIZE images
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

# Five convolution + max-pooling stages (32, 64, 128, 64, 32 filters)
convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 128, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 64, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

convnet = conv_2d(convnet, 32, 5, activation='relu')
convnet = max_pool_2d(convnet, 5)

# Fully connected layer followed by dropout
# NOTE(review): tflearn's dropout appears to take a keep probability, so 0.8
# would keep 80% of the units -- confirm against the tflearn docs.
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

# Two-way softmax output; labels are one-hot in [cat, dog] order
convnet = fully_connected(convnet, 2, activation='softmax')
convnet = regression(convnet, optimizer='adam', learning_rate=LR, loss='categorical_crossentropy', name='targets')

# Wrap the network in a trainable model; event logs go to the 'log' directory
model = tflearn.DNN(convnet, tensorboard_dir='log')


# Hold out the last 500 samples of the shuffled training data for validation
train = train_data[:-500]
test = train_data[-500:]

# Training images reshaped to (N, IMG_SIZE, IMG_SIZE, 1) and their labels
X = np.array([i[0] for i in train]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
Y = [i[1] for i in train]

# Validation images and labels, prepared the same way
test_x = np.array([i[0] for i in test]).reshape(-1,IMG_SIZE,IMG_SIZE,1)
test_y = [i[1] for i in test]

W1017 11:06:48.842268 11188 deprecation_wrapper.py:119] From C:\Users\UN\Anaconda3\lib\site-packages\tflearn\layers\core.py:81: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W1017 11:06:48.845246 11188 deprecation.py:506] From C:\Users\UN\Anaconda3\lib\site-packages\tflearn\initializations.py:119: calling UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
W1017 11:06:48.845246 11188 deprecation.py:323] From C:\Users\UN\Anaconda3\lib\site-packages\tensorflow\python\util\deprecation.py:507: UniformUnitScaling.__init__ (from tensorflow.python.ops.init_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.initializers.variance_scaling instead with distribution=uniform to get equivalent behavior.
W1017 11:06:48.859199

In [14]:
# Train the model: 3 epochs on X/Y, validating on the held-out test_x/test_y
# samples; the run is identified by MODEL_NAME.
model.fit({'input': X}, {'targets': Y}, n_epoch=3, validation_set=({'input': test_x}, {'targets': test_y}), 
    snapshot_step=500, show_metric=True, run_id=MODEL_NAME)

Training Step: 1148  | total loss: 0.43131 | time: 27.972s
| Adam | epoch: 003 | loss: 0.43131 - acc: 0.7992 -- iter: 24448/24500
Training Step: 1149  | total loss: 0.43468 | time: 29.040s
| Adam | epoch: 003 | loss: 0.43468 - acc: 0.8037 | val_loss: 0.40297 - val_acc: 0.8140 -- iter: 24500/24500
--


In [15]:
#지정한 모델명으로 모델 저장
#model.save(MODEL_NAME)

In [16]:
import tensorflow as tf
# Reset TensorFlow's global default graph.
# NOTE(review): in cell order this runs after `model` has been built and
# trained, and the cells below still call model.predict on that model.
# Presumably the reset was meant to run before a network is built -- confirm.
tf.reset_default_graph()

In [17]:
# Plotting library
import matplotlib.pyplot as plt

#test_data = process_test_data()
#test_data = np.load('test_data.npy')

fig = plt.figure()

# Show the first 12 test images in a 3x4 grid, each titled with the class
# the model predicts for it.
for idx, sample in enumerate(test_data[:12]):
    pixels = sample[0]
    ax = fig.add_subplot(3, 4, idx + 1)

    # The network input carries a trailing single-channel axis.
    scores = model.predict([pixels.reshape(IMG_SIZE, IMG_SIZE, 1)])[0]

    # Output layout is [cat, dog]: index 1 winning means "dog".
    predicted = 'Dog' if np.argmax(scores) == 1 else 'Cat'

    ax.imshow(pixels, cmap='gray')
    ax.set_title(predicted)
    # Hide tick marks and labels on both axes
    for axis in (ax.get_xaxis(), ax.get_yaxis()):
        axis.set_visible(False)
plt.show()

<Figure size 640x480 with 12 Axes>

In [19]:
# Save the predictions as a CSV file: one row per test image holding the
# image id and the model's score for the "dog" class (index 1 of [cat, dog]).
with open('submission_file.csv', 'w') as out:
    out.write('id,label\n')
    for sample in tqdm(test_data):
        image_id = sample[1]
        # The network input carries a trailing single-channel axis.
        pixels = sample[0].reshape(IMG_SIZE, IMG_SIZE, 1)
        scores = model.predict([pixels])[0]
        out.write('{},{}\n'.format(image_id, scores[1]))

100%|███████████████████████████████████████████████████████████████████████████| 12500/12500 [00:18<00:00, 693.56it/s]


In [15]:
#log파일을 이용한 tensorboard, anaconda prompt에 입력
#tensorboard --logdir="C:/Users/UN/log"