In [1]:
from PIL import Image
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense

# <font color = 'RED'> 製作MODEL/SUBMIT資料夾 </font>

In [2]:
! mkdir model submit

子目錄或檔案 model 已經存在。
處理時發生錯誤: model。
子目錄或檔案 submit 已經存在。
處理時發生錯誤: submit。


In [21]:
from os import listdir
from os.path import isfile, isdir, join

def get_imgs_data(mypath, return_file_names=False):
    files = listdir(mypath) # 取得所有檔案與子目錄名稱
    imgs_arr = []           # 儲存影像路徑
    file_names = []         # 儲存影樣檔名 
    for f in files:
        if (not f[0]=='.') and (f[-4:]=='.png'):
            img = Image.open(mypath+f)
            # 統一調整影像SIZE
            # Image.ANTIALIAS 平滑滤波。对所有可以影响输出像素的输入像素进行高质量的重采样滤波，以计算输出像素值。
            img = img.resize((224, 224), Image.ANTIALIAS)
            imgs_arr.append(np.array(img))
            file_names.append(f[:-4])
            
    odarray = np.array(imgs_arr)
    print(odarray)
    # np.clip : np.clip(x,min,max)  if x>max x=max ; if x<min x=min
    op_array =  np.clip(np.array(imgs_arr) / 127.5-1, -1, 1) # normalize
    if return_file_names:
        return op_array , file_names
    return op_array 

X_test, ids_test = get_imgs_data("./testing_set/testing_set/", return_file_names=True); print(X_test.shape)
ids_test[:5]

[[[[ 76  44  23]
   [ 76  44  23]
   [ 76  44  23]
   ...
   [ 45  17   6]
   [ 46  18   7]
   [ 47  19   8]]

  [[ 75  43  22]
   [ 75  43  22]
   [ 75  43  22]
   ...
   [ 44  16   5]
   [ 45  17   6]
   [ 46  18   7]]

  [[ 74  42  21]
   [ 74  42  21]
   [ 74  42  21]
   ...
   [ 44  16   5]
   [ 44  16   5]
   [ 45  17   6]]

  ...

  [[ 38  53  98]
   [ 35  49  94]
   [ 35  49  94]
   ...
   [ 49  71 101]
   [ 46  68 101]
   [ 45  68 102]]

  [[ 36  59 100]
   [ 37  54  97]
   [ 45  57 103]
   ...
   [ 54  73 107]
   [ 55  75 109]
   [ 53  73 106]]

  [[ 25  50  89]
   [ 28  46  89]
   [ 37  47  93]
   ...
   [ 59  78 111]
   [ 57  75 108]
   [ 53  72 105]]]


 [[[113 125 137]
   [115 127 139]
   [115 127 139]
   ...
   [ 83 112  74]
   [124 154 122]
   [121 149 120]]

  [[113 125 137]
   [115 127 139]
   [115 127 139]
   ...
   [161 186 151]
   [139 168 135]
   [ 61  90  56]]

  [[113 125 137]
   [115 127 139]
   [115 127 139]
   ...
   [178 202 173]
   [113 143 105]
   [ 58  88

['003CE5D6A5', '005053493B', '01CBBE0E4C', '02C5C50BFB', '03D9D55867']

In [8]:
classmap = pd.read_csv('classmap.csv', index_col=0, header=None).to_dict()[1]
classmap.keys()

dict_keys(['whippet', 'Walker_hound', 'fox_terrier', 'basset', 'beagle', 'papillon', 'Norfolk_terrier'])

In [22]:
X_train_list = [] 
y_train_list = []
for k in classmap.keys():
    c_i =  classmap[k]
    X_train_ = get_imgs_data(f"./training_set/training_set/{k}/"); print(f"class [{c_i}]: ", X_train_.shape)
    X_train_list.append(X_train_)
    y_train_list.append(np.zeros(X_train_.shape[0])+c_i)
X_train = np.concatenate(X_train_list, 0); print(f"X_train: ", X_train.shape)
y_train = np.concatenate(y_train_list, 0); print(f"y_train: ", y_train.shape)
del X_train_list; del y_train_list

[[[[ 72  81  26]
   [ 66  74  19]
   [ 60  67  13]
   ...
   [ 76 107  31]
   [ 88 113  32]
   [ 83 108  25]]

  [[ 68  77  20]
   [ 70  78  22]
   [ 66  74  18]
   ...
   [ 74 105  28]
   [ 83 110  29]
   [ 78 103  21]]

  [[ 72  82  21]
   [ 78  87  27]
   [ 71  80  22]
   ...
   [ 66  97  18]
   [ 73 100  22]
   [ 70  97  19]]

  ...

  [[ 43  65  15]
   [ 37  59  12]
   [ 42  66  19]
   ...
   [ 54  77  10]
   [ 61  88  18]
   [ 64  90  18]]

  [[ 56  80  27]
   [ 55  78  27]
   [ 51  75  27]
   ...
   [ 53  75   9]
   [ 80 106  30]
   [ 54  82  11]]

  [[ 57  85  26]
   [ 58  85  28]
   [ 34  62  11]
   ...
   [ 75  96  31]
   [ 67  95  18]
   [ 54  85   8]]]


 [[[228 246 250]
   [230 248 252]
   [230 248 252]
   ...
   [254 254 255]
   [254 254 255]
   [255 255 255]]

  [[228 246 250]
   [230 248 252]
   [230 248 252]
   ...
   [255 255 255]
   [255 255 255]
   [255 255 255]]

  [[228 246 250]
   [230 248 252]
   [230 248 252]
   ...
   [255 255 255]
   [255 255 255]
   [255 255

class [2]:  (110, 224, 224, 3)
[[[[ 39  58  61]
   [ 33  53  56]
   [ 31  50  54]
   ...
   [ 17  68  88]
   [ 16  65  91]
   [ 28  77 104]]

  [[ 61  75  75]
   [ 55  70  70]
   [ 61  75  75]
   ...
   [ 42  92 109]
   [ 31  83 104]
   [ 24  76  99]]

  [[ 47  58  52]
   [ 45  56  51]
   [ 50  61  56]
   ...
   [ 31  75  88]
   [ 34  86  99]
   [ 37  94 108]]

  ...

  [[ 89 168  85]
   [ 68 172 108]
   [ 65 170 154]
   ...
   [121 186 106]
   [119 185 105]
   [118 185 105]]

  [[ 90 170  77]
   [ 76 175  91]
   [ 76 169 121]
   ...
   [119 185 104]
   [119 186 106]
   [117 184 104]]

  [[ 91 169  75]
   [ 82 176  77]
   [ 86 167  94]
   ...
   [117 182 102]
   [116 183 103]
   [115 182 102]]]


 [[[ 81  52  54]
   [ 70  47  47]
   [ 92  79  76]
   ...
   [112 106  95]
   [107  85  78]
   [ 98  72  65]]

  [[ 73  44  45]
   [ 80  56  56]
   [ 94  78  76]
   ...
   [107 100  90]
   [ 69  66  52]
   [ 74  74  57]]

  [[ 86  55  58]
   [ 91  64  65]
   [ 90  71  69]
   ...
   [ 77  70  6

[[[[ 93 114  58]
   [ 59  76  32]
   [ 77  88  57]
   ...
   [132 122  47]
   [143 130  63]
   [144 130  65]]

  [[ 95 112  60]
   [ 76  90  47]
   [ 74  83  48]
   ...
   [148 137  62]
   [139 126  57]
   [143 129  62]]

  [[ 96 104  60]
   [ 92  98  55]
   [ 98 103  62]
   ...
   [150 139  64]
   [141 129  57]
   [153 140  69]]

  ...

  [[ 48  64  46]
   [ 22  35  20]
   [ 32  39  27]
   ...
   [ 70  81  27]
   [ 85  96  39]
   [100 111  60]]

  [[ 61  78  59]
   [ 28  41  25]
   [ 34  41  28]
   ...
   [ 82  93  42]
   [104 114  62]
   [ 81  90  44]]

  [[ 64  80  61]
   [ 38  52  34]
   [ 32  39  26]
   ...
   [ 97 107  59]
   [111 121  74]
   [ 74  84  41]]]


 [[[160 160 162]
   [163 163 165]
   [163 163 165]
   ...
   [186 185 191]
   [183 183 193]
   [176 176 188]]

  [[156 156 158]
   [158 158 160]
   [165 165 167]
   ...
   [184 183 189]
   [181 181 191]
   [174 174 186]]

  [[159 159 161]
   [161 161 163]
   [162 162 164]
   ...
   [180 179 186]
   [178 177 187]
   [169 169

In [23]:
y_train_categ = to_categorical(y_train); print(f"y_train_categ: ", y_train_categ.shape)

y_train_categ:  (868, 7)


# <font color ='GREEN'>Data Augmentation</font>

In [24]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
datagen = ImageDataGenerator(
    featurewise_center=True,
    featurewise_std_normalization=True,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True)

datagen.fit(X_train)

# <font color='BLUE'>Build Model</font>

In [10]:
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Conv2D, BatchNormalization, MaxPool2D, GlobalAveragePooling2D
from tensorflow.keras.optimizers import RMSprop, Adam, SGD
from tensorflow.keras import backend as K
from tensorflow.keras.applications import VGG16, MobileNet, InceptionV3
from tensorflow.keras.layers import Input

In [11]:
# this could also be the output a different Keras model or layer
input_tensor = Input(shape=(224, 224, 3))  # this assumes K.image_data_format() == 'channels_last'
base_model = InceptionV3(input_tensor=input_tensor, weights='imagenet', include_top=False)


W0929 06:19:57.013173  5888 deprecation.py:506] From D:\Anaconda3\envs\PYCHARM\lib\site-packages\tensorflow\python\ops\init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


In [None]:
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
predictions = Dense(7, activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

In [None]:
opt = RMSprop(lr=3e-4)
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy']
             )

In [None]:
epochs = 5
model.fit_generator(datagen.flow(X_train, y_train_categ, batch_size=32),
                    steps_per_epoch=len(X_train) / 32, 
                    epochs=epochs)

In [None]:
# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 249 layers and unfreeze the rest:
for layer in model.layers[:249]:
    layer.trainable = False
for layer in model.layers[249:]:
    layer.trainable = True

In [None]:
# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), 
              loss='categorical_crossentropy',
              metrics=['accuracy']
             )

In [None]:
# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
epochs = 10
his=model.fit_generator(datagen.flow(X_train, y_train_categ, batch_size=32),
                    steps_per_epoch=len(X_train) / 32, 
                    epochs=epochs)

In [None]:
model_name = "CNN_InceptionV3"
postfix = "12"

In [None]:
# save out
from datetime import datetime
y_pred_categ = model.predict(X_test)
y_pred = np.argmax(y_pred_categ, 1)
df_op = pd.DataFrame({'id':ids_test, 'class': y_pred})
t_str = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S")
pth = f'submit/{model_name}_submission_{postfix}_{t_str}.csv'
df_op.to_csv(pth, index=False) # real test score: 0.93
print(f'save scv: {pth}') 
# save out
pth = f'model/{model_name}_model_{postfix}_{t_str}.h5'
model.save_weights(pth)
print(f'save model: {pth}')

# Try Resenet

In [25]:
from tensorflow.keras.applications.resnet50 import ResNet50
res_model = ResNet50(input_tensor=input_tensor, weights='imagenet', include_top=False)



In [42]:
from tensorflow.keras.layers import Flatten, Dense,Dropout

x_res = res_model.output
x_res = GlobalAveragePooling2D()(x_res)
x_res  = Flatten()(x_res)

predictions_res = Dense(7, activation='softmax')(x_res)


In [43]:
#設定凍結與要進行的網路層
res_net_Final = Model(inputs=res_model.input,outputs=predictions_res)
for layer in res_net_Final.layers[:1]:
    layer.trainable = False
for layer in res_net_Final.layers[2:]:
    layer.trainable = True

In [44]:
res_net_Final.compile(optimizer=Adam(lr=1e-5),
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])
res_net_Final.summary()

Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1 (Conv2D)                  (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_conv1 (BatchNormalization)   (None, 112, 112, 64) 256         conv1[0][0]                      
____________________________________________________________________________________________

In [45]:
epochs = 10
his_res=res_net_Final.fit_generator(datagen.flow(X_train, y_train_categ, batch_size=10),
                    steps_per_epoch=len(X_train) / 10, 
                    epochs=epochs)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [46]:
model_name = "CNN_RES50"
postfix = "12"
# save out
from datetime import datetime
y_pred_categ = res_net_Final.predict(X_test)
y_pred = np.argmax(y_pred_categ, 1)
df_op = pd.DataFrame({'id':ids_test, 'class': y_pred})
t_str = datetime.strftime(datetime.now(), "%Y%m%d%H%M%S")
pth = f'submit/{model_name}_submission_{postfix}_{t_str}.csv'
df_op.to_csv(pth, index=False) # real test score: 0.93
print(f'save scv: {pth}') 
# save out
pth = f'model/{model_name}_model_{postfix}_{t_str}.h5'
res_net_Final.save_weights(pth)
print(f'save model: {pth}')

save scv: submit/CNN_RES50_submission_12_20190929072850.csv
save model: model/CNN_RES50_model_12_20190929072850.h5


# Train

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<tensorflow.python.keras.callbacks.History at 0x20b6fd82448>

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


save scv: submit/CNN_InceptionV3_submission_12_20190928154149.csv
save model: model/CNN_InceptionV3_model_12_20190928154149.h5


In [82]:
plt.plot(his.history['val_loss'])
plt.xlabel('epochs')
plt.ylabel('validation loss')
plt.show()

KeyError: 'val_loss'

In [None]:
plt.plot(his.history['acc'])
plt.xlabel('epochs')
plt.ylabel('accuracy')
plt.show()

In [None]:
plt.plot(his.history['acc'],label="Accuracy")
plt.plot(his.history['val_acc'], label="Validation accuracy")
plt.legend()
plt.xlabel('epochs')
plt.show()

In [None]:
plt.plot(his.history['loss'],label="traing loss")
plt.plot(his.history['val_loss'], label="Validation loss")
plt.legend()
plt.xlabel('epochs')
plt.show()