In [None]:
from google.colab import drive
# Mount the user's Google Drive into the Colab filesystem at /content/gdrive.
drive.mount('/content/gdrive')


Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
cd '/content/gdrive/My Drive/2020-ml100marathon-final-exam/image_data/'

/content/gdrive/My Drive/2020-ml100marathon-final-exam/image_data


In [None]:
import os
import glob
import shutil
# import matplotlib.pyplot as plt
# import tensorflow as tf
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Dense, Conv2D, Flatten, Dropout, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator

import numpy as np
# from tensorflow.keras.optimizers import Adam
# from tensorflow.keras.layers import Activation,BatchNormalization

# from tensorflow.keras.regularizers import l2
# from tensorflow.keras.callbacks import LearningRateScheduler
# from tensorflow.keras.callbacks import ReduceLROnPlateau

# Root folder that holds one sub-directory of images per flower class.
base_dir = "./train"

# Class labels; the number of classes is derived from this list.
classes = ['daisy', 'dandelion', 'rose', 'sunflower', 'tulip']
num_classes = len(classes)



In [None]:
# Split every class folder under `base_dir` into `train/<class>` and `val/<class>`.
#
# The images are *moved*, so once the split has been done a second run finds
# zero images per class and leaves the existing split untouched.

# Re-bind the module locally: a later cell runs `from glob import glob`, which
# replaces the module with the function and would break `glob.glob` on re-run.
import glob

TRAIN_FRACTION = 0.8  # share of each class that goes to the training set

for cl in classes:
  img_path = os.path.join(base_dir, cl)                          # folder of a single class
  # Sort so the split is reproducible; glob returns files in arbitrary order.
  images = sorted(glob.glob(os.path.join(img_path, '*.jpg')))
  print("{}: {} Images".format(cl, len(images)))                 # how many images this class has
  if not images:
    continue                                                     # nothing left to move (already split)

  num_train = int(round(len(images) * TRAIN_FRACTION))
  splits = {'train': images[:num_train], 'val': images[num_train:]}

  for split_name, split_images in splits.items():
    if not split_images:
      continue                                                   # do not create empty folders
    target_dir = os.path.join(base_dir, split_name, cl)
    os.makedirs(target_dir, exist_ok=True)                       # create the folder once per split
    for image_path in split_images:
      shutil.move(image_path, target_dir)                        # move the image into the split folder

train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')

daisy: 0 Images
dandelion: 0 Images
rose: 0 Images
sunflower: 0 Images
tulip: 0 Images


In [None]:
# Build the Keras directory iterators for the training and validation sets.
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')

IMG_SHAPE = 150    # images are resized to IMG_SHAPE x IMG_SHAPE
batch_size = 256

# Training generator: rescale plus random augmentation.
image_gen_train = ImageDataGenerator(
    rescale=1./255,               # map 0~255 integers to 0~1 floats
    rotation_range=45,            # random rotation within ±45°
    width_shift_range=.15,        # random horizontal shift within ±15%
    height_shift_range=.15,       # random vertical shift within ±15%
    horizontal_flip=True,         # random horizontal flip
    zoom_range=0.5                # random zoom by up to 50%
)

train_data_gen = image_gen_train.flow_from_directory(
    batch_size=batch_size,
    directory=train_dir,
    shuffle=True,
    target_size=(IMG_SHAPE, IMG_SHAPE),
    class_mode='categorical'
)

# Validation generator: rescale only. Random augmentation here would make
# val_loss / val_accuracy change from run to run, and the checkpoints and the
# learning-rate reducer that monitor val_loss would react to that noise.
image_gen_val = ImageDataGenerator(rescale=1./255)

val_data_gen = image_gen_val.flow_from_directory(
    batch_size=batch_size,
    directory=val_dir,
    shuffle=False,                # keep a fixed order for evaluation
    target_size=(IMG_SHAPE, IMG_SHAPE),
    class_mode='categorical'
)

Found 2258 images belonging to 5 classes.
Found 565 images belonging to 5 classes.


In [None]:
from keras.models import load_model
from sklearn.datasets import load_files   
from keras.utils import np_utils
from glob import glob
from keras import applications
from keras.preprocessing.image import ImageDataGenerator 
from keras import optimizers
from keras.models import Sequential, Model, load_model
from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.layers import BatchNormalization, Concatenate, GlobalMaxPooling2D
from keras.callbacks import ReduceLROnPlateau, LambdaCallback, ModelCheckpoint, LearningRateScheduler, EarlyStopping

# Training hyper-parameters
input_shape = (IMG_SHAPE, IMG_SHAPE, 3)

# Build the main model: an ImageNet-pretrained ResNet50V2 backbone (global
# average pooling, no classification top) followed by a small dense head.
main_model = Sequential()

resnet_model = applications.resnet_v2.ResNet50V2(include_top=False, pooling='avg', weights='imagenet', input_shape=input_shape)
main_model.add(resnet_model)

# Batch normalisation is added to keep training from fluctuating so much.
main_model.add(BatchNormalization())
main_model.add(Dense(2048, activation='relu'))
main_model.add(BatchNormalization())
main_model.add(Dense(1024, activation='relu'))
main_model.add(Dropout(0.5))
# The tensor is already (None, 1024) here, so Flatten does not change its shape;
# it is kept so the layer stack stays identical to previously saved models.
main_model.add(Flatten())
main_model.add(Dense(num_classes, activation='softmax'))

# Transfer learning: the backbone is already trained, so use a small learning
# rate to avoid destroying the pretrained weights.
# `learning_rate` replaces the deprecated `lr` argument.
opt = optimizers.Adam(learning_rate=1e-4)
main_model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

# Report, for every top-level layer, whether it is trainable (frozen or not).
print("--檢查 ALL 是否凍結-----------------")
for layer in main_model.layers:
    print(layer.name, ' is trainable? ', layer.trainable)

# Print the architecture once, after the model is fully assembled and compiled.
main_model.summary()


# Training setup: checkpoint and learning-rate callbacks.

# Folder for the per-epoch checkpoints; it is created on first use and kept
# between runs (it is not cleared here).
if not os.path.exists('best_loss'):
  os.makedirs('best_loss')

# TensorFlow 2 logs the validation accuracy as `val_accuracy` (formerly `val_acc`).
weight_path="./best_loss/epoch_{epoch:02d}_val_acc_{val_accuracy:.4f}_val_loss_{val_loss:.4f}.h5"
best_weight_path = "./best_weight.h5"

# Both checkpoints save weights only, and only when val_loss reaches a new minimum.
_checkpoint_options = dict(
    monitor='val_loss',
    verbose=1,
    save_weights_only=True,
    save_best_only=True,
    mode='min',
)

# One file per improving epoch (named after epoch / val_accuracy / val_loss) ...
ck_epoch_weight = ModelCheckpoint(weight_path, **_checkpoint_options)
# ... and a single file that always holds the best weights so far.
ck_best_weight = ModelCheckpoint(best_weight_path, **_checkpoint_options)

# Lower the learning rate automatically when the validation loss has not
# improved for 5 consecutive epochs.
_plateau_options = dict(
    monitor='val_loss',
    factor=0.8,
    patience=5,
    verbose=1,
    mode='auto',
    min_delta=0.0001,
    cooldown=5,
    min_lr=1e-14,
)
lr_reducer = ReduceLROnPlateau(**_plateau_options)

#early = EarlyStopping(monitor="val_loss", mode="min", patience=10) # probably needs to be more patient, but kaggle time is limited

# Step-wise learning-rate schedule: the rate is changed once training passes
# given epochs. The original note says the values follow the reference paper.
def lr_schedule(epoch):
    """Return the learning rate to use at ``epoch`` and print it.

    The rate starts at 1e-4 and drops to 1e-5, 1e-6 and 1e-7 once the epoch
    index is strictly greater than 100, 200 and 500 respectively.
    """
    # (threshold, rate) pairs, highest threshold first; the first match wins.
    steps = ((500, 1e-7), (200, 1e-6), (100, 1e-5))
    lr = next((rate for threshold, rate in steps if epoch > threshold), 1e-4)
    print('Learning rate: ', lr)
    return lr

lr_scheduler = LearningRateScheduler(lr_schedule)

# Callbacks used for training.
# NOTE(review): `lr_scheduler` is created but not registered. Registering it
# together with `lr_reducer` would give two callbacks that both set the learning
# rate, so confirm the intent before adding it.
callbacks = [ck_epoch_weight, ck_best_weight, lr_reducer]#,early]

epochs = 500

# Train only when no best-weights file exists yet. Training is expensive, so a
# previous run's result is reused; on a fresh setup the notebook still produces
# the weights file that a later cell loads.
if os.path.exists(best_weight_path):
  history = None
  print("Found existing weights at {}; skipping training.".format(best_weight_path))
else:
  history = main_model.fit(train_data_gen, epochs=epochs, validation_data=val_data_gen,
                           callbacks=callbacks, shuffle=True, workers=2)

# Save the full model (architecture + current weights) to the 'my_model' folder.
main_model.save('my_model')



Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50v2 (Functional)      (None, 2048)              23564800  
_________________________________________________________________
batch_normalization_2 (Batch (None, 2048)              8192      
_________________________________________________________________
dense_3 (Dense)              (None, 2048)              4196352   
_________________________________________________________________
batch_normalization_3 (Batch (None, 2048)              8192      
_________________________________________________________________
dense_4 (Dense)              (None, 1024)              2098176   
_________________________________________________________________
dropout_1 (Dropout)          (None, 1024)              0         
_________________________________________________________________
flatten_1 (Flatten)          (None, 1024)             

In [None]:
from tensorflow import keras
# Reload the full model from "./my_model", the path an earlier cell saves to
# with main_model.save('my_model').
main_model = keras.models.load_model("./my_model")


In [None]:
# Replace the model's weights with those stored in "best_weight.h5", the file the
# best-val_loss checkpoint callback is configured to write.
main_model.load_weights("best_weight.h5")

In [None]:
import os

import tensorflow as tf

# Classify every image in ./test and collect (file id, predicted class index).
folderPath = "./test"
# Sort for a reproducible row order; os.listdir returns entries in arbitrary order.
testFolder = sorted(os.listdir(folderPath))
fileName = []
prediction_class = []

for file in testFolder:
  filePath = os.path.join(folderPath, file)
  if not os.path.isfile(filePath):
    continue  # skip sub-directories; load_img would fail on them

  # Same preprocessing as the generators: resize to IMG_SHAPE and scale to 0~1.
  image = tf.keras.preprocessing.image.load_img(filePath, target_size=(IMG_SHAPE, IMG_SHAPE))
  input_arr = tf.keras.preprocessing.image.img_to_array(image)
  input_arr = input_arr / 255.0

  # Run the model once per image (as a batch of one) and reuse the result.
  probabilities = main_model.predict(np.expand_dims(input_arr, axis=0))
  predicted = int(np.argmax(probabilities))

  prediction_class.append(predicted)
  # The id is the file name without its extension.
  fileName.append(os.path.splitext(file)[0])

  # One line per image; the raw probability vectors are no longer printed
  # because they flooded the cell output.
  print("image: %s, class: %d" % (file, predicted))




[1;30;43m串流輸出內容已截斷至最後 5000 行。[0m
image: a66f895d03ea74bcd39fd52d4b8191e1.jpg, class: 3
[[1.5604923e-05 4.9876587e-07 9.9990976e-01 2.8033423e-06 7.1329931e-05]]
2
image: 903019a731861d257be5715d71cb82b7.jpg, class: 2
[[6.1315134e-08 2.0380124e-07 7.7725147e-08 2.7881295e-08 9.9999964e-01]]
4
image: b99c94c37b58488d432a3e65a1f3e7ac.jpg, class: 4
[[6.1484985e-07 6.3706091e-07 1.2469877e-07 9.3875569e-06 9.9998927e-01]]
4
image: bd4179ec98e998c58c09343b60e760d0.jpg, class: 4
[[6.8204463e-05 1.0124162e-05 2.6182723e-04 8.8584507e-03 9.9080139e-01]]
4
image: 9d52faec9df60a3004ece8cb26b0587d.jpg, class: 4
[[1.5524682e-06 9.9999261e-01 2.1885764e-07 1.1616692e-09 5.5442742e-06]]
1
image: c2469cffd4349e72a22f54a045f6fcbc.jpg, class: 1
[[4.6192057e-04 7.8962580e-04 1.4198388e-01 1.1219928e-06 8.5676336e-01]]
4
image: a723c21c0afffb1139a5ca59371ccd23.jpg, class: 4
[[2.7126253e-08 1.0000000e+00 3.5922157e-10 4.5449693e-11 5.9379002e-10]]
1
image: a34b0ca82333ec5afcdc74be7ddf26b6.jpg, class: 1
[

In [None]:
import pandas  as pd


# Assemble the submission table (one row per test image) and write it as CSV
# without the index column.
submission_columns = {'id': fileName, 'flower_class': prediction_class}
sub = pd.DataFrame(submission_columns)

sub.to_csv('final_exam_20201203.csv', index=False)