In [28]:
import matplotlib.pyplot as plt
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
import cv2
# Restrict CUDA to GPU 0. This is set before TensorFlow is imported further
# down, so that the restriction is in place when TensorFlow looks for devices.
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [29]:
# Check GPU: list the GPUs TensorFlow can see and stop early when there is none.
import tensorflow as tf
physical_devices = tf.config.experimental.list_physical_devices('GPU')
print(physical_devices)
assert len(physical_devices) > 0, "Not enough GPU hardware devices available"
# Enable memory growth on the first GPU so memory is allocated on demand
# rather than reserved up front.
tf.config.experimental.set_memory_growth(physical_devices[0], True)

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [30]:
# import package
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import (Input, Dense, Dropout, Activation,
                                     Flatten, BatchNormalization, Conv2D,
                                     MaxPooling2D)
from tensorflow.keras.layers import GlobalAveragePooling2D

In [31]:
def _read_image_list(list_path):
    """Read a space-separated, header-less listing into an (img_path, label) frame."""
    listing = pd.read_csv(list_path, sep=" ", header=None)
    listing.columns = ["img_path", "label"]
    return listing


# Training and validation splits share the same two-column file layout.
train_list = _read_image_list('train.txt')
valid_list = _read_image_list('valid.txt')

In [32]:
# Store the labels as strings: the 'label' column is later passed as y_col to
# flow_from_dataframe with class_mode='categorical'.
train_list['label'] = train_list['label'].astype(str)
train_list

Unnamed: 0,img_path,label
0,jpg/image_03860.jpg,16
1,jpg/image_06092.jpg,13
2,jpg/image_02400.jpg,42
3,jpg/image_02852.jpg,55
4,jpg/image_07710.jpg,96
...,...,...
1015,jpg/image_02944.jpg,59
1016,jpg/image_07434.jpg,93
1017,jpg/image_02684.jpg,57
1018,jpg/image_01639.jpg,81


In [33]:
# Store the validation labels as strings: the 'label' column is later passed
# as y_col to flow_from_dataframe with class_mode='categorical'.
valid_list['label'] = valid_list['label'].astype(str)
valid_list

Unnamed: 0,img_path,label
0,jpg/image_04467.jpg,89
1,jpg/image_07129.jpg,44
2,jpg/image_05166.jpg,4
3,jpg/image_07002.jpg,34
4,jpg/image_02007.jpg,79
...,...,...
1015,jpg/image_08182.jpg,61
1016,jpg/image_07029.jpg,38
1017,jpg/image_05956.jpg,67
1018,jpg/image_06051.jpg,13


In [34]:
# Prepare the training callbacks
def call_list_fun(models, model_name):
    """Build the list of Keras callbacks used during training.

    Ensures the output directory ``./Model/<model_name>-logs`` exists and
    creates three callbacks that write into it or watch validation metrics.

    Args:
        models: The model being trained. Only its class name is used, to
            label the TensorBoard log directory.
        model_name: Name used to build the per-model output directory.

    Returns:
        list: ``[tensorboard, checkpoint, early_stopping]`` in that order.
    """
    model_dir = './Model/{}-logs'.format(model_name)
    # exist_ok=True replaces the check-then-create sequence, so the call is
    # safe when the directory already exists (e.g. when the cell is re-run).
    os.makedirs(model_dir, exist_ok=True)

    # TensorBoard logs, with weight histograms recorded every epoch.
    logfiles = model_dir + '/{}-{}'.format('basic_model',
                                           models.__class__.__name__)
    model_cbk = keras.callbacks.TensorBoard(log_dir=logfiles,
                                            histogram_freq=1)

    # Save the model only when validation accuracy improves.
    modelfiles = model_dir + '/{}-best-model.h5'.format('basic_model')
    model_mckp = keras.callbacks.ModelCheckpoint(modelfiles,
                                                 monitor='val_accuracy',
                                                 save_best_only=True)

    # Stop training after 5 epochs without improvement in validation loss.
    earlystop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                              patience=5,
                                              verbose=2)
    return [model_cbk, model_mckp, earlystop]

## 讀入 Pre-trained Model

In [35]:
from tensorflow.keras.applications.resnet50  import (ResNet50 , preprocess_input)
#from tensorflow.keras.applications.resnet50 import ResNet50

In [39]:
# Side length, in pixels, of the square RGB input.
img_size = 224

# ResNet50 backbone with ImageNet weights and without its classification head.
pre_model = ResNet50(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))
# New classification head: global average pooling -> dropout -> 102-way softmax.
x = GlobalAveragePooling2D()(pre_model.output)
x = Dropout(0.25)(x)
outputs = Dense(102, activation='softmax')(x)
model = Model(inputs=pre_model.inputs, outputs=outputs)

# 決定 Pre-Train 參數量

In [40]:
# Mark the whole ResNet50 backbone as trainable (full fine-tuning).
pre_model.trainable = True

# Disabled alternative: freeze every backbone layer except the last
# `trainable_layer` ones, then print each layer's trainable flag.
# trainable_layer = 5
# for layer in pre_model.layers[:-trainable_layer]:
#     layer.trainable = False

# for layer in model.layers:
#     print(layer, layer.trainable)

# model.summary()

In [41]:
# Callbacks for this run; call_list_fun places their outputs under
# ./Model/model_ResNet50-logs.
callbacks_list_ResNet50 = call_list_fun(model, 'model_ResNet50')

In [42]:
learning_rate = 1e-4
# Use the `learning_rate` keyword: `lr` is a deprecated alias that newer
# Keras releases no longer accept.
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

# Categorical cross-entropy matches the softmax output and the
# class_mode='categorical' generators that feed this model.
model.compile(loss='categorical_crossentropy',
              optimizer=optimizer,
              metrics=['accuracy'])

# Data Augmentation

In [43]:
batch_size = 64
# Ceiling division: one step per batch, counting a final partial batch, and no
# surplus step when the sample count is an exact multiple of batch_size
# (the previous `// batch_size + 1` added one in that case).
num_steps = (len(train_list) + batch_size - 1) // batch_size
num_epochs = 10
img_size = 224

In [44]:
# Training images get random shifts and flips on top of the ResNet50 input
# preprocessing; validation images only get the preprocessing.
train_datagen = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    horizontal_flip=True,
    vertical_flip=True,
    width_shift_range=0.1,
    height_shift_range=0.1,
)
valid_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [45]:
img_shape = (img_size, img_size)

train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_list,
    x_col="img_path",
    y_col="label",
    target_size=img_shape,
    batch_size=batch_size,
    class_mode='categorical')

# Inspect the label -> class-index mapping chosen by train_generator
label = train_generator.class_indices
print(label)

# Pass the training class order explicitly so both generators encode a given
# label with the same index even if the validation split lacks a class.
# shuffle=False keeps validation batches in valid_list order.
valid_generator = valid_datagen.flow_from_dataframe(
    dataframe=valid_list,
    x_col="img_path",
    y_col="label",
    target_size=img_shape,
    batch_size=batch_size,
    classes=sorted(label, key=label.get),
    class_mode='categorical',
    shuffle=False)

Found 1020 validated image filenames belonging to 102 classes.
{'0': 0, '1': 1, '10': 2, '100': 3, '101': 4, '11': 5, '12': 6, '13': 7, '14': 8, '15': 9, '16': 10, '17': 11, '18': 12, '19': 13, '2': 14, '20': 15, '21': 16, '22': 17, '23': 18, '24': 19, '25': 20, '26': 21, '27': 22, '28': 23, '29': 24, '3': 25, '30': 26, '31': 27, '32': 28, '33': 29, '34': 30, '35': 31, '36': 32, '37': 33, '38': 34, '39': 35, '4': 36, '40': 37, '41': 38, '42': 39, '43': 40, '44': 41, '45': 42, '46': 43, '47': 44, '48': 45, '49': 46, '5': 47, '50': 48, '51': 49, '52': 50, '53': 51, '54': 52, '55': 53, '56': 54, '57': 55, '58': 56, '59': 57, '6': 58, '60': 59, '61': 60, '62': 61, '63': 62, '64': 63, '65': 64, '66': 65, '67': 66, '68': 67, '69': 68, '7': 69, '70': 70, '71': 71, '72': 72, '73': 73, '74': 74, '75': 75, '76': 76, '77': 77, '78': 78, '79': 79, '8': 80, '80': 81, '81': 82, '82': 83, '83': 84, '84': 85, '85': 86, '86': 87, '87': 88, '88': 89, '89': 90, '9': 91, '90': 92, '91': 93, '92': 94, '93'

In [None]:
# Model.fit accepts generators directly; fit_generator is deprecated. This
# also matches the prediction cell, which passes a generator to model.predict.
history = model.fit(train_generator,
                    steps_per_epoch=num_steps,
                    epochs=num_epochs,
                    validation_data=valid_generator,
                    callbacks=callbacks_list_ResNet50)

In [None]:
# Keys in history.history and the legend label used for each curve.
train_history = ['loss', 'val_loss', 'accuracy', 'val_accuracy']
name_history = ['training_loss', 'val_loss', 'training_acc', 'val_acc']

# Left panel: loss curves. Right panel: accuracy curves.
fig, axes = plt.subplots(1, 2, figsize=(12, 5))
for i, (metric_key, curve_label) in enumerate(zip(train_history, name_history)):
    ax = axes[i // 2]  # the first two metrics share panel 0, the last two panel 1
    values = history.history[metric_key]
    ax.plot(np.arange(len(values)), values, label=curve_label)

# Title each panel once; previously the second curve overwrote the first
# curve's title and the text had no space ('modelval_loss').
for ax, panel_title, y_label in zip(axes,
                                    ('model loss', 'model accuracy'),
                                    ('loss', 'accuracy')):
    ax.set_title(panel_title)
    ax.set_xlabel('epoch')
    ax.set_ylabel(y_label)
    ax.legend(loc='best')
plt.show()

## 最後測試

In [None]:
# Test images get the same ResNet50 preprocessing and no augmentation.
test_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# test.txt lists one image path per row and has no label column.
test_list = pd.read_csv('test.txt', sep=" ", header=None)
test_list.columns = ["img_path"]

# y_col=None / class_mode=None: no labels are read for these images.
# shuffle=False keeps the batches in test_list order.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_list,
    x_col="img_path",
    y_col=None,
    class_mode=None,
    target_size=img_shape,
    batch_size=batch_size,
    shuffle=False,
)

In [None]:
# Load the best model written by the ModelCheckpoint callback (saved only when
# val_accuracy improved) instead of predicting with the last-epoch weights.
# The path is the one call_list_fun builds for 'model_ResNet50'. If no
# checkpoint file exists, fall back to the in-memory model.
best_model_path = './Model/model_ResNet50-logs/basic_model-best-model.h5'
if os.path.exists(best_model_path):
    best_model = load_model(best_model_path)
else:
    best_model = model

y_pred_test = best_model.predict(test_generator)
# Index of the highest-scoring class for each test image.
y_pred_test = y_pred_test.argmax(-1)
y_pred_test

In [None]:
# Preview the test image paths (used as the "id" column of the submission below).
test_list['img_path']

In [None]:
# Assemble the DataFrame: one row per test image with its predicted class index

final_data = {"id": test_list['img_path'],
        "class": y_pred_test}

my_data = pd.DataFrame(final_data)
my_data

In [None]:
# Convert the predicted class indices back to the original label strings.
# `label` maps label string -> class index, so invert it.
ans_map = inv_map = {v: k for k, v in label.items()}
# Map from y_pred_test rather than from my_data['class']: the column is
# overwritten here, so mapping it in place would raise KeyError when the cell
# is run a second time.
my_data['class'] = [ans_map[idx] for idx in y_pred_test]
my_data

In [None]:
my_data.to_csv('answer.csv',index=False) # saving to a csv file for predictions on kaggle
# index=False: the DataFrame index is not written to the output file
my_data