In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import keras
from keras import layers, activations
from os import path
from PIL import Image
import pickle
from concurrent import futures
from matplotlib import pyplot as plt

# Folder that data_gen reads 'data.csv' and the image files from.
trainingDataDir = 'o:/temp/pixiv/training/'
# (width, height) every image is resized to before it is fed to the network.
targetSize = (224, 224)
# Network input shape: the spatial size followed by 3 colour channels.
targetShape = (*targetSize, 3)
# Notebook-level seed constant (data_gen has its own `seed` parameter that
# defaults to the same value).
seed = 1

In [None]:
def data_gen(
    dataPath,
    batchSize=32,
    shuffle=True,
    seed=1,
    train_split=0.7,
    valid_split=0.3,
    subset='train',
    print_len=False,
    targetSize=(224, 224)):
    """Endlessly yield ``(images, labels)`` batches for one split of the data.

    ``<dataPath>/data.csv`` is read with its ``img`` column as the index and
    rows whose ``R18`` value is missing are dropped. Each index entry is
    joined onto ``dataPath`` to locate the image file. No class balancing is
    applied here.

    The rows are optionally shuffled, then cut positionally into three
    consecutive slices: the first ``train_split`` fraction is 'train', the
    next ``valid_split`` fraction is 'valid' and whatever remains is 'test'.
    The slices only stay disjoint between calls when the same ``shuffle`` and
    ``seed`` values are used for every subset.

    Args:
        dataPath: directory containing ``data.csv`` and the image files.
        batchSize: number of samples per yielded batch (must be >= 1).
        shuffle: shuffle the rows once, before splitting.
        seed: ``random_state`` used for that shuffle.
        train_split: fraction of rows assigned to 'train'.
        valid_split: fraction of rows assigned to 'valid'.
        subset: which slice to serve: 'train', 'valid' or 'test'.
        print_len: print the number of rows in the selected slice.
        targetSize: ``(width, height)`` each image is resized to.

    Yields:
        ``(x, y)`` where ``x`` stacks ``batchSize`` RGB images as one array
        and ``y`` is the array of the matching ``R18`` values. The generator
        never terminates; a partial batch left over at the end of a pass is
        completed with rows from the start of the next pass, so every batch
        has exactly ``batchSize`` samples.

    Raises:
        FileNotFoundError: ``dataPath`` does not exist.
        ValueError: ``batchSize`` < 1, unknown ``subset``, or the selected
            slice is empty.

    Because this is a generator function, all validation (and the
    ``print_len`` output) happens on the first ``next()`` call, not when
    ``data_gen(...)`` is called.
    """
    # A non-positive batch size would loop forever without ever yielding.
    if batchSize < 1:
        raise ValueError('batchSize must be a positive integer')
    if not path.exists(dataPath):
        raise FileNotFoundError('dataPath not found')

    # Resolve everything relative to the dataPath argument rather than a
    # notebook-level global, so the parameter is actually honoured.
    csvPath = path.join(dataPath, 'data.csv')
    dataCsv = pd.read_csv(csvPath, index_col='img')
    dataCsv = dataCsv.dropna(subset=['R18'])

    if shuffle:
        dataCsv = dataCsv.sample(frac=1, random_state=seed)

    # Positional split boundaries, computed once on the full frame.
    total = len(dataCsv)
    trainEnd = int(train_split * total)
    validEnd = int((train_split + valid_split) * total)
    if subset == 'train':
        dataCsv = dataCsv.iloc[:trainEnd]
    elif subset == 'valid':
        dataCsv = dataCsv.iloc[trainEnd:validEnd]
    elif subset == 'test':
        dataCsv = dataCsv.iloc[validEnd:]
    else:
        raise ValueError('subset must be train, valid or test')
    if len(dataCsv) == 0:
        raise ValueError("dataset is zero")
    if print_len:
        print(len(dataCsv))

    def load_img(imgPath):
        """Load one image, pad it to a centred square and resize it."""
        # The context manager closes the underlying file once the pixels
        # have been copied onto the canvas.
        with Image.open(imgPath) as img:
            width, height = img.size
            side = max(width, height)
            # Image.new without a colour gives a black RGB canvas.
            canvas = Image.new('RGB', (side, side))
            canvas.paste(img, ((side - width) // 2, (side - height) // 2))
        return np.array(canvas.resize(targetSize, Image.BICUBIC))

    # Build the (file path, label) pairs once instead of doing a label lookup
    # per row on every pass; iterating the column also copes with duplicate
    # index labels.
    samples = [(path.join(dataPath, name), label)
               for name, label in dataCsv['R18'].items()]

    imgPaths = []
    ys = []
    with futures.ThreadPoolExecutor(max_workers=10) as executor:
        while True:
            for imgPath, label in samples:
                imgPaths.append(imgPath)
                ys.append(label)
                if len(imgPaths) == batchSize:
                    # executor.map returns results in input order, so the
                    # images stay aligned with ys.
                    images = np.array(list(executor.map(load_img, imgPaths)))
                    yield images, np.array(ys)
                    imgPaths = []
                    ys = []


In [None]:
# ImageNet-pretrained MobileNetV2 without its classification top, used as the
# feature extractor for inputs of shape targetShape.
mobNet = keras.applications.mobilenet_v2.MobileNetV2(
    input_shape=targetShape,
    include_top=False,
    weights='imagenet',
)
# Mark every backbone layer as non-trainable so only the layers added on top
# of it are updated during fitting.
for backboneLayer in mobNet.layers:
    backboneLayer.trainable = False


In [None]:
inputLayer = layers.Input(shape=targetShape)

# Layers are listed (and therefore instantiated and applied) in exactly the
# order they are chained together.
pipeline = [
    # augmentation: random flip, zoom (up to 20%) and rotation
    layers.RandomFlip(),
    layers.RandomZoom(height_factor=0.2, width_factor=0.2),
    layers.RandomRotation(0.2),
    # preprocess: x / 127.5 - 1, i.e. 0..255 pixel values become -1..1
    layers.Rescaling(scale=1. / 127.5, offset=-1),
    # frozen feature extractor followed by spatial pooling
    mobNet,
    layers.GlobalAveragePooling2D(),
    layers.Flatten(),
    # classification head: two dropout -> dense(2048) -> ReLU stages
    layers.Dropout(0.2),
    layers.Dense(2048),
    layers.ReLU(),
    layers.Dropout(0.2),
    layers.Dense(2048),
    layers.ReLU(),
]

x = inputLayer
for stage in pipeline:
    x = stage(x)

# Single sigmoid unit: the model outputs one value in (0, 1) per image.
outputLayer = layers.Dense(1, activation='sigmoid')(x)

model = keras.Model(inputs=inputLayer, outputs=outputLayer)
model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()


In [None]:
# Stop when val_accuracy has not improved for 10 consecutive epochs and
# restore the weights from the best epoch seen.
cbs = [
    keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                  patience=10,
                                  verbose=1,
                                  restore_best_weights=True)
]

# Both generators must share the same split fractions (and the default
# shuffle/seed) so the 'train' and 'valid' slices do not overlap.
genArgs = dict(train_split=0.8, valid_split=0.1, batchSize=64)

# The generators already emit batches of 64, so `batch_size` is deliberately
# not passed to fit(): Keras documents that it must not be specified for
# generator input.
model.fit(data_gen(trainingDataDir, **genArgs),
          steps_per_epoch=30,
          epochs=1000,
          # Errors on label 0 are weighted 2.315x relative to label 1 --
          # presumably to offset class imbalance; confirm against the data.
          class_weight={
              0: 2.315,
              1: 1
          },
          validation_data=data_gen(trainingDataDir, subset='valid', **genArgs),
          validation_steps=4,
          max_queue_size=30,
          callbacks=cbs)


In [None]:
# Batch generator over the 'test' slice, using the same 0.8 / 0.1 split
# fractions as the training cell. data_gen is a generator function, so the
# row count requested by print_len=True is printed when the first batch is
# pulled, not when this cell runs.
gen = data_gen(
    trainingDataDir,
    batchSize=64,
    train_split=0.8,
    valid_split=0.1,
    subset='test',
    print_len=True,
)

In [None]:
# Evaluate on 4 batches drawn from `gen`. The generator already yields
# ready-made batches, so `batch_size` is not passed (Keras documents that it
# must not be specified for generator input).
model.evaluate(gen, steps=4)

In [None]:
# Pull the next batch from `gen`, score it, then show each image followed by
# its predicted value and its label.
tx, ty = next(gen)
res = model.predict(tx)
for image, predicted, label in zip(tx, res, ty):
    plt.imshow(image)
    plt.show()
    print(predicted, label)

In [None]:
# Persist the trained model to 'model-r18.h5' (relative to the working
# directory of the kernel).
model.save(filepath='model-r18.h5')