In [1]:
# Data prep
import os, shutil
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from random import random
import pandas as pd
from tensorflow.keras.preprocessing.image import ImageDataGenerator
keras.mixed_precision.set_global_policy('mixed_float16')

# data loading config
# Module-level settings shared by the dataset-loading cells below.
batch_size = 32  # batch size used when building the train/val datasets
# NOTE(review): image_size is passed as (img_height, img_width), i.e. images are
# resized to 1920 rows x 1280 columns -- confirm height/width are not swapped.
img_height = 1920
img_width = 1280
dataPath = 'train-smaller'  # root folder walked/loaded for training data
labels = 'inferred'  # auto find from folders
label_mode = 'categorical'  # one hot encoding
color_mode = 'rgb'
shuffle = True
seed = 69  # same seed is passed to both subset loaders
# Fraction (0-1) of files that split_train_test() moves to the test folder.
# It is ALSO passed as validation_split to image_dataset_from_directory below,
# so the train/validation split uses the same ratio.
test_split = 0.2  # split into train and test (NOT val), 0-1
AUTOTUNE = tf.data.AUTOTUNE
train_df = pd.read_csv('train.csv')  # not referenced by the other visible cells

def split_train_test(dataPath, test_fraction=None, dest_root='test'):
    """Randomly move a fraction of the files under ``dataPath`` into a test tree.

    Every file found while walking ``dataPath`` is moved, with probability
    ``test_fraction``, to ``<dest_root>/<its original directory>/``, so the
    class-folder layout is mirrored under ``dest_root``.

    Args:
        dataPath: Relative path of the directory tree to sample from. It must be
            relative because the original directory is appended to ``dest_root``.
        test_fraction: Probability (0-1) that each file is moved. Defaults to the
            module-level ``test_split``.
        dest_root: Directory that receives the moved files. Defaults to ``'test'``.

    Raises:
        shutil.Error: If a file with the same name already exists at the destination.

    Note:
        The split is random and destructive (files are moved, not copied), so it
        should only be run once per dataset.
    """
    if test_fraction is None:
        test_fraction = test_split

    for root, _dirs, files in os.walk(dataPath):
        # os.path.join keeps this portable; hard-coded '\\' only works on Windows.
        dest_dir = os.path.join(dest_root, root)
        for name in files:
            if random() <= test_fraction:
                os.makedirs(dest_dir, exist_ok=True)
                shutil.move(os.path.join(root, name), dest_dir)

# split_train_test(dataPath)  # Only need to run this once

def _load_split(subset):
    """Load one subset ("training" or "validation") of the images under dataPath.

    Both subsets must be built with the same seed and validation_split so that
    the two calls partition the files consistently; sharing this helper
    guarantees the arguments cannot drift apart.
    """
    return keras.preprocessing.image_dataset_from_directory(
        dataPath,
        labels=labels,
        label_mode=label_mode,
        color_mode=color_mode,
        shuffle=shuffle,
        subset=subset,
        seed=seed,
        validation_split=test_split,
        image_size=(img_height, img_width),
        batch_size=batch_size,
    )


print('Training data')
train = _load_split("training")
# class_names is read before prefetch() is applied to the dataset.
train_class_names = train.class_names

val = _load_split("validation")

# No in-memory .cache(): each decoded image is img_height*img_width*3 float32
# values (~29 MB at 1920x1280), so caching the whole training set would need
# hundreds of GB of RAM. Prefetch alone still overlaps loading with training.
train = train.prefetch(buffer_size=AUTOTUNE)
val = val.prefetch(buffer_size=AUTOTUNE)

print()
print('train classes:', train_class_names)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: NVIDIA GeForce RTX 2060 SUPER, compute capability 7.5
Training data
Found 26187 files belonging to 62 classes.
Using 20950 files for training.
Found 26187 files belonging to 62 classes.
Using 5237 files for validation.

train classes: ['badlands', 'badlands_plateau', 'bamboo_jungle', 'bamboo_jungle_hills', 'beach', 'birch_forest', 'birch_forest_hills', 'cold_ocean', 'dark_forest', 'dark_forest_hills', 'deep_cold_ocean', 'deep_frozen_ocean', 'deep_lukewarm_ocean', 'deep_ocean', 'desert', 'desert_hills', 'desert_lakes', 'eroded_badlands', 'flower_forest', 'forest', 'frozen_ocean', 'frozen_river', 'giant_tree_taiga', 'giant_tree_taiga_hills', 'gravelly_mountains', 'ice_spikes', 'jungle', 'jungle_edge', 'jungle_hills', 'lukewarm_ocean', 'modified_gravelly_mountains', 'modified_jungle', 'modified_wood

In [None]:
# Training hyper-parameters and callbacks.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
opt = keras.optimizers.Adam(learning_rate=1e-3, epsilon=1e-6)
# The model's final layer is a softmax, i.e. it outputs probabilities, so the
# loss must NOT treat its inputs as logits (that would apply softmax twice).
loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
epoch = 100
# The train/val datasets are already batched when they are built, so this value
# does not change the training batch size; it is kept because the later test
# loading cell reads `batch_size`.
batch_size = 64
callbacks = [
    # Stop after 15 epochs without val_accuracy improvement and roll back to the best weights.
    tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', min_delta=0, patience=15, verbose=1,
                                     mode='auto', baseline=None, restore_best_weights=True),
    # Keep only the best model (by val_accuracy) on disk.
    tf.keras.callbacks.ModelCheckpoint('./best_model', monitor='val_accuracy', save_best_only=True),
    # Multiply the learning rate by 0.05 after 10 stagnant epochs.
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy', factor=0.05, patience=10, verbose=1)
]

# model layers
# Baseline MLP: centre-crop the image to 40x40, flatten, two hidden Dense layers,
# then one output unit per class.
xInput = layers.Input((img_height, img_width, 3))  # (height, width, channels); batch dim is implicit
x = layers.experimental.preprocessing.CenterCrop(40, 40)(xInput)
x = layers.Flatten()(x)
x = layers.Dense(128)(x)
x = layers.Activation('relu')(x)
x = layers.Dense(512)(x)
x = layers.Activation('relu')(x)
x = layers.Dense(len(train_class_names))(x)  # one score per class
# Under the mixed_float16 policy the final softmax is kept in float32 for
# numeric stability. The model therefore outputs probabilities, so the loss it
# is compiled with must use from_logits=False.
xOutput = layers.Activation('softmax', dtype='float32')(x)

model = keras.Model(xInput, xOutput)
model.compile(optimizer=opt, loss=loss, metrics=['accuracy'])
model.summary()
# `train`/`val` are tf.data datasets that are already batched, so batch_size
# must not be passed to fit().
model.fit(train, validation_data=val, epochs=epoch, callbacks=callbacks, verbose=1)

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 1920, 1280, 3)]   0         
_________________________________________________________________
center_crop (CenterCrop)     (None, 40, 40, 3)         0         
_________________________________________________________________
flatten (Flatten)            (None, 4800)              0         
_________________________________________________________________
dense (Dense)                (None, 128)               614528    
_________________________________________________________________
activation (Activation)      (None, 128)               0         
_________________________________________________________________
dense_1 (Dense)              (None, 512)               66048     
_________________________________________________________________
activation_1 (Activation)    (None, 512)               0     

In [12]:
# Load the unlabelled images under 'test-release/' in a fixed (unshuffled) order.
# Images are resized exactly like the training data. `batch_size` is whatever
# value the kernel currently holds (32 from the data-prep cell, 64 once the
# training cell has run).
test = keras.preprocessing.image_dataset_from_directory(
    'test-release/',
    label_mode=None,   # no labels: the dataset yields image batches only
    shuffle=False,
    color_mode=color_mode,
    image_size=(img_height, img_width),
    batch_size=batch_size,
)
print(test.class_names)

# model.predict(test)

Found 11261 files belonging to 1 classes.


['test']