In [17]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt
from IPython.display import HTML

In [18]:
# Notebook-wide configuration.
batch_size = 32                    # number of images per batch produced by the loader
image_size_x = image_size_y = 256  # every image is resized to 256 x 256 when loaded
channels = 3                       # presumably RGB colour channels (not used in the cells shown here)
epochs = 50                        # presumably the number of training epochs (not used in the cells shown here)

In [None]:
# Load the Coffee Leaf Diseases training images from disk as a batched dataset.
# Every image is resized to (image_size_x, image_size_y) so that the whole
# dataset shares one format.
# NOTE(review): with shuffle=True the loader may reshuffle on every pass over
# the data, in which case take()/skip() splits of `dataset` would not be stable
# between passes - confirm before relying on such splits.
dataset = keras.preprocessing.image_dataset_from_directory(
    "./data/coffee_leaf_disease/train",
    batch_size=batch_size,
    image_size=(image_size_x, image_size_y),
    shuffle=True,
    seed=123,  # alternatives noted by the author: 0, 42
)

Found 1264 files belonging to 4 classes.


In [None]:
# Keep the class names exposed by the dataset and display them
# (the loader reported 4 classes).
class_names = dataset.class_names
class_names

['miner', 'nodisease', 'phoma', 'rust']

In [None]:
# Number of batches in the dataset: ceil(1264 / 32) = ceil(39.5) = 40
# (the last batch is only partially filled).
len(dataset)

40

In [None]:
# Fraction of the batches intended for training; the expression below shows
# how many batches that is (0.8 * 40 = 32).
train_size = 0.8
len(dataset) * train_size

32.0

In [31]:
# Manual split, step 1: keep the first 13 batches as the training set.
# NOTE(review): 13 is hard-coded and does not match the 32 batches (80% of 40)
# computed in the previous cell - confirm the intended size. train_ds is
# reassigned later from get_dataset_partitions_tf(dataset).
train_ds = dataset.take(13)
len(train_ds)

13

In [32]:
# Manual split, step 2: everything after the first 13 batches. This remainder
# is divided into validation and test batches in the following cells.
test_ds = dataset.skip(13)
len(test_ds)

27

In [33]:
# Fraction of the batches intended for validation; the expression below shows
# how many batches that is (0.1 * 40 = 4).
val_size = 0.1
len(dataset) * val_size

4.0

In [34]:
# Manual split, step 3: take the validation batches from the front of the remainder.
# NOTE(review): only 1 batch is taken although 10% of the dataset is 4 batches
# (computed in the previous cell) - confirm the intended size.
val_ds = test_ds.take(1)
len(val_ds)

1

In [35]:
# Manual split, step 4: the test set is everything after the training and
# validation batches.
# It is derived from `dataset` rather than from `test_ds` itself so that
# re-running this cell always gives the same result instead of dropping one
# more batch on every execution.
test_ds = dataset.skip(len(train_ds) + len(val_ds))
len(test_ds)

26

In [None]:
# Helper that splits a dataset into train, validation and test partitions
# (80% / 10% / 10% by default).

def get_dataset_partitions_tf(ds, train_split = 0.8, val_split=0.1, test_split = 0.1, shuffle = True, shuffle_size = 10000):
    """Split ``ds`` into consecutive train / validation / test partitions.

    Args:
        ds: Dataset-like object supporting ``len()``, ``shuffle()``, ``take()``
            and ``skip()`` (e.g. a batched ``tf.data.Dataset``). Sizes are
            counted in elements of ``ds``, so for a batched dataset they are
            numbers of batches.
        train_split: Fraction of the elements assigned to the training set.
        val_split: Fraction of the elements assigned to the validation set.
        test_split: Fraction assigned to the test set. It is only used to
            validate that the three fractions sum to 1; the test partition
            receives whatever remains after the other two.
        shuffle: Whether to shuffle ``ds`` once before splitting.
        shuffle_size: Buffer size passed to ``ds.shuffle``.

    Returns:
        A ``(train_ds, val_ds, test_ds)`` tuple.

    Raises:
        AssertionError: If the three fractions do not sum to 1.
    """
    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 are not exactly
    # 1.0 in floating point and would be rejected by a strict equality check.
    total_split = train_split + val_split + test_split
    assert abs(total_split - 1.0) < 1e-9, (
        f"train/val/test splits must sum to 1, got {total_split}"
    )

    ds_size = len(ds)

    if shuffle:
        # reshuffle_each_iteration=False keeps the shuffled order fixed.
        # With the default (True) the order changes on every pass over the
        # data, so take()/skip() would select different elements each time
        # and the three partitions would leak into each other.
        # Callers that want a new training order every epoch can shuffle the
        # returned train_ds again.
        # NOTE(review): if `ds` itself reshuffles on every iteration upstream,
        # the partitions can still change between passes - confirm.
        ds = ds.shuffle(shuffle_size, seed=12, reshuffle_each_iteration=False)

    # Truncate towards zero; any elements lost to rounding end up in test_ds.
    train_size = int(train_split * ds_size)
    val_size = int(val_split * ds_size)

    train_ds = ds.take(train_size)
    remainder = ds.skip(train_size)
    val_ds = remainder.take(val_size)
    test_ds = remainder.skip(val_size)

    return train_ds, val_ds, test_ds


In [38]:
# Build the split with the helper's default 80% / 10% / 10% fractions; this
# replaces the train_ds / val_ds / test_ds created manually in the cells above.
train_ds, val_ds, test_ds = get_dataset_partitions_tf(dataset)

In [39]:
# Number of batches in each partition, in (train, validation, test) order.
tuple(map(len, (train_ds, val_ds, test_ds)))

(32, 4, 4)