# Contents:
1. Read images

### Import relevant libraries

In [2]:
import tensorflow as tf
from tensorflow.keras import models,layers
import matplotlib.pyplot as plt

### Read from Directory

In [3]:
# Side length (in pixels) used for the square input images, and the number of
# images grouped into each batch when the dataset is loaded.
image_size, batch_size = 256, 32

In [4]:
# Read every image below "potato_dataset" into a batched tf.data.Dataset.
# Labels are inferred by Keras and encoded as integers; images are resized to
# (image_size, image_size) and shuffled with a fixed seed.
dataset = tf.keras.utils.image_dataset_from_directory(
    directory="potato_dataset",
    labels="inferred",
    label_mode="int",
    image_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=True,
    seed=42,
)

Found 3878 files belonging to 3 classes.


In [5]:
# Keep the class names reported by the loader and display them.
# NOTE(review): the integer labels are expected to index into this list
# (label i -> class_names[i]) — confirm against the Keras documentation.
class_names = dataset.class_names
class_names

['early_blight', 'late_blight', 'normal']

### Check if the data is balanced

In [6]:
# Tally how many images carry each integer class label.
class_counts = {}

# Walk through every batch once. `labels.numpy()` yields NumPy integers, so each
# label is converted to a plain int before it is used as a dictionary key.
for images, labels in dataset:
    for label in labels.numpy():
        label = int(label)
        class_counts[label] = class_counts.get(label, 0) + 1

# Print the counts for each class
for label, count in class_counts.items():
    print(f'Class {label}: {count}')

# Size of the smallest class. Calling min() on the dict itself iterates over its
# keys and would return the smallest *label*, so the counts are compared
# explicitly. `default=0` covers an empty dataset.
min_count = min(class_counts.values(), default=0)

Class 0: 1424
Class 2: 1020
Class 1: 1434


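The classes are reasonably balanced: the smallest class (`normal`, 1,020 images) is about 71% of the size of the largest (`late_blight`, 1,434 images), which we treat as an acceptable range.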

### Split the data

In [7]:
def split_data(dataset, train_split = 0.8, val_split = 0.1, shuffle = True, shuffle_size= 10000):
    """Split a dataset into train, validation and test subsets.

    The subset sizes are computed from ``len(dataset)``, i.e. in units of
    dataset elements (batches, when the dataset is already batched). The test
    subset receives whatever remains after the train and validation subsets.

    Args:
        dataset: Object supporting ``len()``, ``shuffle()``, ``take()`` and
            ``skip()`` (e.g. a ``tf.data.Dataset`` of known cardinality).
        train_split: Fraction of elements assigned to the training subset.
        val_split: Fraction of elements assigned to the validation subset.
        shuffle: Whether to shuffle the elements before splitting.
        shuffle_size: Buffer size passed to ``dataset.shuffle``.

    Returns:
        A ``(train_dataset, validation_dataset, test_dataset)`` tuple.

    Raises:
        ValueError: If a fraction is negative or the two fractions sum to
            more than 1.
    """
    # Small tolerance so that fractions such as 0.7 + 0.3 are not rejected
    # because of floating-point rounding.
    if train_split < 0 or val_split < 0 or train_split + val_split > 1 + 1e-9:
        raise ValueError(
            "train_split and val_split must be non-negative and sum to at most 1"
        )

    total_count = len(dataset)
    train_count = int(total_count * train_split)
    val_count = int(total_count * val_split)

    if shuffle:
        # Fix the shuffled order once. With the default
        # reshuffle_each_iteration=True the order changes on every pass, so
        # take()/skip() would select different elements each time a subset is
        # iterated and the three subsets would overlap.
        # NOTE(review): if `dataset` itself reshuffles upstream on each
        # iteration, samples may still move between subsets — consider caching
        # the dataset before splitting.
        dataset = dataset.shuffle(shuffle_size, seed = 42, reshuffle_each_iteration = False)

    train_dataset = dataset.take(train_count)
    val_test_dataset = dataset.skip(train_count)

    validation_dataset = val_test_dataset.take(val_count)
    test_dataset = val_test_dataset.skip(val_count)

    return train_dataset, validation_dataset, test_dataset

In [8]:
train, validation, test = split_data(dataset)
print(f"train size = {len(train)}")
print(f"validation size = {len(validation)}")
print(f"test size = {len(test)}")

train size = 97
validation size = 12
test size = 13


### Cache and prefetch the data

In [9]:
# Cache each split and prefetch batches so input preparation can overlap with
# model execution. Only the training split is shuffled: the order of the
# validation and test batches does not affect evaluation metrics, so shuffling
# them only adds work and makes per-batch inspection non-repeatable.
train = train.cache().shuffle(10000).prefetch(buffer_size = tf.data.AUTOTUNE)
validation = validation.cache().prefetch(buffer_size = tf.data.AUTOTUNE)
test = test.cache().prefetch(buffer_size = tf.data.AUTOTUNE)

### Build preprocessing and augmentation layers for the model

In [10]:
# Preprocessing applied to every image: resize to (image_size, image_size) and
# multiply pixel values by 1/255.
# The stable `layers.Resizing` / `layers.Rescaling` classes replace the
# deprecated `layers.experimental.preprocessing` namespace, matching the
# `layers.Random*` classes used for augmentation in this notebook.
resize_rescale_layer = tf.keras.Sequential([
    layers.Resizing(image_size, image_size),
    layers.Rescaling(1.0/255)
])

In [11]:
# Random augmentation pipeline: flips along both axes, then a random rotation
# (factor 0.3) and a random zoom (height factor 0.2).
data_augmentation_layer = models.Sequential([
    layers.RandomFlip(mode="horizontal_and_vertical"),
    layers.RandomRotation(factor=0.3),
    layers.RandomZoom(height_factor=0.2),
])