### **1. Setup and load data**

#### **1.1 Install dependencies and data**

In [1]:
import tensorflow as tf
import os #used for handling file operations

In [2]:
# Sanity check: list the GPUs visible to TensorFlow (an empty list means no GPU was detected).
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [3]:
# Avoid OOM errors: enable memory growth on every visible GPU.
# If memory is still exhausted after this, reduce the mini-batch size used for training.
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)

#### **1.2 Load Data**

In [4]:
# Change working directory to notebook location
# notebook_path = r"E:\AKILA\RiceClassifcation"
# os.chdir(notebook_path)
# print(f"Current working directory: {os.getcwd()}")

Current working directory: E:\AKILA\RiceClassifcation


In [None]:
# import cv2

# data_dir = "Data"
# data_dir_path = os.path.join(cwd, data_dir)
# os.listdir(data_dir)

In [8]:
# Pipeline configuration.
DATA_DIR = "Data"                       # dataset root folder
ROOT_PATTERN = f"{DATA_DIR}/*/*/*.JPG"  # glob for .JPG files two folder levels below DATA_DIR
IMAGE_SIZE = (254, 254)                 # target size passed to tf.image.resize
BATCH_SIZE = 8                          # number of samples per mini-batch

In [52]:
# Dataset of image file paths matching ROOT_PATTERN, yielded in shuffled order.
paths = tf.data.Dataset.list_files(ROOT_PATTERN, shuffle=True)

In [46]:
# Create label vocabularies: lookup tables mapping a class name to an integer id.


def _build_label_table(class_names):
    """Return a StaticHashTable mapping each name in `class_names` to its list index.

    Ids are int32; a name that is not in `class_names` looks up as -1.
    """
    return tf.lookup.StaticHashTable(
        initializer=tf.lookup.KeyValueTensorInitializer(
            # Explicit dtypes keep key/value types stable regardless of list contents.
            keys=tf.constant(class_names, dtype=tf.string),
            values=tf.constant(list(range(len(class_names))), dtype=tf.int32),
        ),
        default_value=-1,
    )


# Rice-type classes: every sub-directory of DATA_DIR is one class.
# The listing is sorted so the name -> id mapping is the same on every run;
# tf.io.gfile.listdir returns entries in arbitrary order.
type_dirs = sorted(tf.io.gfile.listdir(DATA_DIR))

# Keep directory names only, e.g. ['AT 362', 'BG 357', 'BG 360', 'BW 367', ...].
# A comprehension is used so no loop variable leaks into the notebook namespace
# (the previous loop variable `dir` shadowed the builtin dir()).
image_class_type_dirs = [
    entry
    for entry in type_dirs
    if tf.io.gfile.isdir(os.path.join(DATA_DIR, entry))
]
if not image_class_type_dirs:
    raise ValueError(f"No class directories found under {DATA_DIR!r}")

# Quality classes are hard-coded rather than discovered from disk.
# NOTE(review): assumes every type folder contains exactly these sub-folders — confirm.
image_class_quality_dirs = ['Bad', 'Good']

image_class_type_table = _build_label_table(image_class_type_dirs)
image_class_quality_table = _build_label_table(image_class_quality_dirs)

In [118]:
# helper function to extract type and quality label
def parse_label(path):
    """Map an image path to its integer (type, quality) labels.

    Args:
        path: scalar string tensor of the form <root>/<type>/<quality>/<image>.
            Forward-slash and backslash separators are both accepted.

    Returns:
        Tuple (t, q): the ids looked up in image_class_type_table and
        image_class_quality_table for the <type> and <quality> path components.
    """
    # Paths may be backslash-separated (e.g. Data\Type\Quality\Image.JPG on
    # Windows), so normalise every separator to "/" before splitting.
    normalized = tf.strings.regex_replace(path, r"\\", "/")

    # Split the path itself; the previous call split the literal "/" and never
    # looked at `path`, so the negative indices below were out of range.
    parts = tf.strings.split(normalized, "/")
    rice_type = parts[-3]      # grand-parent folder of the image
    rice_quality = parts[-2]   # parent folder of the image

    t = image_class_type_table.lookup(rice_type)
    q = image_class_quality_table.lookup(rice_quality)
    return t, q

# helper function to load and preprocess image
def load_image(path):
    """Read the image file at `path`, resize it to IMAGE_SIZE and divide pixel values by 255.

    Args:
        path: string tensor holding the image file path.

    Returns:
        The decoded 3-channel image after resizing and scaling.
    """
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    # Declare rank and channel count on the decoded tensor before it is resized.
    decoded.set_shape([None, None, 3])
    resized = tf.image.resize(decoded, size=IMAGE_SIZE)
    return resized / 255.0

# mapping function applied to every file path of the dataset
def load_item(path):
    """Turn one file path into an (image, labels) training example.

    Returns:
        Tuple of the preprocessed image from load_image(path) and a dict
        {"type": ..., "quality": ...} holding the labels from parse_label(path).
    """
    type_id, quality_id = parse_label(path)
    labels = {"type": type_id, "quality": quality_id}
    return load_image(path), labels

In [119]:
# Input pipeline: shuffle the file paths, map each one to (image, labels),
# then group the examples into mini-batches and prefetch them.
dataset = paths.shuffle(buffer_size=100)
dataset = dataset.map(load_item, num_parallel_calls=tf.data.AUTOTUNE)
dataset = dataset.batch(BATCH_SIZE)
dataset = dataset.prefetch(tf.data.AUTOTUNE)