Adapted from CIFAR-100 Kaggle Notebook

In [1]:
import tensorflow as tf

### Preparing TPU

In [2]:
# Autotune sentinel for tf.data. Uses the non-experimental name, which is the
# spelling the rest of this notebook relies on (tf.data.AUTOTUNE).
AUTO = tf.data.AUTOTUNE

# Detect a TPU and pick the matching distribution strategy.
# If the resolver raises ValueError, no TPU is used and `tpu` stays None.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if tpu is not None:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.TPUStrategy(tpu)
else:
    # No TPU: fall back to whatever strategy TensorFlow currently has active.
    strategy = tf.distribute.get_strategy()

print("REPLICAS: ", strategy.num_replicas_in_sync)

REPLICAS:  1


### Importing Libraries

In [3]:
import numpy as np
from tensorflow.keras import regularizers
from tensorflow.keras.regularizers import l2
import tensorflow.keras.layers as tfl
from keras import backend as K

### Loading Data

In [4]:
from tensorflow.keras.datasets import cifar100

# Load the CIFAR-100 train/test splits using the coarse label set
# (label_mode='coarse'); labels come back as integer arrays of shape (N, 1).
(X_train, y_train), (X_test, y_test) = cifar100.load_data(label_mode='coarse')

In [5]:
# Sanity check: shapes of the training images and of the training labels.
print(X_train.shape,y_train.shape)

(50000, 32, 32, 3) (50000, 1)


### Resizing Images

In [6]:
import cv2

In [7]:
# Upscale every test image to 140x140 with OpenCV and re-stack into one array.
# NOTE(review): only X_test is resized here; X_train keeps its original size
# and is resized later inside the tf.data pipelines via tf.image.resize —
# confirm this asymmetry is intended.
X_test = np.array([cv2.resize(img, (140, 140)) for img in X_test])

### Encoding Labels

In [8]:
from sklearn.preprocessing import OneHotEncoder

# One-hot encode the integer labels. The encoder learns its categories from
# the training labels only and is then reused unchanged for the test labels.
enc = OneHotEncoder()
enc.fit(y_train)

# The encoder returns a sparse matrix; densify it and store as integers.
y_train = enc.transform(y_train).toarray().astype(int)
y_test = enc.transform(y_test).toarray().astype(int)

# Show the encoded shape and one example row.
print(y_train.shape)
print(y_train[0])

(50000, 20)
[0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]


### Building ResNet

In [9]:
def identity_block(X, f, filters, training=None):
    """Bottleneck residual block whose shortcut is the unmodified input.

    Main path: 1x1 conv -> BN -> ReLU -> f x f conv ('same') -> BN -> ReLU ->
    1x1 conv -> BN. The result is added to the block input and passed through
    a final ReLU. All convolutions use stride 1.

    Args:
        X: Input tensor, channels-last (BatchNormalization normalises axis 3).
        f: Kernel size of the middle convolution.
        filters: Sequence of three ints, the filter counts of the three
            convolutions. ``filters[2]`` has to match the channel count of
            ``X`` so that the element-wise Add with the shortcut is valid.
        training: Forwarded to every BatchNormalization call. ``None``
            (default) lets Keras choose the mode, i.e. batch statistics while
            fitting and the learned moving averages during evaluation and
            prediction. Pass ``True``/``False`` to force one mode; forcing
            ``True`` makes inference depend on the statistics of each batch.

    Returns:
        Output tensor of the block.
    """
    shortcut = X

    # (filter count, kernel size, padding, followed by ReLU?) per conv stage.
    stages = (
        (filters[0], 1, 'valid', True),
        (filters[1], f, 'same', True),
        (filters[2], 1, 'valid', False),
    )
    for n_filters, kernel, pad, with_relu in stages:
        X = tfl.Conv2D(filters=n_filters, kernel_size=kernel, strides=(1, 1), padding=pad)(X)
        X = tfl.BatchNormalization(axis=3)(X, training=training)
        if with_relu:
            X = tfl.Activation('relu')(X)

    # Residual connection: the last ReLU is applied after the addition.
    X = tfl.Add()([shortcut, X])
    return tfl.Activation('relu')(X)

In [10]:
def convolutional_block(X, f, filters, s=2, training=None):
    """Bottleneck residual block with a projection (1x1 conv + BN) shortcut.

    Main path: 1x1 conv with stride ``s`` -> BN -> ReLU -> f x f conv ('same')
    -> BN -> ReLU -> 1x1 conv -> BN. The shortcut applies its own 1x1 conv
    with stride ``s`` and ``filters[2]`` filters, followed by BN, so that it
    matches the main path before both are added and passed through ReLU.

    Args:
        X: Input tensor, channels-last (BatchNormalization normalises axis 3).
        f: Kernel size of the middle convolution.
        filters: Sequence of three ints, the filter counts of the three
            main-path convolutions; ``filters[2]`` is also used for the
            shortcut convolution.
        s: Stride of the first main-path convolution and of the shortcut
            convolution.
        training: Forwarded to every BatchNormalization call. ``None``
            (default) lets Keras choose the mode, i.e. batch statistics while
            fitting and the learned moving averages during evaluation and
            prediction. Pass ``True``/``False`` to force one mode; forcing
            ``True`` makes inference depend on the statistics of each batch.

    Returns:
        Output tensor of the block.
    """
    shortcut = X

    # (filter count, kernel size, stride, padding, followed by ReLU?)
    stages = (
        (filters[0], 1, (s, s), 'valid', True),
        (filters[1], f, (1, 1), 'same', True),
        (filters[2], 1, (1, 1), 'valid', False),
    )
    for n_filters, kernel, stride, pad, with_relu in stages:
        X = tfl.Conv2D(filters=n_filters, kernel_size=kernel, strides=stride, padding=pad)(X)
        X = tfl.BatchNormalization(axis=3)(X, training=training)
        if with_relu:
            X = tfl.Activation('relu')(X)

    # Projection shortcut: same stride and output channels as the main path.
    shortcut = tfl.Conv2D(filters=filters[2], kernel_size=1, strides=(s, s), padding='valid')(shortcut)
    shortcut = tfl.BatchNormalization(axis=3)(shortcut, training=training)

    # Residual connection: the last ReLU is applied after the addition.
    X = tfl.Add()([shortcut, X])
    return tfl.Activation('relu')(X)

In [11]:
def arch(input_shape, classes=20):
    """Build a ResNet-50-style classifier as a Keras functional model.

    Layout: zero-padding(3) -> 7x7/2 conv (64 filters) -> BN -> ReLU ->
    3x3/2 max-pool, followed by four stages of bottleneck blocks. Each stage
    is one ``convolutional_block`` plus 2, 3, 5 and 2 ``identity_block`` calls
    respectively. The head is a 2x2 average pool ('same' padding), Flatten and
    a softmax Dense layer.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        classes: Number of units of the softmax output layer. Defaults to 20,
            the value that used to be hard-coded.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    input_img = tf.keras.Input(shape=input_shape)

    # Stem. BatchNormalization is called without a forced `training` flag so
    # it follows the mode (fit vs. evaluate/predict) Keras runs the model in.
    layer = tfl.ZeroPadding2D((3, 3))(input_img)
    layer = tfl.Conv2D(filters=64, kernel_size=7, strides=(2, 2))(layer)
    layer = tfl.BatchNormalization(axis=3)(layer)
    layer = tfl.Activation('relu')(layer)
    layer = tfl.MaxPooling2D((3, 3), strides=(2, 2))(layer)

    # (bottleneck filters, stride of the stage's convolutional block,
    #  number of identity blocks that follow it)
    stages = (
        ([64, 64, 256], 1, 2),
        ([128, 128, 512], 2, 3),
        ([256, 256, 1024], 2, 5),
        ([512, 512, 2048], 2, 2),
    )
    for stage_filters, stride, n_identity in stages:
        layer = convolutional_block(layer, 3, stage_filters, stride)
        for _ in range(n_identity):
            layer = identity_block(layer, 3, stage_filters)

    # Classification head.
    layer = tfl.AveragePooling2D(pool_size=(2, 2), padding='same')(layer)
    layer = tfl.Flatten()(layer)
    outputs = tfl.Dense(units=classes, activation='softmax')(layer)

    return tf.keras.Model(inputs=input_img, outputs=outputs)

# training and evaluating the model

In [12]:
# Build and compile inside the strategy scope so the model is created on the
# device(s) managed by `strategy` (the TPU when one was detected).
with strategy.scope():
    conv_model = arch((140, 140, 3))
    conv_model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

# Print the layer-by-layer overview of the network.
conv_model.summary()

In [18]:
# train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(16 * strategy.num_replicas_in_sync)
# test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(16 * strategy.num_replicas_in_sync)
# history = conv_model.fit(train_dataset,epochs=4,validation_data=test_dataset,batch_size=16 * strategy.num_replicas_in_sync,shuffle=True)

In [16]:
# Minimal baseline CNN, built and compiled under the distribution strategy.
# This rebinds `conv_model`, replacing whatever model it referred to before.
with strategy.scope():
    conv_model = tf.keras.Sequential(layers=[
        tf.keras.layers.Input(shape=(140, 140, 3)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # Collapse the feature maps to a vector before the classifier
        # (tf.keras.layers.GlobalAveragePooling2D() would be an alternative).
        tf.keras.layers.Flatten(),
        # One output unit per class: 20 classes.
        tf.keras.layers.Dense(20, activation='softmax'),
    ])

    # Categorical cross-entropy because the labels are one-hot vectors.
    conv_model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

In [17]:
import tensorflow as tf

# 1. Prepare the Data Pipeline (including resizing)
def preprocess_data(image, label):
    """Resize an image to 140x140 and scale its pixel values by 1/255.

    Args:
        image: Image tensor to resize.
        label: Label belonging to the image; passed through untouched.

    Returns:
        Tuple ``(image, label)`` with the image as float32, resized and
        divided by 255.
    """
    resized = tf.image.resize(image, (140, 140))
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled, label

# Global batch size: 16 examples per replica of the distribution strategy.
GLOBAL_BATCH_SIZE = 16 * strategy.num_replicas_in_sync

# Training pipeline. Shuffling happens before the resize so the 1000-element
# shuffle buffer holds the small source images rather than 140x140 float
# tensors. The map runs in parallel and prefetch overlaps input preparation
# with the training step.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .shuffle(1000)
    .map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(GLOBAL_BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Evaluation pipeline: same preprocessing, original order (no shuffling).
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .map(preprocess_data, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(GLOBAL_BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# 2. Build and compile the baseline CNN inside the strategy scope.
# Two conv/max-pool stages feed a Flatten layer and a 20-way softmax.
with strategy.scope():
    conv_model = tf.keras.Sequential(layers=[
        tf.keras.layers.Input(shape=(140, 140, 3)),
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(2, 2),
        # Turn the 3D feature maps into a 1D vector for the Dense layer.
        tf.keras.layers.Flatten(),
        # One output unit per class: 20 classes.
        tf.keras.layers.Dense(20, activation='softmax'),
    ])

    # Labels are one-hot with shape (batch, 20), hence categorical cross-entropy.
    conv_model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

# 3. Train for 10 epochs, validating on the test split after every epoch.
# The datasets are already batched, so no batch size is passed here.
history = conv_model.fit(
    x=train_dataset,
    validation_data=test_dataset,
    epochs=10,
)

Epoch 1/10


2026-01-08 11:01:20.256002: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 282ms/step - accuracy: 0.2246 - loss: 2.5442

2026-01-08 11:16:03.191380: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]
2026-01-08 11:16:03.454122: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.
2026-01-08 11:16:30.161872: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m910s[0m 291ms/step - accuracy: 0.2780 - loss: 2.3705 - val_accuracy: 0.3171 - val_loss: 2.2407
Epoch 2/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 0.3490 - loss: 2.1486

2026-01-08 11:24:48.102914: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.
2026-01-08 11:25:17.706234: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m528s[0m 169ms/step - accuracy: 0.3608 - loss: 2.1120 - val_accuracy: 0.3611 - val_loss: 2.1158
Epoch 3/10


2026-01-08 11:25:18.109621: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 161ms/step - accuracy: 0.3911 - loss: 2.0102

2026-01-08 11:33:42.506777: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m534s[0m 171ms/step - accuracy: 0.3970 - loss: 1.9926 - val_accuracy: 0.3496 - val_loss: 2.1501
Epoch 4/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 163ms/step - accuracy: 0.4162 - loss: 1.9196

2026-01-08 11:42:40.969188: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.
2026-01-08 11:43:09.710175: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m538s[0m 172ms/step - accuracy: 0.4232 - loss: 1.9028 - val_accuracy: 0.3590 - val_loss: 2.1314
Epoch 5/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 159ms/step - accuracy: 0.4382 - loss: 1.8444

2026-01-08 11:51:26.962840: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m528s[0m 169ms/step - accuracy: 0.4456 - loss: 1.8285 - val_accuracy: 0.3635 - val_loss: 2.1895
Epoch 6/10
[1m   1/3125[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m9:29[0m 182ms/step - accuracy: 0.5000 - loss: 1.5058

2026-01-08 11:51:58.291970: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 158ms/step - accuracy: 0.4597 - loss: 1.7703

2026-01-08 12:00:11.091961: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m523s[0m 167ms/step - accuracy: 0.4653 - loss: 1.7548 - val_accuracy: 0.3552 - val_loss: 2.2064
Epoch 7/10
[1m   1/3125[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m12:17[0m 236ms/step - accuracy: 0.7500 - loss: 1.1231

2026-01-08 12:00:41.130900: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 365ms/step - accuracy: 0.4828 - loss: 1.6961

2026-01-08 12:19:43.185427: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1172s[0m 375ms/step - accuracy: 0.4900 - loss: 1.6808 - val_accuracy: 0.3584 - val_loss: 2.2694
Epoch 8/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 176ms/step - accuracy: 0.5052 - loss: 1.6182

2026-01-08 12:29:23.926456: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.
2026-01-08 12:29:53.919199: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node MultiDeviceIteratorGetNextFromShard}}]]


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m582s[0m 186ms/step - accuracy: 0.5117 - loss: 1.6015 - val_accuracy: 0.3449 - val_loss: 2.3504
Epoch 9/10


2026-01-08 12:29:54.219873: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 173ms/step - accuracy: 0.5299 - loss: 1.5363

2026-01-08 12:38:54.218132: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m568s[0m 182ms/step - accuracy: 0.5341 - loss: 1.5244 - val_accuracy: 0.3395 - val_loss: 2.4282
Epoch 10/10
[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 831ms/step - accuracy: 0.5489 - loss: 1.4604

2026-01-08 13:22:40.266482: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3507s[0m 1s/step - accuracy: 0.5548 - loss: 1.4456 - val_accuracy: 0.3238 - val_loss: 2.5534


2026-01-08 13:37:48.945198: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


In [20]:
# Directory with the additional evaluation images.
val_dir = 'clean_insect_images'

# Build a batched, unshuffled dataset with one-hot labels inferred from the
# directory layout, resizing every image to 140x140, then prefetch batches.
# NOTE(review): the recorded output reports 11 classes for this directory,
# while the models in this notebook end in a 20-unit softmax — confirm how the
# two label sets are meant to line up before evaluating.
# NOTE(review): no pixel scaling / preprocess_input is applied here, unlike
# the training pipelines — verify that this is intended.
new_val_ds = tf.keras.utils.image_dataset_from_directory(
    directory=val_dir,
    labels='inferred',
    label_mode='categorical',
    batch_size=16 * strategy.num_replicas_in_sync,
    image_size=(140, 140),
    shuffle=False,
).prefetch(buffer_size=tf.data.AUTOTUNE)

Found 8605 files belonging to 11 classes.


In [21]:
# Score the trained model on the extra image set and report both metrics.
# NOTE(review): in the recorded run this call raised a ValueError because the
# dataset labels have 11 columns while the model outputs 20.
eval_results = conv_model.evaluate(new_val_ds)
loss, accuracy = eval_results

print(f"Validation Loss: {loss}")
print(f"Validation Accuracy: {accuracy}")

2026-01-08 16:11:46.223903: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


INFO:tensorflow:Error reported to Coordinator: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 11), output.shape=(None, 20)
Traceback (most recent call last):
  File "/Users/deepikasenthil/miniforge3/lib/python3.10/site-packages/tensorflow/python/training/coordinator.py", line 293, in stop_on_exception
    yield
  File "/Users/deepikasenthil/miniforge3/lib/python3.10/site-packages/tensorflow/python/distribute/mirrored_run.py", line 387, in run
    self.main_result = self.main_fn(*self.main_args, **self.main_kwargs)
  File "/Users/deepikasenthil/miniforge3/lib/python3.10/site-packages/tensorflow/python/autograph/impl/api.py", line 643, in wrapper
    return func(*args, **kwargs)
  File "/Users/deepikasenthil/miniforge3/lib/python3.10/site-packages/keras/src/backend/tensorflow/trainer.py", line 96, in test_step
    loss = self._compute_loss(
  File "/Users/deepikasenthil/miniforge3/lib/python3.10/site-packages/keras/src/trainers/trainer.py", line 3

ValueError: Arguments `target` and `output` must have the same shape. Received: target.shape=(None, 11), output.shape=(None, 20)

In [22]:
from tensorflow.keras.applications import ResNet50

In [24]:
from tensorflow.keras.applications.resnet50 import preprocess_input

def preprocess_data(image, label):
    """Resize an image to 140x140 and apply ResNet50's ``preprocess_input``.

    NOTE(review): this reuses the name of the earlier ``preprocess_data``
    helper (which divided by 255 instead) and shadows it from this cell on.

    Args:
        image: Image tensor to resize and preprocess.
        label: Label belonging to the image; passed through untouched.

    Returns:
        Tuple ``(image, label)`` with the preprocessed image.
    """
    resized = tf.image.resize(image, (140, 140))
    return preprocess_input(resized), label

# Build the ResNet-ready datasets from the raw arrays rather than from the
# existing `train_dataset` / `test_dataset`: those are already scaled to
# [0, 1] and batched, so mapping and batching them again would feed
# `preprocess_input` rescaled pixels and add a second batch dimension.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .map(preprocess_data)
    .shuffle(1000)
    .batch(GLOBAL_BATCH_SIZE)
)
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .map(preprocess_data)
    .batch(GLOBAL_BATCH_SIZE)
)

In [25]:
with strategy.scope():
    # ImageNet-pretrained ResNet50 backbone; include_top=False leaves out the
    # 1000-class ImageNet classification layer.
    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(140, 140, 3),
    )

    # Custom classification head on top of the backbone. Global average
    # pooling is used instead of Flatten to save memory.
    model = tf.keras.Sequential(layers=[
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        # 20 outputs for the coarse CIFAR labels.
        tf.keras.layers.Dense(20, activation='softmax'),
    ])

    model.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )

# Keep `conv_model` pointing at the same model object as `model`.
conv_model = model

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 0us/step


In [28]:
# Global batch size: 16 examples per replica.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

# Training data: preprocess, shuffle with a 1000-element buffer, batch, prefetch.
train_dataset = (
    tf.data.Dataset.from_tensor_slices((X_train, y_train))
    .map(preprocess_data)
    .shuffle(1000)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# Validation data: same preprocessing, kept in its original order.
test_dataset = (
    tf.data.Dataset.from_tensor_slices((X_test, y_test))
    .map(preprocess_data)
    .batch(BATCH_SIZE)
    .prefetch(tf.data.AUTOTUNE)
)

# No batch_size argument is passed to fit(): the tf.data.Dataset inputs are
# already batched, so the batch size comes from the datasets themselves.
history = model.fit(
    x=train_dataset,
    validation_data=test_dataset,
    epochs=1,
)

2026-01-08 16:26:45.165248: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m 259/3125[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m2:23:28[0m 3s/step - accuracy: 0.1043 - loss: 3.1975

KeyboardInterrupt: 

In [None]:
# Evaluate the current `model` on the custom image dataset and report accuracy.
# NOTE(review): `new_val_ds` was reported with 11 classes while `model` has 20
# outputs — confirm the label sets match before relying on this cell.
loss, acc = model.evaluate(x=new_val_ds)
print(f"Accuracy on my custom images: {acc}")

In [31]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input

# 1. Prepare the data pipeline.
# Global batch size: 16 examples per replica.
BATCH_SIZE = 16 * strategy.num_replicas_in_sync

def prepare_ds(X, y, shuffle=False):
    """Turn image/label arrays into a batched, prefetched dataset for ResNet50.

    Every image is cast to float32, resized to 140x140 (the input size the
    model in this cell is built with) and passed through ResNet50's
    ``preprocess_input``. Without the resize, images of any other size would
    not match the model's input shape.

    Args:
        X: Array of images; converted with ``X.astype('float32')``.
        y: Labels aligned with ``X``; passed through untouched.
        shuffle: When True, shuffle with a 1000-element buffer before batching.

    Returns:
        A ``tf.data.Dataset`` yielding ``(images, labels)`` batches of
        ``BATCH_SIZE`` elements.
    """
    ds = tf.data.Dataset.from_tensor_slices((X.astype('float32'), y))

    def _to_resnet_input(image, label):
        # Resize first, then apply the ResNet-specific preprocessing.
        image = tf.image.resize(image, (140, 140))
        return preprocess_input(image), label

    ds = ds.map(_to_resnet_input, num_parallel_calls=tf.data.AUTOTUNE)
    if shuffle:
        ds = ds.shuffle(1000)
    return ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

train_ds = prepare_ds(X_train, y_train, shuffle=True)
test_ds = prepare_ds(X_test, y_test)

# 2. Build and compile the fine-tuning model inside the strategy scope.
with strategy.scope():
    base_model = ResNet50(
        weights='imagenet',
        include_top=False,
        input_shape=(140, 140, 3),
    )
    # Backbone weights stay trainable so the whole network is fine-tuned.
    base_model.trainable = True

    model = tf.keras.Sequential(layers=[
        base_model,
        tf.keras.layers.GlobalAveragePooling2D(),
        tf.keras.layers.Dense(256, activation='relu'),
        tf.keras.layers.Dropout(0.3),
        # 20 outputs, matching the 20 coarse labels.
        tf.keras.layers.Dense(20, activation='softmax'),
    ])

    # Adam with a low learning rate (1e-5) for fine-tuning.
    low_lr_adam = tf.keras.optimizers.Adam(learning_rate=1e-5)
    model.compile(
        loss='categorical_crossentropy',
        optimizer=low_lr_adam,
        metrics=['accuracy'],
    )



# 1. Dataset preparation including the resize step.
def prepare_ds(X, y, shuffle=False):
    """Build a batched, prefetched dataset of ResNet50-ready images.

    Images are cast to float32, resized to 140x140 and run through
    ``preprocess_input``; labels are passed through untouched.

    Args:
        X: Array of images; converted with ``X.astype('float32')``.
        y: Labels aligned with ``X``.
        shuffle: When True, shuffle with a 1000-element buffer before batching.

    Returns:
        A ``tf.data.Dataset`` of ``(images, labels)`` batches of ``BATCH_SIZE``.
    """
    def preprocess_logic(image, label):
        # Scale up to 140x140 first, then apply the ResNet-specific scaling.
        return preprocess_input(tf.image.resize(image, (140, 140))), label

    ds = tf.data.Dataset.from_tensor_slices((X.astype('float32'), y)).map(
        preprocess_logic, num_parallel_calls=tf.data.AUTOTUNE
    )
    maybe_shuffled = ds.shuffle(1000) if shuffle else ds
    return maybe_shuffled.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# 2. Re-create the datasets with the function defined just above.
train_ds = prepare_ds(X_train, y_train, shuffle=True)
test_ds = prepare_ds(X_test, y_test)

# 3. Train for one epoch, validating on the test split.
# The model must have been built with input_shape=(140, 140, 3).
history = model.fit(
    x=train_ds,
    validation_data=test_ds,
    epochs=1,
)

2026-01-08 16:58:04.382056: W tensorflow/core/framework/dataset.cc:993] Input of GeneratorDatasetOp::Dataset will not be optimized because the dataset does not implement the AsGraphDefInternal() method needed to apply optimizations.


[1m2132/3125[0m [32m━━━━━━━━━━━━━[0m[37m━━━━━━━[0m [1m2:41:01[0m 10s/step - accuracy: 0.3449 - loss: 2.2182

KeyboardInterrupt: 