In [1]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

In [None]:
# import tensorflow_datasets as tfds

# # Path to where TFDS stores datasets (adjust if needed)
# data_dir = r"C:\Users\SRIRAM\tensorflow_datasets"

# # Load from local directory only — skip download
# data, info = tfds.load(
#     'coco_captions',
#     split=['train[:1%]', 'validation[:1%]'],  # can use full train/validation too
#     with_info=True,
#     as_supervised=False,
#     data_dir=data_dir,
#     download=False   # 🚫 prevents TFDS from trying to re-download
# )

# train_ds, val_ds = data

# print("Dataset loaded locally from:", data_dir)
# print("Train examples:", info.splits['train'].num_examples)
# print("Validation examples:", info.splits['validation'].num_examples)


In [None]:
# a) Load dataset
# Without a `split` argument tfds.load returns a dict keyed by split name.
data, info = tfds.load('coco_captions', with_info=True, as_supervised=False)

train_ds = data['train']

# The held-out split is not guaranteed to be registered as 'validation':
# the TFDS catalog lists the coco_captions splits as train / val / test /
# restval (TODO confirm against the installed TFDS version). Look the name up
# instead of hard-coding it so this cell does not die with a KeyError.
val_split = 'validation' if 'validation' in data else 'val'
val_ds = data[val_split]

# b) Show number of images
print("Training images:", info.splits['train'].num_examples)
print("Validation images:", info.splits[val_split].num_examples)




[1mDownloading and preparing dataset Unknown size (download: Unknown size, generated: Unknown size, total: Unknown size) to C:\Users\USER\tensorflow_datasets\coco_captions\2014\1.1.0...[0m


Dl Completed...: 0 url [00:00, ? url/s]

Dl Size...: 0 MiB [00:00, ? MiB/s]

Extraction completed...: 0 file [00:00, ? file/s]

In [None]:
# import tensorflow_datasets as tfds

# data, info = tfds.load(
#     'coco_captions',
#     split=['train[:1%]', 'validation[:1%]'],  # load just 1%
#     with_info=True,
#     as_supervised=False
# )

# train_ds, val_ds = data
# print("Train examples:", info.splits['train'].num_examples * 0.01)
# print("Val examples:", info.splits['validation'].num_examples * 0.01)


In [None]:
# c) Plot sample images
# Lay the first six training examples out on a 2x3 grid, axes hidden.
plt.figure(figsize=(8, 8))
for position, example in enumerate(train_ds.take(6), start=1):
    plt.subplot(2, 3, position)
    plt.imshow(example['image'])
    plt.axis("off")
plt.show()


In [None]:
# f) Normalize images
def preprocess(image):
    """Resize `image` to 128x128 and return it as float32 divided by 255.0."""
    resized = tf.image.resize(image, (128, 128))
    scaled = tf.cast(resized, tf.float32) / 255.0
    return scaled


def _image_batches(dataset):
    """Map each example dict to its preprocessed image and batch by 32."""
    images_only = dataset.map(lambda example: preprocess(example['image']))
    return images_only.batch(32)


train_images = _image_batches(train_ds)
val_images = _image_batches(val_ds)


In [None]:
# g) Build CNN model
NUM_CLASSES = 10  # size of the dummy classification head

model = models.Sequential([
    layers.Conv2D(32, (3,3), activation='relu', input_shape=(128,128,3)),
    layers.MaxPooling2D(2,2),
    layers.Conv2D(64, (3,3), activation='relu'),
    layers.MaxPooling2D(2,2),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(NUM_CLASSES, activation='softmax')  # Dummy 10-class output
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# h) Train and evaluate
# sparse_categorical_crossentropy needs an integer target for every image, but
# train_images / val_images yield images only, so fit() on them stops with a
# missing-target error. coco_captions ships captions rather than a class label,
# so build (image, label) pairs with a deterministic placeholder label here.
# NOTE(review): assumes each example carries an integer 'image/id' feature, as
# listed in the TFDS catalog -- confirm. The labels are arbitrary, so the
# accuracy only serves the before/after-augmentation comparison.
def attach_dummy_label(example):
    """Return (preprocessed image, placeholder class in [0, NUM_CLASSES))."""
    return preprocess(example['image']), example['image/id'] % NUM_CLASSES


train_labeled = train_ds.map(attach_dummy_label).batch(32)
val_labeled = val_ds.map(attach_dummy_label).batch(32)

history = model.fit(train_labeled, validation_data=val_labeled, epochs=2)


In [None]:
# d) Augmentation
data_augmentation = tf.keras.Sequential([
    layers.RandomFlip("horizontal_and_vertical"),
    layers.RandomRotation(0.2),
    layers.RandomContrast(0.3)
])

# Pass training=True explicitly so the random layers are active when they are
# called from a tf.data pipeline instead of from inside fit().
aug_train_ds = train_ds.map(lambda x: (data_augmentation(x['image'], training=True),))

# Dataset.map unpacks tuple elements into positional arguments, so the mapped
# function receives the augmented image tensor itself, not the 1-tuple.
# Indexing it with [0] would slice off the first pixel row and hand a rank-2
# tensor to tf.image.resize, which only accepts rank 3 or 4.
aug_train_images = aug_train_ds.map(preprocess).batch(32)

# The held-out split may be registered as 'val' instead of 'validation'
# (TODO confirm against the installed TFDS version); pick whichever exists.
val_split_name = 'validation' if 'validation' in info.splits else 'val'

print("After augmentation:")
print("Training images (approx):", info.splits['train'].num_examples)
print("Validation images:", info.splits[val_split_name].num_examples)


In [None]:
# i + j) Rebuild CNN (same architecture for fair comparison)
# clone_model rebuilds the architecture with newly created layers, so this
# run does not start from the weights of the first model.
aug_model = models.clone_model(model)
aug_model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# The loss needs integer targets, but aug_train_images / val_images yield
# images only, so fit() on them stops with a missing-target error. Build
# labelled pipelines here with a deterministic placeholder label.
# NOTE(review): assumes each example carries an integer 'image/id' feature, as
# listed in the TFDS catalog -- confirm. 10 matches the dummy 10-class head.
def augmented_with_dummy_label(example):
    """Return (augmented + preprocessed image, placeholder class in [0, 10))."""
    augmented = data_augmentation(example['image'], training=True)
    return preprocess(augmented), example['image/id'] % 10


def plain_with_dummy_label(example):
    """Return (preprocessed image, placeholder class in [0, 10)) without augmentation."""
    return preprocess(example['image']), example['image/id'] % 10


aug_train_labeled = train_ds.map(augmented_with_dummy_label).batch(32)
aug_val_labeled = val_ds.map(plain_with_dummy_label).batch(32)

aug_history = aug_model.fit(aug_train_labeled, validation_data=aug_val_labeled, epochs=2)


In [None]:
# k) Compare before and after augmentation
# Draw the per-epoch training accuracy of both runs on the same axes.
for curve_label, run in (('Before Aug', history), ('After Aug', aug_history)):
    plt.plot(run.history['accuracy'], label=curve_label)
plt.title('Training Accuracy Comparison')
plt.legend()
plt.show()
