# Understanding tf.dataset and TFDS

Start by creating a tf.data.Dataset from tensors.

In [None]:
import tensorflow as tf
# Build a 1-D tensor holding the integers 0..9.
X=tf.range(10)
# Slice the tensor along its first axis, so each dataset element is one scalar.
dataset = tf.data.Dataset.from_tensor_slices(X)
# Bare expression so the notebook displays the dataset's repr.
dataset

In [None]:
# Iterate the dataset and print every element it yields.
for item in dataset:
    print(item)

In [None]:
# map() applies the function to every element; here each value is doubled.
dataset = dataset.map(lambda x: x*2)


In [None]:
# Print the elements again to see the result of the map transformation.
for item in dataset:
    print(item)

Now let us load MNIST as a tf.data.Dataset from the TFDS (TensorFlow Datasets) project.

In [None]:
import tensorflow_datasets as tfds

In [None]:
# as_supervised=True makes each element an (image, label) tuple instead of a dict.
dataset = tfds.load("mnist",as_supervised=True)
# No split was requested, so the result is indexed by split name.
train_dataset, test_dataset = dataset["train"], dataset["test"]

In [None]:
# Peek at two training examples to see what the dataset yields.
for item in train_dataset.take(2):
    print(type(item))
    # Each element unpacks as an (image, label) pair because of as_supervised=True.
    images,labels=item
    # Without as_supervised=True the element would be a dict instead,
    # accessed as item["image"] and item["label"].
    print(type(images))

In [None]:
import tensorflow as tf

def resize_data(x, y):
    """Return the image ``x`` resized to 224x224 together with its unchanged label ``y``."""
    target_size = (224, 224)
    return tf.image.resize(x, target_size), y


# Run every (image, label) pair of the training set through resize_data.
resized_dataset = train_dataset.map(resize_data)


In [None]:
import numpy as np
# pyplot must be imported here: this cell runs before the cell that imports it later on.
import matplotlib.pyplot as plt

# Show two resized MNIST digits, each in its own figure titled with its label.
for item in resized_dataset.take(2):
    x, y = item
    plt.figure()
    # The resized image is (224, 224, 1). Drop the single channel axis so imshow
    # gets a plain 2-D array; older matplotlib releases reject (H, W, 1) input.
    plt.imshow(np.squeeze(x.numpy(), axis=-1))
    plt.title(np.array(y))

Let us try now with the tf_flowers dataset.


In [None]:
def resize_data(x, y):
    """Resize image ``x`` to 224x224; the label ``y`` is passed through untouched."""
    return tf.image.resize(x, (224, 224)), y


# Load tf_flowers as (image, label) tuples and keep its "train" split.
dataset = tfds.load("tf_flowers", as_supervised=True)
train_dataset = dataset["train"]
# Bring every flower image to the same 224x224 size.
resized_dataset = train_dataset.map(resize_data)

In [None]:
# pyplot must be imported here: this cell runs before the cell that imports it later on.
import matplotlib.pyplot as plt
import numpy as np

# Show two resized flower images, each in its own figure titled with its label.
for item in resized_dataset.take(2):
    x, y = item
    print(x.shape)
    plt.figure()
    # tf.image.resize returns float32 values still in the 0..255 range, while imshow
    # expects float RGB data in [0, 1] and clips everything else. Casting back to
    # uint8 displays the image with its real colours.
    plt.imshow(x.numpy().astype("uint8"))
    plt.title(np.array(y))

This can also be done with tfds.builder, but the tfds.builder call itself does not take the as_supervised=True parameter.
Without it the dataset yields dictionaries, which forces us to use the 'image' and 'label' keys.

In [None]:
import tensorflow_datasets as tfds

# Download and prepare the tf_flowers dataset.
# NOTE: the variable names say "imagenet", but the dataset requested here is tf_flowers.
imagenet_builder = tfds.builder("tf_flowers")
imagenet_builder.download_and_prepare()

# Load the prepared tf_flowers data; the result is indexed by split name below.
imagenet_dataset = imagenet_builder.as_dataset()


In [None]:
# Keep only the "train" split; its elements are read by key ('image', 'label') further down.
train_dataset = imagenet_dataset['train']

In [None]:
import tensorflow as tf
# Sanity check: the split obtained through the builder is a regular tf.data.Dataset.
assert isinstance(train_dataset,tf.data.Dataset)

In [None]:
import matplotlib.pyplot as plt

# Pull a single example out of the dataset as NumPy values. The built-in next()
# is the standard way to advance an iterator, replacing the Python-2 style .next() call.
x = next(train_dataset.take(1).as_numpy_iterator())
# The example is a dict, so the image and its label are read by key.
plt.imshow(x['image'])
plt.title(x['label'])


In [None]:
from tensorflow.keras.applications import VGG16

# Load the VGG16 model with weights pre-trained on ImageNet
# include_top=False leaves out the fully connected classifier layers at the top of the network.
base_model = VGG16(weights='imagenet',include_top=False)
# Display the shape of the base model's output tensor.
base_model.output.get_shape()

In [None]:
# Number of flower classes, read from the dataset metadata rather than hard-coded.
num_classes = imagenet_builder.info.features["label"].num_classes

# Collapse the spatial dimensions of the convolutional features into one vector per image.
avg = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
# One softmax unit per class. A single softmax unit always outputs 1.0 and so
# cannot distinguish between classes.
output = tf.keras.layers.Dense(num_classes, activation="softmax")(avg)
model = tf.keras.models.Model(inputs=base_model.input, outputs=output)

In [None]:
# The dataset labels are integer class ids, not one-hot vectors, so the sparse
# variant of the cross-entropy loss is the one that matches them.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [None]:
# This train_dataset comes from the builder, so each element is a dict and map()
# passes that dict as a single argument. Unpack it into (image, label) before
# handing it to the two-argument resize_data.
resized_dataset = train_dataset.map(
    lambda item: resize_data(item["image"], item["label"])
)


In [None]:
# Smoke test: push two batches of 32 resized examples through the model.
for item in resized_dataset.batch(32).take(2):
    x,y=item
    print(x.shape)
    # The predictions are discarded; this only checks that the forward pass runs.
    model.predict(x)

In [None]:
# Create a dataset that produces batches of data
# Each element of `dataset` groups up to batch_size consecutive resized examples.
batch_size = 32
dataset = resized_dataset.batch(batch_size)

In [None]:
# Obtain an iterator over the batched dataset through the built-in iter().
iterator = iter(dataset)


In [None]:
# Fetch one batch from the iterator (this advances it) and inspect its Python type.
batch = iterator.get_next()
type(batch)

In [None]:
def generator():
    """Yield ``(x, y)`` batches from the batched ``dataset`` forever.

    Each pass of the inner loop walks the dataset once. When it is exhausted,
    the outer loop starts a fresh pass, so the generator never terminates and
    the consumer decides how many batches make up an epoch.

    The images are not resized here because ``dataset`` is built from
    ``resized_dataset``, whose images are already 224x224.
    """
    while True:
        # Iterating the dataset again creates a new iterator, so no manual
        # reset or OutOfRangeError handling is needed.
        for x, y in dataset:
            yield x, y

In [None]:
# Use the generator as the input to model.fit
# The generator loops forever, so steps_per_epoch tells fit how many batches form one epoch.
model.fit(generator(), epochs=10, steps_per_epoch=len(resized_dataset) // batch_size)


In [None]:


# NOTE(review): load_data() and x_new are not defined in the visible part of this
# notebook; this cell looks like a generic template and presumably fails as
# written. Confirm before running.

# Load some data to use for training or evaluation
x_train, y_train, x_test, y_test = load_data()

# Train the model on the training data
history = model.fit(x_train, y_train, epochs=10, batch_size=32)

# Evaluate the model on the test data
scores = model.evaluate(x_test, y_test)

# Use the model to make predictions on new data
predictions = model.predict(x_new)


In [None]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Training generator: rescale pixel values to [0, 1] and apply random
# geometric augmentation (rotation, shifts, horizontal flips).
datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Load the training data from a directory of images
train_generator = datagen.flow_from_directory(
    'data/train',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

# Load the VGG16 model with weights pre-trained on ImageNet
# NOTE(review): with the default top, VGG16 predicts the 1000 ImageNet classes.
# The one-hot labels produced from 'data/train' must have the same width; confirm.
model = VGG16(weights='imagenet')

# Compile the model with an Adam optimizer and a categorical cross-entropy loss function
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model on the training data.
# Model.fit accepts generators directly; fit_generator is deprecated.
history = model.fit(train_generator, epochs=10)

# Validation images must not be randomly augmented, otherwise the score changes
# from run to run. Use a generator that only applies the same rescaling.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_generator = validation_datagen.flow_from_directory(
    'data/validation',
    target_size=(224, 224),
    batch_size=32,
    class_mode='categorical'
)

# Evaluate the model on the validation data.
# Model.evaluate accepts generators directly; evaluate_generator is deprecated.
scores = model.evaluate(validation_generator)

# Use the model to make predictions on new data
# NOTE(review): load_new_data() is not defined in the visible part of this
# notebook; it is presumably a placeholder. Confirm.
x_new = load_new_data()
predictions = model.predict(x_new)
