<a href="https://colab.research.google.com/github/iamr7d/Deep-Learning/blob/main/Basics_of_Auto_encoders.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import layers, models, datasets, callbacks
import tensorflow.keras.backend as K

This line loads the Fashion MNIST dataset, which contains grayscale images of 10 different classes of clothing items (e.g., T-shirts, shoes, bags). Each image is
28
×
28
28×28 pixels.

In [2]:
IMAGE_SIZE = 32
CHANNELS = 1
BATCH_SIZE = 100
BUFFER_SIZE = 1000
VALIDATION_SPLIT = 0.2
EMBEDDING_DIM = 2
EPOCHS = 3

In [3]:
# Load the data
(x_train, y_train), (x_test, y_test) = datasets.fashion_mnist.load_data()

In [4]:
# Preprocess the data

def preprocess(imgs):
    """
    Normalize and reshape the images
    """
    imgs = imgs.astype("float32") / 255.0
    imgs = np.pad(imgs, ((0, 0), (2, 2), (2, 2)), constant_values=0.0)
    imgs = np.expand_dims(imgs, -1)
    return imgs

x_train = preprocess(x_train)
x_test = preprocess(x_test)

* The line **imgs = imgs.astype('float32') / 255.0** converts pixel values, which range from 0 to 255, to a floating-point range between 0 and 1. This normalization helps the model learn more efficiently by ensuring pixel values are within a consistent range.

* Padding the images: This line pads each
28
×
28
28×28 image to a
32
×
32
32×32 image by adding a 2-pixel border of zeros around each side. Padding is useful here for a few reasons:

* It makes the images compatible with standard models or architectures, which often work better with
32
×
32
32×32 images.
Padding helps the model retain the original image’s features while extending the input size slightly.
* The argument ((0, 0), (2, 2), (2, 2)) means:

> (0, 0): No padding on the first dimension, which represents the number of images in the dataset.

> (2, 2): Add 2 pixels of padding to both the top and bottom in the height dimension.

> (2, 2): Add 2 pixels of padding to both sides in the width dimension.

> Setting constant values: constant_values=0.0 pads with zeros (black pixels), which maintains consistency for the padded border.

> Adding a channel dimension: This step reshapes the data to include a channel dimension, converting each
32
×
32
32×32 image to a shape of
32
×
32
×
1
32×32×1, where 1 represents the grayscale channel.

In [5]:
# Show some items of clothing from the training set
display(x_train)

array([[[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]]],


       [[[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        [[0.],
         [0.],
         [0.],
         ...,
         [0.],
         [0.],
         [0.]],

        ...,

        [[0.],
 

# **2. Build the autoencoder**

In [6]:
# Encoder
encoder_input = layers.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, CHANNELS), name="encoder_input")
x = layers.Conv2D(32, (3, 3), strides=2, activation="relu", padding="same")(encoder_input)
x = layers.Conv2D(64, (3, 3), strides=2, activation="relu", padding="same")(x)
x = layers.Conv2D(128, (3, 3), strides=2, activation="relu", padding="same")(x)
shape_before_flattening = K.int_shape(x)[1:]  # the decoder will need this!
x = layers.Flatten()(x)
encoder_output = layers.Dense(EMBEDDING_DIM, name="encoder_output")(x)
encoder = models.Model(encoder_input, encoder_output)
encoder.summary()

In [7]:
# Decoder
decoder_input = layers.Input(shape=(EMBEDDING_DIM,), name="decoder_input")
x = layers.Dense(np.prod(shape_before_flattening))(decoder_input)
x = layers.Reshape(shape_before_flattening)(x)
x = layers.Conv2DTranspose(128, (3, 3), strides=2, activation="relu", padding="same")(x)
x = layers.Conv2DTranspose(64, (3, 3), strides=2, activation="relu", padding="same")(x)
x = layers.Conv2DTranspose(32, (3, 3), strides=2, activation="relu", padding="same")(x)
decoder_output = layers.Conv2D(
    CHANNELS,
    (3, 3),
    strides=1,
    activation="sigmoid",
    padding="same",
    name="decoder_output",)(x)
decoder = models.Model(decoder_input, decoder_output)
decoder.summary()

In [9]:
# Autoencoder
autoencoder = models.Model(encoder_input, decoder(encoder_output) )
autoencoder.summary()

In [10]:
# Compile the autoencoder
autoencoder.compile(optimizer="adam", loss="binary_crossentropy")

In [13]:
autoencoder.fit(
    x_train,
    x_train,
    epochs=EPOCHS,
    batch_size=BATCH_SIZE,
    shuffle=True,
    validation_data=(x_test, x_test)
)

Epoch 1/3
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m199s[0m 327ms/step - loss: 0.3520 - val_loss: 0.2615
Epoch 2/3
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m200s[0m 324ms/step - loss: 0.2580 - val_loss: 0.2566
Epoch 3/3
[1m600/600[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m203s[0m 326ms/step - loss: 0.2547 - val_loss: 0.2554


<keras.src.callbacks.history.History at 0x7f7a72a993c0>

In [14]:
n_to_predict = 5000
example_images = x_test[:n_to_predict]
example_labels = y_test[:n_to_predict]

In [15]:
predictions = autoencoder.predict(example_images)

print("Example real clothing items")
display(example_images)
print("Reconstructions")
display(predictions)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step
Example real clothing items


array([[[[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        ...,

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]]],


       [[[0.        ],
         [0.        ],
         [0.  

Reconstructions


array([[[[1.06818188e-04],
         [2.66423085e-05],
         [4.89159138e-05],
         ...,
         [1.61061380e-05],
         [2.16057797e-05],
         [8.70811171e-04]],

        [[8.65911989e-06],
         [2.28603744e-06],
         [4.18625677e-06],
         ...,
         [3.58730790e-06],
         [1.48812308e-06],
         [1.28112733e-04]],

        [[4.56395537e-05],
         [1.37595898e-05],
         [6.61662416e-05],
         ...,
         [1.16873547e-04],
         [2.86358954e-05],
         [4.99755319e-04]],

        ...,

        [[1.28709007e-05],
         [7.73575721e-06],
         [7.22730183e-05],
         ...,
         [8.25342704e-06],
         [6.41981160e-07],
         [4.29925058e-05]],

        [[2.36601372e-05],
         [5.02337843e-06],
         [3.54854237e-05],
         ...,
         [5.51412222e-06],
         [1.00390798e-06],
         [6.22196021e-05]],

        [[4.08526626e-04],
         [1.48300169e-04],
         [2.08390542e-04],
         ...,
 

In [16]:
n_to_predict = 5000
example_images = x_test[:n_to_predict]
example_labels = y_test[:n_to_predict]

In [17]:
predictions = autoencoder.predict(example_images)

print("Example real clothing items")
display(example_images)
print("Reconstructions")
display(predictions)

[1m157/157[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 32ms/step
Example real clothing items


array([[[[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        ...,

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]],

        [[0.        ],
         [0.        ],
         [0.        ],
         ...,
         [0.        ],
         [0.        ],
         [0.        ]]],


       [[[0.        ],
         [0.        ],
         [0.  

Reconstructions


array([[[[1.06818188e-04],
         [2.66423085e-05],
         [4.89159138e-05],
         ...,
         [1.61061380e-05],
         [2.16057797e-05],
         [8.70811171e-04]],

        [[8.65911989e-06],
         [2.28603744e-06],
         [4.18625677e-06],
         ...,
         [3.58730790e-06],
         [1.48812308e-06],
         [1.28112733e-04]],

        [[4.56395537e-05],
         [1.37595898e-05],
         [6.61662416e-05],
         ...,
         [1.16873547e-04],
         [2.86358954e-05],
         [4.99755319e-04]],

        ...,

        [[1.28709007e-05],
         [7.73575721e-06],
         [7.22730183e-05],
         ...,
         [8.25342704e-06],
         [6.41981160e-07],
         [4.29925058e-05]],

        [[2.36601372e-05],
         [5.02337843e-06],
         [3.54854237e-05],
         ...,
         [5.51412222e-06],
         [1.00390798e-06],
         [6.22196021e-05]],

        [[4.08526626e-04],
         [1.48300169e-04],
         [2.08390542e-04],
         ...,
 