<a href="https://colab.research.google.com/github/ananyakaligal/My-Machine-Learning-Journey/blob/main/CNN_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install tensorflow



In [2]:
import tensorflow as tf
import numpy as np
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import layers, models
import time

In [3]:
# Define constants
batchSize = 256
numEpochs = 5
trainToLoss = 0.0001
bsr = 1.0  # BatchSizeReduction. 1 = no reduction. 0.25 = reduction to 25% of original.
lrr = 1.0

In [4]:
def load_mnist(train_samples=None, test_samples=None):
    (x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

    # Normalize the pixel values to be between 0 and 1
    x_train = x_train.astype('float32') / 255.0
    x_test = x_test.astype('float32') / 255.0

    # Reshape to add the channel dimension
    x_train = x_train.reshape(-1, 28, 28, 1)
    x_test = x_test.reshape(-1, 28, 28, 1)

    # Convert labels to one-hot encoding
    y_train = tf.keras.utils.to_categorical(y_train, num_classes=10)
    y_test = tf.keras.utils.to_categorical(y_test, num_classes=10)

    if train_samples:
        # Randomly sample the training data
        indices_train = np.random.choice(len(x_train), train_samples, replace=False)
        x_train = x_train[indices_train]
        y_train = y_train[indices_train]

    if test_samples:
        # Randomly sample the testing data
        indices_test = np.random.choice(len(x_test), test_samples, replace=False)
        x_test = x_test[indices_test]
        y_test = y_test[indices_test]

    return x_train, y_train, x_test, y_test

# Example usage: Load 1000 training samples and 200 test samples
x_train, y_train, x_test, y_test = load_mnist(train_samples=10000, test_samples=2000)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz


In [5]:
# Print the shapes of the datasets
print(f"x_train shape: {x_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"x_test shape: {x_test.shape}")
print(f"y_test shape: {y_test.shape}")

x_train shape: (10000, 28, 28, 1)
y_train shape: (10000, 10)
x_test shape: (2000, 28, 28, 1)
y_test shape: (2000, 10)


In [6]:
def format_data(data):
    indices = []
    values = []
    batch_size = data.shape[0]
    height = data.shape[1]
    width = data.shape[2]
    channels = data.shape[3] if len(data.shape) > 3 else 1  # Add channel dimension if present

    for i in range(batch_size):
        for row in range(height):
            for col in range(width):
                for channel in range(channels):
                    indices.append([i, row, col, channel])
                    values.append(float(data[i, row, col, channel]))

    return np.array(indices), np.array(values)

In [7]:
# Formatting data
formatted_x_train_indices, formatted_x_train_values = format_data(x_train)
formatted_x_test_indices, formatted_x_test_values = format_data(x_test)

In [8]:
# Check shapes
print("Shape of formatted_x_train_indices:", formatted_x_train_indices.shape)
print("Shape of formatted_x_train_values:", formatted_x_train_values.shape)

Shape of formatted_x_train_indices: (7840000, 4)
Shape of formatted_x_train_values: (7840000,)


In [9]:
# Define a function to print samples of the formatted data
def print_sample(indices, values, sample_size=30):
    for i in range(sample_size):
        print(f"Indices: {indices[i]}, Value: {values[i]}")

# Print samples
print("Sample of train_indices and train_values:")
print_sample(formatted_x_train_indices, formatted_x_train_values)

Sample of train_indices and train_values:
Indices: [0 0 0 0], Value: 0.0
Indices: [0 0 1 0], Value: 0.0
Indices: [0 0 2 0], Value: 0.0
Indices: [0 0 3 0], Value: 0.0
Indices: [0 0 4 0], Value: 0.0
Indices: [0 0 5 0], Value: 0.0
Indices: [0 0 6 0], Value: 0.0
Indices: [0 0 7 0], Value: 0.0
Indices: [0 0 8 0], Value: 0.0
Indices: [0 0 9 0], Value: 0.0
Indices: [ 0  0 10  0], Value: 0.0
Indices: [ 0  0 11  0], Value: 0.0
Indices: [ 0  0 12  0], Value: 0.0
Indices: [ 0  0 13  0], Value: 0.0
Indices: [ 0  0 14  0], Value: 0.0
Indices: [ 0  0 15  0], Value: 0.0
Indices: [ 0  0 16  0], Value: 0.0
Indices: [ 0  0 17  0], Value: 0.0
Indices: [ 0  0 18  0], Value: 0.0
Indices: [ 0  0 19  0], Value: 0.0
Indices: [ 0  0 20  0], Value: 0.0
Indices: [ 0  0 21  0], Value: 0.0
Indices: [ 0  0 22  0], Value: 0.0
Indices: [ 0  0 23  0], Value: 0.0
Indices: [ 0  0 24  0], Value: 0.0
Indices: [ 0  0 25  0], Value: 0.0
Indices: [ 0  0 26  0], Value: 0.0
Indices: [ 0  0 27  0], Value: 0.0
Indices: [0 1 0 0]

In [10]:
def create_tensor_from_data(indices, values, shape):
    # Convert indices and values to tensors
    indices_tensor = tf.constant(indices, dtype=tf.int64)
    values_tensor = tf.constant(values, dtype=tf.float32)

    # Create sparse tensor
    sparse_tensor = tf.sparse.SparseTensor(
        indices=indices_tensor,
        values=values_tensor,
        dense_shape=shape
    )

    # Convert sparse tensor to dense tensor
    dense_tensor = tf.sparse.to_dense(sparse_tensor)
    return dense_tensor

In [11]:
# Create tensors
shape_x_train = [10000, 28, 28, 1]  # Shape for x_train
x_train_tensor = create_tensor_from_data(formatted_x_train_indices, formatted_x_train_values, shape_x_train)


In [12]:
# Define a function to extract a sample from the tensor
def get_sample_from_tensor(tensor, sample_index):
    # Extract a sample by indexing into the tensor
    sample = tensor[sample_index]
    return sample

# Example usage
sample_index = 0  # Index of the sample you want to extract
sample_tensor = get_sample_from_tensor(x_train_tensor, sample_index)

# Print or visualize the sample
print("Sample Tensor Shape:", sample_tensor.shape)
print("Sample Tensor Values (first few pixels):", sample_tensor.numpy().flatten()[:100])

Sample Tensor Shape: (28, 28, 1)
Sample Tensor Values (first few pixels): [0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.
 0. 0. 0. 0.]


In [13]:
def format_labels_one_hot(labels):
    indices = []
    values = []

    num_samples, num_classes = labels.shape

    for i in range(num_samples):
        one_hot_vector = labels[i]
        for j in range(num_classes):
            indices.append([i, j])
            values.append(float(one_hot_vector[j]))

    return np.array(indices), np.array(values)

In [14]:
# Assuming y_train and y_test are already one-hot encoded
formatted_y_train_indices, formatted_y_train_values = format_labels_one_hot(y_train)
formatted_y_test_indices, formatted_y_test_values = format_labels_one_hot(y_test)

In [15]:
# Print samples
print("Sample of train_indices and train_values of y :")
print_sample(formatted_y_train_indices, formatted_y_train_values)

Sample of train_indices and train_values of y :
Indices: [0 0], Value: 1.0
Indices: [0 1], Value: 0.0
Indices: [0 2], Value: 0.0
Indices: [0 3], Value: 0.0
Indices: [0 4], Value: 0.0
Indices: [0 5], Value: 0.0
Indices: [0 6], Value: 0.0
Indices: [0 7], Value: 0.0
Indices: [0 8], Value: 0.0
Indices: [0 9], Value: 0.0
Indices: [1 0], Value: 1.0
Indices: [1 1], Value: 0.0
Indices: [1 2], Value: 0.0
Indices: [1 3], Value: 0.0
Indices: [1 4], Value: 0.0
Indices: [1 5], Value: 0.0
Indices: [1 6], Value: 0.0
Indices: [1 7], Value: 0.0
Indices: [1 8], Value: 0.0
Indices: [1 9], Value: 0.0
Indices: [2 0], Value: 0.0
Indices: [2 1], Value: 0.0
Indices: [2 2], Value: 0.0
Indices: [2 3], Value: 0.0
Indices: [2 4], Value: 1.0
Indices: [2 5], Value: 0.0
Indices: [2 6], Value: 0.0
Indices: [2 7], Value: 0.0
Indices: [2 8], Value: 0.0
Indices: [2 9], Value: 0.0


In [16]:
# Print the first 5 samples for verification
for i in range(5):
    print(f"Sample {i}:")
    print(f"Original Label: {np.argmax(y_train[i])}")  # Prints the original label
    print(f"One-Hot Encoded: {y_train[i]}")
    print()

Sample 0:
Original Label: 0
One-Hot Encoded: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

Sample 1:
Original Label: 0
One-Hot Encoded: [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]

Sample 2:
Original Label: 4
One-Hot Encoded: [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]

Sample 3:
Original Label: 3
One-Hot Encoded: [0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]

Sample 4:
Original Label: 8
One-Hot Encoded: [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]



In [17]:
def create_tensor_from_labels(indices, values, shape):
    # Convert indices and values to tensors
    indices_tensor = tf.constant(indices, dtype=tf.int64)
    values_tensor = tf.constant(values, dtype=tf.float32)

    # Create sparse tensor
    sparse_tensor = tf.sparse.SparseTensor(
        indices=indices_tensor,
        values=values_tensor,
        dense_shape=shape
    )

    # Convert sparse tensor to dense tensor
    dense_tensor = tf.sparse.to_dense(sparse_tensor)
    return dense_tensor

In [18]:
# Assuming y_train is already one-hot encoded
formatted_y_train_indices, formatted_y_train_values = format_labels_one_hot(y_train)

# Define the shape for the dense tensor
shape_y_train = [y_train.shape[0], y_train.shape[1]]  # [num_samples, num_classes]

# Create tensor
y_train_tensor = create_tensor_from_labels(formatted_y_train_indices, formatted_y_train_values, shape_y_train)


In [19]:
# Print tensor shape
print("Shape of y_train_tensor:", y_train_tensor.shape)

Shape of y_train_tensor: (10000, 10)


In [20]:
# Print tensor shape
print("Shape of x_train_tensor:", x_train_tensor.shape)

Shape of x_train_tensor: (10000, 28, 28, 1)


In [21]:
# Define the CNN model
def create_cnn_model():
    model = models.Sequential()

    # Add layers according to the provided architecture
    model.add(layers.Conv2D(32, (3, 3), input_shape=(28, 28, 1)))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(layers.Conv2D(64, (3, 3)))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))

    model.add(layers.Conv2D(128, (3, 3)))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    model.add(layers.Flatten())

    model.add(layers.Dense(256))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    model.add(layers.Dense(128))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    model.add(layers.Dense(64))
    model.add(layers.BatchNormalization())
    model.add(layers.Activation('relu'))

    model.add(layers.Dense(10))
    model.add(layers.Activation('softmax'))

    return model


In [22]:
# Create and compile the model
model = create_cnn_model()
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['categorical_accuracy'])


In [23]:
# Train the model
history = model.fit(x_train, y_train,
                    batch_size=batchSize,
                    epochs=numEpochs,
                    verbose=1)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [24]:
# Evaluate the model
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=2)
print(f"Test loss: {test_loss}")
print(f"Test accuracy: {test_acc}")

63/63 - 1s - loss: 4.7933 - categorical_accuracy: 0.1020 - 1s/epoch - 17ms/step
Test loss: 4.793262481689453
Test accuracy: 0.10199999809265137
