In [2]:
import zipfile
import os
import shutil

zip_path = "/content/archive (3).zip"
extract_path = "./mnist_data"
target_path = os.path.join(extract_path, "MNIST", "raw")

# Create the target directory if it doesn't exist
os.makedirs(target_path, exist_ok=True)

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

# Move the extracted files to the target directory
for filename in os.listdir(extract_path):
    if os.path.isfile(os.path.join(extract_path, filename)):
        shutil.move(os.path.join(extract_path, filename), target_path)


print(f"Files extracted and moved to: {target_path}")
print("Files in target directory:", os.listdir(target_path))

Files extracted and moved to: ./mnist_data/MNIST/raw
Files in target directory: ['train-labels.idx1-ubyte', 't10k-labels.idx1-ubyte', 't10k-images.idx3-ubyte', 'train-images.idx3-ubyte']


In [3]:
for root, dirs, files in os.walk(extract_path):
    print(root, dirs[:3], files[:3])

./mnist_data ['MNIST', 'train-images-idx3-ubyte', 'train-labels-idx1-ubyte'] []
./mnist_data/MNIST ['raw'] []
./mnist_data/MNIST/raw [] ['train-labels.idx1-ubyte', 't10k-labels.idx1-ubyte', 't10k-images.idx3-ubyte']
./mnist_data/train-images-idx3-ubyte [] ['train-images-idx3-ubyte']
./mnist_data/train-labels-idx1-ubyte [] ['train-labels-idx1-ubyte']
./mnist_data/t10k-labels-idx1-ubyte [] ['t10k-labels-idx1-ubyte']
./mnist_data/t10k-images-idx3-ubyte [] ['t10k-images-idx3-ubyte']


In [4]:
import tensorflow as tf

# 1. Load MNIST directly (no cache_dir argument here)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# 2. Normalize to [0,1] and expand channel dimension (grayscale → (28,28,1))
x_train = x_train.astype("float32") / 255.0
x_test = x_test.astype("float32") / 255.0

x_train = tf.expand_dims(x_train, -1)  # shape: (60000, 28, 28, 1)
x_test = tf.expand_dims(x_test, -1)    # shape: (10000, 28, 28, 1)

# 3. Resize to (32,32,1) for ResNet compatibility
x_train = tf.image.resize(x_train, [32, 32])
x_test = tf.image.resize(x_test, [32, 32])

print("Train set:", x_train.shape, y_train.shape)
print("Test set:", x_test.shape, y_test.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train set: (60000, 32, 32, 1) (60000,)
Test set: (10000, 32, 32, 1) (10000,)


In [5]:
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

In [6]:
x_train = x_train /255.0
x_test = x_test /255.0

In [7]:
x_train_rgb = tf.image.grayscale_to_rgb(x_train)  # (60000, 32, 32, 3)
x_test_rgb = tf.image.grayscale_to_rgb(x_test)    # (10000, 32, 32, 3)

In [8]:
def residual_block(x, filters, downsample=False):
    shortcut = x
    strides = 1

    # Check if downsampling is needed OR if the number of filters is changing
    if downsample or x.shape[-1] != filters:
        strides = 2 if downsample else 1 # Use stride 2 only for downsampling
        shortcut = layers.Conv2D(filters, (1,1), strides=strides, padding="same")(shortcut)
        shortcut = layers.BatchNormalization()(shortcut)

    x = layers.Conv2D(filters, (3,3), strides=strides, padding="same")(x)
    x = layers.BatchNormalization()(x)
    x = layers.ReLU()(x)

    x = layers.Conv2D(filters, (3,3), strides=1, padding="same")(x)
    x = layers.BatchNormalization()(x)

    x = layers.add([shortcut, x])
    x = layers.ReLU()(x)
    return x

inputs = layers.Input(shape=(32,32,1))
x = layers.Conv2D(32, (3,3), strides=1, padding="same")(inputs)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)

# Add residual blocks
x = residual_block(x, 32) # filters: 32 -> 32 (no change, no downsample)
x = residual_block(x, 32, downsample=True) # filters: 32 -> 32 (downsample)
x = residual_block(x, 64) # filters: 32 -> 64 (change, no downsample) - This is where the error occurred
x = residual_block(x, 64, downsample=True) # filters: 64 -> 64 (downsample)
x = residual_block(x, 128) # filters: 64 -> 128 (change, no downsample)

x = layers.GlobalAveragePooling2D()(x)
outputs = layers.Dense(10, activation='softmax')(x)

model = models.Model(inputs, outputs)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.summary()


In [None]:
history = model.fit(
    x_train_rgb if 'rgb' in locals() else x_train,
    y_train,
    epochs=1,
    batch_size=128,
    validation_data=(x_test_rgb if 'rgb' in locals() else x_test, y_test)
)

[1m469/469[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m955s[0m 2s/step - accuracy: 0.8922 - loss: 0.3805 - val_accuracy: 0.1135 - val_loss: 5.9291
