Let's implement a 3-layer neural network manually to make sure I understand basic NN architecture

This notebook uses LeCun et al.'s MNIST database of handwritten digits to train and test a 3-layer fully connected neural network, without using PyTorch or TensorFlow.

Verify that the dataset exists locally, or download it:

In [1]:
import os
import requests

# File names of the four gzipped MNIST archives, keyed by a human-readable description.
datasets = {
    'training set images': 'train-images-idx3-ubyte.gz',
    'training set labels': 'train-labels-idx1-ubyte.gz',
    'test set images': 't10k-images-idx3-ubyte.gz',
    'test set labels': 't10k-labels-idx1-ubyte.gz',
}
# The original source was http://yann.lecun.com/exdb/mnist/, but lecun's site
# gives http error 403 (2024/05/26), so a mirror is used instead.
remote_path = 'https://cseweb.ucsd.edu/~weijian/static/datasets/mnist/'
local_path = 'datasets'
download_timeout = 30  # seconds; without a timeout requests.get() may wait forever

# exist_ok=True makes the cell safe to re-run and avoids a check-then-create race.
os.makedirs(local_path, exist_ok=True)

for dataset_name, filename in datasets.items():
    local_file = os.path.join(local_path, filename)
    if os.path.exists(local_file):
        # Already downloaded on a previous run: nothing to do for this archive.
        print(f'{dataset_name} ({local_file}) exists')
        continue

    url = remote_path + filename
    print(f'Downloading {dataset_name} from {url}')
    response = requests.get(url, timeout=download_timeout)
    # Fail loudly on 4xx/5xx instead of saving an error page as a .gz file.
    response.raise_for_status()
    # Open file in write-binary mode, write response content to it:
    with open(local_file, 'wb') as file:
        file.write(response.content)
    # Report success only once the file has been written and closed.
    print(f"{dataset_name} saved to {local_file}")


Downloading training set images from https://cseweb.ucsd.edu/~weijian/static/datasets/mnist/train-images-idx3-ubyte.gz
training set images saved to datasets/train-images-idx3-ubyte.gz
Downloading training set labels from https://cseweb.ucsd.edu/~weijian/static/datasets/mnist/train-labels-idx1-ubyte.gz
training set labels saved to datasets/train-labels-idx1-ubyte.gz
Downloading test set images from https://cseweb.ucsd.edu/~weijian/static/datasets/mnist/t10k-images-idx3-ubyte.gz
test set images saved to datasets/t10k-images-idx3-ubyte.gz
Downloading test set labels from https://cseweb.ucsd.edu/~weijian/static/datasets/mnist/t10k-labels-idx1-ubyte.gz
test set labels saved to datasets/t10k-labels-idx1-ubyte.gz


Load the training data.

Format spec:

<small><pre>
TRAINING SET LABEL FILE (train-labels-idx1-ubyte):
[offset] [type]          [value]          [description]
0000     32 bit integer  0x00000801(2049) magic number (MSB first)
0004     32 bit integer  60000            number of items
0008     unsigned byte   ??               label (values 0 to 9)
........
xxxx     unsigned byte   ??               label

TRAINING SET IMAGE FILE (train-images-idx3-ubyte):
[offset] [type]          [value]          [description]
0000     32 bit integer  0x00000803(2051) magic number
0004     32 bit integer  60000            number of images
0008     32 bit integer  28               number of rows
0012     32 bit integer  28               number of columns
0016     unsigned byte   ??               pixel (organized row-wise, values 0..255, 0 means background, 255 means foreground)
........
xxxx     unsigned byte   ??               pixel
</pre></small>

In [None]:
import gzip
import struct
import numpy as np

# Build the archive paths with os.path.join, the same way the download cell
# names the files it saves, instead of concatenating with a hard-coded '/'.
label_file = os.path.join(local_path, datasets['training set labels'])
image_file = os.path.join(local_path, datasets['training set images'])

def _read_idx_header(path: str, field_count: int) -> tuple:
    '''Return the first `field_count` big-endian 32-bit integers of a gzipped file.'''
    with gzip.open(path, 'rb') as gz:
        # Only the header bytes are decompressed; the payload is not read here.
        header = gz.read(4 * field_count)
    return struct.unpack(f'>{field_count}i', header)


def _gzip_uncompressed_size(path: str) -> int:
    '''Return the uncompressed size recorded in the last 4 bytes of a gzip file.

    gzip stores the uncompressed size as a little-endian unsigned 32-bit value
    at the very end of the file, so it can be read without decompressing.
    Per the gzip format this value is the size modulo 2**32.
    '''
    with open(path, 'rb') as raw:
        raw.seek(-4, os.SEEK_END)
        return struct.unpack('<I', raw.read(4))[0]


def _require(condition: bool, message: str) -> None:
    '''Raise AssertionError(message) unless `condition` holds.

    Unlike a bare `assert`, this check is not stripped when Python runs with -O.
    '''
    if not condition:
        raise AssertionError(message)


def check_data(label_file:str, image_file:str):
    '''Check if data files look correct and have the same number of records.

    Parameters:
        label_file: path to a gzipped label file (8-byte header, one byte per label).
        image_file: path to a gzipped image file (16-byte header, one byte per pixel).

    Returns:
        A tuple `(images_count, [rows, cols])` taken from the image file header.

    Raises:
        AssertionError: if a magic number is wrong, the two files disagree on the
            record count, or a file's recorded uncompressed size does not match
            the size implied by its header.
        struct.error: if a header is shorter than expected.
        OSError: if a file cannot be opened, read, or decompressed.
    '''
    labels_magic_num, labels_count = _read_idx_header(label_file, 2)
    labels_data_size = _gzip_uncompressed_size(label_file)

    (images_magic_num, images_count,
     image_format_rows, image_format_cols) = _read_idx_header(image_file, 4)
    images_data_size = _gzip_uncompressed_size(image_file)

    _require(labels_magic_num == 2049,
             f'{label_file}: bad magic number {labels_magic_num}, expected 2049')
    _require(images_magic_num == 2051,
             f'{image_file}: bad magic number {images_magic_num}, expected 2051')
    _require(labels_count == images_count,
             f'record count mismatch: {labels_count} labels vs {images_count} images')

    # Expected payload sizes: header bytes plus one byte per label / per pixel.
    expected_labels_size = labels_count + 8
    expected_images_size = images_count*image_format_rows*image_format_cols + 16
    _require(labels_data_size == expected_labels_size,
             f'{label_file}: uncompressed size {labels_data_size}, '
             f'expected {expected_labels_size}')
    _require(images_data_size == expected_images_size,
             f'{image_file}: uncompressed size {images_data_size}, '
             f'expected {expected_images_size}')

    return (images_count, [image_format_rows, image_format_cols])


# Validate both training archives; keep the record count and the [rows, cols] image shape.
num_records, image_dimensions = check_data(label_file, image_file)

with gzip.open(image_file, 'rb') as f:
    f.seek(8)
    image_data = f.read()
    np.frombuffer(

    # for i, label in enumerate(struct.unpack('>i', x)[0] for x in image_data[8:]):
    #     # rl = struct.unpack('>i', label)[0]
    #     print(i, label)
    #     # data[0]['label'] = 

# def load_data(label_file, image_file):
#     '''Take a gzipped label and image file and load them into a DF'''