In [2]:
import gzip
import struct

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader

#create custom class to load data

class FashionMNIST(Dataset):
    """Fashion-MNIST images and labels loaded eagerly from gzipped IDX files.

    Images are held as a float32 tensor of shape (num, 1, rows, cols) scaled
    to [0, 1]; labels are held as an int64 tensor of shape (num,).

    Args:
        images_path: path to the gzipped IDX image file.
        labels_path: path to the gzipped IDX label file.

    Raises:
        ValueError: if a header is truncated, a magic number does not match
            the expected IDX type, a payload size disagrees with its header,
            or the image and label counts differ.
    """

    # IDX magic numbers: 0x0803 = unsigned-byte data with 3 dimensions
    # (images), 0x0801 = unsigned-byte data with 1 dimension (labels).
    _IMAGES_MAGIC = 0x00000803
    _LABELS_MAGIC = 0x00000801

    def __init__(self, images_path, labels_path):
        self.images = self._read_images(images_path)
        self.labels = self._read_labels(labels_path)
        # A count mismatch would otherwise only surface as an IndexError (or
        # silently dropped images) deep inside a DataLoader worker.
        if len(self.images) != len(self.labels):
            raise ValueError(
                f"image/label count mismatch: {len(self.images)} images in "
                f"{images_path} vs {len(self.labels)} labels in {labels_path}"
            )

    @staticmethod
    def _read_header(f, fmt, expected_magic, path):
        """Read and validate a big-endian IDX header; return the dimension sizes."""
        header_size = struct.calcsize(fmt)
        raw = f.read(header_size)
        if len(raw) != header_size:
            raise ValueError(f"{path}: truncated IDX header")
        magic, *dims = struct.unpack(fmt, raw)
        if magic != expected_magic:
            # Catches swapped image/label paths, which would otherwise be
            # parsed as garbage without any error.
            raise ValueError(
                f"{path}: unexpected IDX magic number {magic} "
                f"(expected {expected_magic})"
            )
        return dims

    def _read_images(self, path):
        """Return the images in ``path`` as a float32 (num, 1, rows, cols) tensor in [0, 1]."""
        with gzip.open(path, 'rb') as f:
            num, rows, cols = self._read_header(f, ">IIII", self._IMAGES_MAGIC, path)
            data = np.frombuffer(f.read(), dtype=np.uint8)
        if data.size != num * rows * cols:
            raise ValueError(
                f"{path}: expected {num * rows * cols} pixel bytes, found {data.size}"
            )
        # astype() copies, so the read-only frombuffer view is never mutated.
        data = data.reshape(num, 1, rows, cols).astype(np.float32) / 255.0
        return torch.tensor(data, dtype=torch.float32)

    def _read_labels(self, path):
        """Return the labels in ``path`` as an int64 tensor of shape (num,)."""
        with gzip.open(path, 'rb') as f:
            (num,) = self._read_header(f, ">II", self._LABELS_MAGIC, path)
            data = np.frombuffer(f.read(), dtype=np.uint8)
        if data.size != num:
            raise ValueError(f"{path}: expected {num} labels, found {data.size}")
        return torch.tensor(data, dtype=torch.long)

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        return self.images[idx], self.labels[idx]

In [3]:
# Directory holding the four gzipped Fashion-MNIST IDX files. This is the only
# machine-specific value in the cell: edit it to point at your own copy.
data_path = '/Users/danieluehling/Documents/GitHub/fashion-mnist/data/fashion'

# Derive every file path from data_path so the location lives in one place.
train_images_path = f'{data_path}/train-images-idx3-ubyte.gz'
train_labels_path = f'{data_path}/train-labels-idx1-ubyte.gz'
test_images_path  = f'{data_path}/t10k-images-idx3-ubyte.gz'
test_labels_path  = f'{data_path}/t10k-labels-idx1-ubyte.gz'

# Load both splits fully into memory.
train_data = FashionMNIST(train_images_path, train_labels_path)
test_data  = FashionMNIST(test_images_path, test_labels_path)

# Mini-batch iterators: shuffle only the training split so evaluation order
# stays fixed.
train_dataloader = DataLoader(train_data, batch_size=64, shuffle=True)
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False)

**ResNet18**

In [7]:
# Define Residual Block

class Residual(nn.Module):
    """Residual block: two 3x3 conv + batch-norm layers plus a skip connection.

    Args:
        num_channels: number of output channels of every convolution in the
            block.
        use_1x1conv: when True, the skip path goes through a 1x1 convolution
            (with stride ``strides``) so that it matches the main path's
            channel count and spatial size before the addition.
        strides: stride of the first 3x3 convolution and of the optional 1x1
            convolution.

    Without the 1x1 convolution the input is added unchanged, so it must
    already have ``num_channels`` channels and ``strides`` must leave the
    spatial size untouched.
    """

    def __init__(self, num_channels, use_1x1conv=False, strides=1):
        super().__init__()
        # Lazy layers infer their input channel count on the first forward pass.
        self.conv1 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1, stride=strides)
        self.conv2 = nn.LazyConv2d(num_channels, kernel_size=3, padding=1)
        self.bn1 = nn.LazyBatchNorm2d()
        self.bn2 = nn.LazyBatchNorm2d()
        if use_1x1conv:
            self.conv3 = nn.LazyConv2d(num_channels, kernel_size=1, stride=strides)
        else:
            self.conv3 = None

    def forward(self, X):
        """Return ``relu(main_path(X) + skip(X))``."""
        # F is torch.nn.functional, imported with the other dependencies at
        # the top of the notebook.
        Y = F.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        # Explicit None check: module truthiness is not a reliable "is set" test.
        if self.conv3 is not None:
            X = self.conv3(X)
        return F.relu(Y + X)

# Define ResNet

class ResNet(nn.Module):
    """Configurable ResNet: a stem, a sequence of residual stages, and a classifier head.

    Args:
        arch: iterable of ``(num_residuals, num_channels)`` pairs, one per
            stage. Stages are registered as ``b2``, ``b3``, ... in order.
        num_classes: size of the final linear layer's output.
    """

    def __init__(self, arch, num_classes=10):
        super().__init__()
        self.net = nn.Sequential(self.b1())
        # Stage numbering starts at 2 because the stem occupies the first slot.
        for stage_no, stage_cfg in enumerate(arch, start=2):
            stage = self.block(*stage_cfg, first_block=(stage_no == 2))
            self.net.add_module(f'b{stage_no}', stage)
        head = nn.Sequential(
            nn.AdaptiveAvgPool2d((1, 1)),
            nn.Flatten(),
            nn.LazyLinear(num_classes),
        )
        self.net.add_module('last', head)

    def b1(self):
        """Build the stem: 7x7 stride-2 conv, batch norm, ReLU, then max pooling."""
        stem_layers = [
            nn.LazyConv2d(64, kernel_size=7, stride=2, padding=3),
            nn.LazyBatchNorm2d(),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2, padding=1),
        ]
        return nn.Sequential(*stem_layers)

    def block(self, num_residuals, num_channels, first_block=False):
        """Build one stage of ``num_residuals`` residual blocks.

        The first residual block of every stage except the first stage uses a
        strided 1x1-conv skip path (stride 2); all other blocks use the plain
        identity skip.
        """
        def make_residual(position):
            if position == 0 and not first_block:
                return Residual(num_channels, use_1x1conv=True, strides=2)
            return Residual(num_channels)

        return nn.Sequential(*(make_residual(i) for i in range(num_residuals)))

    def forward(self, x):
        """Run ``x`` through the stem, all stages, and the classifier head."""
        return self.net(x)


# Define ResNet18 Architecture

class ResNet18(ResNet):
    """ResNet with four stages of two residual blocks each (64, 128, 256, 512 channels)."""

    def __init__(self, num_classes=10):
        stage_widths = (64, 128, 256, 512)
        # Every stage contributes two residual blocks.
        arch = tuple((2, width) for width in stage_widths)
        super().__init__(arch, num_classes)


# Training and Testing Functions

def train_loop(dataloader, model, loss_fn, optimizer, device=None):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: iterable of ``(X, y)`` batches exposing a ``dataset``
            attribute with a length (used only for progress output).
        model: module to train; it is switched to training mode.
        loss_fn: callable mapping ``(pred, y)`` to a scalar loss tensor.
        optimizer: optimizer over ``model``'s parameters.
        device: device each batch is moved to. Defaults to the device of the
            model's first parameter, so the function no longer depends on a
            module-level ``device`` variable.

    Prints the current loss every 10 batches.
    """
    if device is None:
        # Follow the model rather than a global; a parameter-free model
        # leaves batches where they already are.
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else None

    model.train()
    size = len(dataloader.dataset)
    for batch, (X, y) in enumerate(dataloader):
        if device is not None:
            X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        # Clear gradients after the update so the next batch starts clean.
        optimizer.zero_grad()

        if batch % 10 == 0:
            current = batch * len(X)
            print(f"loss: {loss.item():>7f} [{current:>5d}/{size:>5d}]")

def test_loop(dataloader, model, loss_fn):
    model.eval()
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Accuracy: {(100 * correct):>0.1f}%, Avg loss: {test_loss:>8f}\n")


# Training Loop

# Seed PyTorch's global RNG so that weight initialisation and the training
# DataLoader's shuffling repeat across Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

device = torch.device('cpu')

model = ResNet18().to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
epochs = 10

# One epoch = one full pass over the training set followed by an evaluation
# on the test set.
for t in range(epochs):
    print(f"Epoch {t+1}\n" + "-"*30)
    train_loop(train_dataloader, model, loss_fn, optimizer)
    test_loop(test_dataloader, model, loss_fn)

print("training complete")


Epoch 1
------------------------------
loss: 2.610945 [    0/60000]
loss: 0.933686 [  640/60000]
loss: 0.855690 [ 1280/60000]
loss: 0.929078 [ 1920/60000]
loss: 0.642682 [ 2560/60000]
loss: 0.612818 [ 3200/60000]
loss: 0.809693 [ 3840/60000]
loss: 0.546354 [ 4480/60000]
loss: 0.520474 [ 5120/60000]
loss: 0.272110 [ 5760/60000]
loss: 0.599447 [ 6400/60000]
loss: 0.613149 [ 7040/60000]
loss: 0.530319 [ 7680/60000]
loss: 0.419030 [ 8320/60000]
loss: 0.631364 [ 8960/60000]
loss: 0.672996 [ 9600/60000]
loss: 0.553733 [10240/60000]
loss: 0.436740 [10880/60000]
loss: 0.593849 [11520/60000]
loss: 0.560120 [12160/60000]
loss: 0.567501 [12800/60000]
loss: 0.524435 [13440/60000]
loss: 0.431685 [14080/60000]
loss: 0.404833 [14720/60000]
loss: 0.322221 [15360/60000]
loss: 0.535387 [16000/60000]
loss: 0.513838 [16640/60000]
loss: 0.267830 [17280/60000]
loss: 0.404571 [17920/60000]
loss: 0.364049 [18560/60000]
loss: 0.466163 [19200/60000]
loss: 0.777100 [19840/60000]
loss: 0.349647 [20480/60000]
loss

In [8]:
# Persist a training checkpoint.

# Number of epochs the model has been trained for.
EPOCH = epochs

# Checkpoint file, relative to the notebook's working directory.
PATH = "assignment3_model.pt"

# Bundle the epoch count with the model and optimizer state and write it out.
torch.save(
    obj=dict(
        epoch=EPOCH,
        model_state_dict=model.state_dict(),
        optimizer_state_dict=optimizer.state_dict(),
    ),
    f=PATH,
)

In [None]:
# Restore the checkpoint stored at PATH into the existing model and optimizer.

checkpoint = torch.load(PATH)
# Fixed two typos that raised at runtime: `mode` -> `model` and
# `load_stat_dict` -> `load_state_dict`.
model.load_state_dict(checkpoint['model_state_dict'])
optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
EPOCH = checkpoint['epoch']