In [38]:
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from torchvision import datasets
import torch
import torch.nn as nn
import torch.optim as optim

In [71]:
train_data = datasets.FashionMNIST('data', download=True, train=True)
train_X = train_data.data.float()
train_y = train_data.targets
test_data = datasets.FashionMNIST('data', download=True, train=False)
test_X = test_data.data.float()
test_y = test_data.targets
labels = ['T-shirt/top', 'Trouser', 'Pullover', 'Dress', 'Coat',
'Sandal', 'Shirt', 'Sneaker', 'Bag', 'Ankle boot']
print(train_X.size(),train_y.size())

torch.Size([60000, 28, 28]) torch.Size([60000])


In [72]:
train_X=train_X.unsqueeze(1)
test_X=test_X.unsqueeze(1)
train_X = torch.tensor(np.pad(train_X, ((0,0),(0,0),(2,2),(2,2)), 'constant')) 
test_X = torch.tensor(np.pad(test_X, ((0,0),(0,0),(2,2),(2,2)), 'constant'))


train_X.size()
all_idx = np.arange(len(train_X))
np.random.shuffle(all_idx)
train_idx = all_idx[:50000]
dev_idx = all_idx[50000:]
dev_X = train_X[dev_idx]
dev_y = train_y[dev_idx]
train_X = train_X[train_idx]
train_y = train_y[train_idx]
train_X.size()

torch.Size([50000, 1, 32, 32])

In [73]:
class BatchedIterator:
    def __init__(self, X, y, batch_size):
        self.X = X
        self.y = y
        self.batch_size = batch_size
    
    def iterate_once(self):
        for start in range(0, len(self.X), self.batch_size):
            end = start + self.batch_size
            yield self.X[start:end], self.y[start:end]

In [74]:
import torch.nn.functional as F
F.relu(torch.tensor([-5]))
train_X.size()[1:]

torch.Size([1, 32, 32])

In [78]:
class Convolutional(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1= nn.Conv2d(1,out_channels=6,kernel_size=5) #input:(N,C_in,H_in,W_in)
        self.conv2= nn.Conv2d(6,16,kernel_size=5)
        self.conv3= nn.Conv2d(16,120,kernel_size=5)
        
        self.max_pool=nn.MaxPool2d(2,2) #kernel,stride
        
        self.dense_1=nn.Linear(120,84)
        self.dense_2=nn.Linear(84,10)
        
        
    def forward(self, X):
        h=F.relu(self.conv1(X))
        h=self.max_pool(h)
        h=F.relu(self.conv2(h))
        h=self.max_pool(h)
        h=F.relu(self.conv3(h))
        h=h.view(-1,self.num_flat_features(h))
        h=F.relu(self.dense_1(h))
        out=F.relu(self.dense_2(h))
        return out
    
    def num_flat_features(self, x):
        size = x.size()[1:]  # all dimensions except the batch dimension
        num_features = 1
        for s in size:
            num_features *= s
        return num_features

In [79]:
model_conv=Convolutional()

for n, p in model_conv.named_parameters():
    print(n, p.size())
    
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_conv.parameters())

conv1.weight torch.Size([6, 1, 5, 5])
conv1.bias torch.Size([6])
conv2.weight torch.Size([16, 6, 5, 5])
conv2.bias torch.Size([16])
conv3.weight torch.Size([120, 16, 5, 5])
conv3.bias torch.Size([120])
dense_1.weight torch.Size([84, 120])
dense_1.bias torch.Size([84])
dense_2.weight torch.Size([10, 84])
dense_2.bias torch.Size([10])


In [80]:
#conv model
batch_size = 1000
train_iter = BatchedIterator(train_X, train_y, batch_size)
dev_iter = BatchedIterator(dev_X, dev_y, batch_size)
test_iter = BatchedIterator(test_X, test_y, batch_size)

all_train_loss = []
all_dev_loss = []
all_train_acc = []
all_dev_acc = []

n_epochs = 10
for epoch in range(n_epochs):
    # training loop
    for bi, (batch_x, batch_y) in enumerate(train_iter.iterate_once()):
        
        y_out = model_conv(batch_x) #forward prop
        loss = criterion(y_out, batch_y) #computes loss of batch
        optimizer.zero_grad() #sets gradients of all model parameters to zero
        loss.backward() #computes gradient 
        optimizer.step() #performs optimalisation step with adam
        
    # one train epoch finished, evaluate on the train and the dev set (NOT the test)
    train_out = model_conv(train_X)
    train_loss = criterion(train_out, train_y)
    all_train_loss.append(train_loss.item()) #.item() tensor -> int
    train_pred = train_out.max(axis=1)[1]
    train_acc = torch.eq(train_pred, train_y).sum().float() / len(train_X) #.eq(x,y) counts x[i]=y[i]
    all_train_acc.append(train_acc)
    
    dev_out = model_conv(dev_X)
    dev_loss = criterion(dev_out, dev_y)
    all_dev_loss.append(dev_loss.item())
    dev_pred = dev_out.max(axis=1)[1]
    dev_acc = torch.eq(dev_pred, dev_y).sum().float() / len(dev_X)
    all_dev_acc.append(dev_acc)
    
    print(f"Epoch: {epoch}\n  train accuracy: {train_acc}  train loss: {train_loss}")
    print(f"  dev accuracy: {dev_acc}  dev loss: {dev_loss}")

Epoch: 0
  train accuracy: 0.6695399880409241  train loss: 0.9920851588249207
  dev accuracy: 0.6715999841690063  dev loss: 0.9942250847816467
Epoch: 1
  train accuracy: 0.7002000212669373  train loss: 0.8279196619987488
  dev accuracy: 0.6972000002861023  dev loss: 0.841567873954773
Epoch: 2
  train accuracy: 0.7117199897766113  train loss: 0.7849411964416504
  dev accuracy: 0.7088000178337097  dev loss: 0.7995408177375793
Epoch: 3
  train accuracy: 0.7215399742126465  train loss: 0.7590897083282471
  dev accuracy: 0.7195000052452087  dev loss: 0.775739312171936
Epoch: 4
  train accuracy: 0.7283400297164917  train loss: 0.7377325296401978
  dev accuracy: 0.7253000140190125  dev loss: 0.7566822171211243
Epoch: 5
  train accuracy: 0.7289999723434448  train loss: 0.7300497889518738
  dev accuracy: 0.7232000231742859  dev loss: 0.7545873522758484
Epoch: 6
  train accuracy: 0.732420027256012  train loss: 0.7094653248786926
  dev accuracy: 0.7249000072479248  dev loss: 0.7400246858596802
Ep

(7200000, 720000)