In [1]:
import math

import matplotlib.pyplot as plt
import torch
from   torch import nn, optim
import torch.nn.functional as F
from   torch.utils.data import DataLoader, TensorDataset
from   torchvision import datasets, transforms

In [2]:
# 256x4 matrix of standard-normal draws, scaled down by 1/sqrt(256).
# requires_grad_() switches on gradient tracking in place; being the last
# expression of the cell, its return value is what gets displayed.
W = torch.randn(256, 4) / math.sqrt(256)
W.requires_grad_()

tensor([[-0.0625, -0.0360,  0.0428, -0.0627],
        [ 0.0290, -0.0901,  0.0452,  0.0170],
        [-0.0469,  0.0030, -0.0849,  0.0567],
        ...,
        [-0.0629,  0.0163,  0.0182, -0.0351],
        [-0.1026,  0.0437,  0.1005,  0.0608],
        [ 0.0586, -0.0132,  0.0254, -0.0386]], requires_grad=True)

In [3]:
# Four zeros with gradient tracking enabled at construction time.
# Presumably the bias that pairs with W (length matches W's 2nd dim) -- confirm.
b = torch.zeros(4, requires_grad=True)

In [4]:
# Bind the functional cross-entropy under a shorter name (no call happens here).
loss_func = F.cross_entropy
# Usage sketch (mod, X and y are not defined in this notebook):
# loss = loss_func(mod(X), y)

In [5]:
#opt = optim.SGD(mod.parameters(), lr=ETA)

In [6]:
'''
with torch.no_grad():
    for param in mod.parameters():
        param -= param.grad * ETA # grad desc step
    mod.zero_grad()
'''

# Equivalent to:
'''
opt.step()
opt.zero_grad()
'''

'\nopt.step()\nopt.zero_grad()\n'

In [7]:
'''
train_dataset = TensorDataset(X_train, y_train)
train_dataloader = DataLoader(train_dataset, batch_size=BATCH)
'''

# now instead of manually iterating through data like:
'''
for i in range((n - 1) // bs + 1):
    X_batch = X_train[start:end]
    y_batch = y_train[start:end]
    preds = mod(X_batch)
'''

# ...can simplify to
'''
for X_batch, y_batch in train_dataloader:
    preds = mod(X_batch)
'''

'\nfor X_batch, y_batch in train_dataloader:\n    preds = mod(X_batch)\n'

In [8]:
# 1-D float tensor built from a Python list.
points = torch.tensor([1., 4., 2., 1., 3., 5.])
# Indexing gives back a 0-dim tensor rather than a Python number.
points[0]

tensor(1.)

In [9]:
# float() unwraps the single-element tensor into a plain Python float.
float(points[0])

1.0

In [10]:
# One dimension holding six elements.
points.shape

torch.Size([6])

In [11]:
# Rebind points to a 3x2 tensor.
points = torch.tensor([[1., 4.], [2., 1.], [3., 5.]])
# The backing storage is flat: the six values appear one row after another.
points.storage()

 1.0
 4.0
 2.0
 1.0
 3.0
 5.0
[torch.FloatStorage of size 6]

In [12]:
# The .shape attribute and the .size() method report the same torch.Size.
points.shape, points.size()

(torch.Size([3, 2]), torch.Size([3, 2]))

In [13]:
points.storage_offset() # position within the storage of this tensor's first element

0

In [14]:
# Row 1 starts two elements into the storage, right after row 0's two values.
points[1].storage_offset()

2

In [15]:
# Storage elements to skip per unit step along each dimension:
# 2 to reach the next row, 1 to reach the next column.
points.stride() 

(2, 1)

In [16]:
# Same constructor, this time stating the element type explicitly.
points = torch.tensor([[1., 2.], [3., 4.]], dtype=torch.float32)

In [17]:
# ...and additionally stating the device the tensor is created on (CPU here).
points = torch.tensor(
    [[1., 2.], [3., 4.]], dtype=torch.float32, device='cpu')

In [18]:
#points2 = points.to(device='cuda')
#points3 = points.to(device='cuda:0')

In [19]:
# CNN for MNIST
class ConvNet(nn.Module):
    """Small convolutional classifier for single-channel 28x28 images.

    Layout: two 3x3 convolutions (1 -> 16 -> 32 channels, stride 1, no
    padding), a 2x2 max-pool, then two fully connected layers (4608 -> 64
    -> 10). With a 28x28 input the spatial size goes 28 -> 26 -> 24 -> 12,
    which is where the ``12 * 12 * 32`` input width of ``fc1`` comes from.

    ``forward`` returns log-probabilities (``log_softmax`` over dim 1), so
    pair it with ``F.nll_loss`` rather than ``F.cross_entropy``.
    """

    def __init__(self):
        super().__init__()
        self.cn1 = nn.Conv2d(1, 16, 3, 1)
        self.cn2 = nn.Conv2d(16, 32, 3, 1)
        # Channel-wise dropout on the 4-D feature maps coming out of the pool.
        self.dp1 = nn.Dropout2d(0.1)
        # Element-wise dropout on the flattened (N, 64) activations; Dropout2d
        # is deprecated for 2-D inputs, and plain Dropout behaves the same there.
        self.dp2 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(12 * 12 * 32, 64)
        # Was `nn.Lenear`, which raised AttributeError on construction.
        self.fc2 = nn.Linear(64, 10)

    def forward(self, x):
        """Map a batch of shape (N, 1, 28, 28) to (N, 10) log-probabilities."""
        x = self.cn1(x)
        x = F.relu(x)
        x = self.cn2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dp1(x)
        # Keep the batch dimension, collapse channels and spatial dims.
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dp2(x)
        x = self.fc2(x)
        op = F.log_softmax(x, dim=1)
        return op

In [20]:
def train(mod, device, train_dataloader, optim, epoch):
    """Run one training epoch and print the loss every 10th batch.

    Args:
        mod: model whose forward pass returns log-probabilities
            (the loss used is ``F.nll_loss``).
        device: device that each batch is moved to before the forward pass.
        train_dataloader: iterable of ``(X, y)`` batches; must support
            ``len()`` and expose ``.dataset`` for the progress message.
        optim: optimizer instance stepping ``mod``'s parameters. The name
            shadows the ``torch.optim`` module inside this function only.
        epoch: epoch number, used only in the progress message.

    Returns:
        None. Parameters of ``mod`` are updated in place.
    """
    mod.train()
    for i, (X, y) in enumerate(train_dataloader):
        X, y = X.to(device), y.to(device)
        optim.zero_grad()
        pred_prob = mod(X)
        # Negative log likelihood; was `F.nll_`, which does not exist.
        loss = F.nll_loss(pred_prob, y)
        loss.backward()
        optim.step()
        if i % 10 == 0:
            # The percentage spec was `::.2f`, which raises ValueError
            # ("Invalid format specifier") as soon as the line is reached.
            print(f'epoch: {epoch} '
                  f'[{i * len(X)}/{len(train_dataloader.dataset)} '
                  f'({100. * i / len(train_dataloader):.2f}%)]\t'
                  f'training loss: {loss.item():.6f}')

In [22]:
def test(mod, device, test_dataloader):
    mod.eval()
    loss = 0
    success = 0
    with torch.no_grad():
        for X, y in test_dataloader:
            X, y = X.to(device), y.to(device)
            pred_prob = mod(X)
            loss += F.nll_loss(pred_prob, y, reduction='sum').items()
            pred = pred_prob.argmax(dim=1, keepdim=True)
            success += pred.eq(y.view_as(pred)).sum().item()
    n = len(test_dataloader.dataset)
    loss /= n
    print(f'\nTest dataset: Overall loss: {loss:.4f}, Overall Accuracy: '
          f'{success}/{n} ({100*success / n:.2f}%)')

In [26]:
# Root directory handed to datasets.MNIST below; the files end up under
# DATA/MNIST/raw. The path is relative, so it resolves against the kernel's
# working directory.
DATA = '../../../data'

In [29]:
# Mean / standard deviation of the MNIST training pixels on the scale the
# model actually sees: transforms.ToTensor() maps uint8 [0, 255] to [0.0, 1.0]
# by dividing by 255. The earlier values (0.1302 / 0.3069) were derived with a
# divisor of 256 and were therefore slightly too small.
NORM_MEAN = 0.1307 # train_X.mean() / 255.
NORM_SD = 0.3081 # train_X.std() / 255.
# Mini-batch size for the training DataLoader.
BATCH = 32

In [30]:
# Training split of MNIST (fetched into DATA on first use), converted to
# tensors, standardised with NORM_MEAN / NORM_SD and served in shuffled
# mini-batches of BATCH samples.
train_dataloader = DataLoader(
    dataset=datasets.MNIST(
        root=DATA,
        train=True,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(NORM_MEAN,), std=(NORM_SD,)),
        ]),
        download=True,
    ),
    batch_size=BATCH,
    shuffle=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../../../data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting ../../../data/MNIST/raw/train-images-idx3-ubyte.gz to ../../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../../../data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting ../../../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../../../data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting ../../../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../../../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting ../../../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../../../data/MNIST/raw



  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [32]:
# Held-out split of MNIST read from DATA (no download requested here), with
# the same tensor conversion and standardisation as the training pipeline.
# Served in fixed order, 500 samples per batch.
test_dataloader = DataLoader(
    dataset=datasets.MNIST(
        root=DATA,
        train=False,
        transform=transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize(mean=(NORM_MEAN,), std=(NORM_SD,)),
        ]),
    ),
    batch_size=500,
    shuffle=False,
)