## **MNIST**

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np

In [2]:
# Fix the torch random state: CPU generator first, then the current CUDA
# device, then every CUDA device, all with seed 0.
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
  seed_fn(0)

In [3]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [4]:
import torchvision
import torchvision.transforms as transforms

# Build the train split first, then the test split. Both are stored under
# MNIST_data/, downloaded on demand, and converted to tensors by ToTensor().
train_dataset, test_dataset = (
  torchvision.datasets.MNIST(
    root="MNIST_data/",
    train=is_train,
    transform=transforms.ToTensor(),
    download=True,
  )
  for is_train in (True, False)
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/train-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-images-idx3-ubyte.gz to MNIST_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting MNIST_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to MNIST_data/MNIST/raw



In [6]:
# Look at the first training sample to see the image tensor layout.
x, y = train_dataset[0]
x.size()

torch.Size([1, 28, 28])

In [7]:
batch_size = 128

# Only the training loader shuffles; the test loader keeps the dataset order.
train_dataloader = torch.utils.data.DataLoader(
  dataset=train_dataset,
  batch_size=batch_size,
  shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
  dataset=test_dataset,
  batch_size=batch_size,
  shuffle=False,
)

In [8]:
class CNN(nn.Module):
  """Small convolutional classifier for 1-channel 28x28 images.

  Two stages of (3x3 conv with padding 1 -> ReLU -> 2x2 max-pool) shrink the
  spatial size 28 -> 14 -> 7 while widening the channels 1 -> 32 -> 64. The
  flattened 64*7*7 features go through a 128-unit hidden layer and are then
  projected to 10 output scores.
  """

  def __init__(self):
    super().__init__()
    # Parameterised layers are created in forward order; under a fixed seed
    # the creation order decides which random draws each layer receives.
    self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
    self.fc1 = nn.Linear(in_features=64*7*7, out_features=128)
    self.fc2 = nn.Linear(in_features=128, out_features=10)
    self.activation = nn.ReLU()

  def forward(self, x):
    """Return the raw scores (no softmax applied), one row of 10 per sample."""
    # Both convolutional stages share the same ReLU and pooling modules.
    for conv in (self.conv1, self.conv2):
      x = self.pool(self.activation(conv(x)))
    features = torch.flatten(x, start_dim=1)  # keep the batch dimension
    hidden = self.activation(self.fc1(features))
    return self.fc2(hidden)

In [9]:
# Instantiate the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [10]:
# Adam over all of the model's parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [11]:
# Cross-entropy computed from raw scores, averaged over the batch
# ('mean' is the default reduction, spelled out here).
criterion = nn.CrossEntropyLoss(reduction='mean')

In [13]:
epochs = 30

# Nothing inside the loop switches the model out of training mode, so a single
# call before the first epoch is enough.
model.train()
total_batch_num = len(train_dataloader)  # constant across epochs

for epoch in range(epochs):
  avg_cost = 0.0

  for b_x, b_y in train_dataloader:
    b_x, b_y = b_x.to(device), b_y.to(device)

    optimizer.zero_grad()
    logits = model(b_x) # forward propagation
    loss = criterion(logits, b_y) # get cost
    loss.backward() # backward propagation
    optimizer.step() # update parameters

    # Accumulate a plain Python float. Adding the loss tensor itself would
    # keep autograd history attached to the running average for the whole
    # epoch.
    avg_cost += loss.item() / total_batch_num
  print('Epoch : {} / {}, cost : {}'.format(epoch+1, epochs, avg_cost))

Epoch : 1 / 30, cost : 0.049544744193553925
Epoch : 2 / 30, cost : 0.03532176464796066
Epoch : 3 / 30, cost : 0.02807445637881756
Epoch : 4 / 30, cost : 0.021563606336712837
Epoch : 5 / 30, cost : 0.01771136000752449
Epoch : 6 / 30, cost : 0.015364877879619598
Epoch : 7 / 30, cost : 0.011270388029515743
Epoch : 8 / 30, cost : 0.010334578342735767
Epoch : 9 / 30, cost : 0.008652827702462673
Epoch : 10 / 30, cost : 0.008359694853425026
Epoch : 11 / 30, cost : 0.0067559522576630116
Epoch : 12 / 30, cost : 0.0057537308894097805
Epoch : 13 / 30, cost : 0.003946640994399786
Epoch : 14 / 30, cost : 0.0049757217057049274
Epoch : 15 / 30, cost : 0.005081916227936745
Epoch : 16 / 30, cost : 0.005046988371759653
Epoch : 17 / 30, cost : 0.003985259681940079
Epoch : 18 / 30, cost : 0.003964055795222521
Epoch : 19 / 30, cost : 0.0022959981579333544
Epoch : 20 / 30, cost : 0.0024622026830911636
Epoch : 21 / 30, cost : 0.002475167391821742
Epoch : 22 / 30, cost : 0.004593860357999802
Epoch : 23 / 30, 

In [15]:
correct = 0
total = 0

model.eval()
# Evaluation needs no gradients, so the whole pass runs with autograd disabled.
with torch.no_grad():
  for b_x, b_y in test_dataloader:
    b_y = b_y.to(device)
    logits = model(b_x.to(device))

    # Softmax is monotonic, so the argmax of the raw scores is already the
    # predicted class; no probabilities need to be computed.
    predicts = torch.argmax(logits, dim=1)

    total += len(b_y)
    correct += (predicts == b_y).sum().item()

# Integer (floor) percentage of correctly classified test samples.
print(f'Accuracy of the network on test images: {100 * correct // total}%')

Accuracy of the network on test images: 99%


## **CIFAR-10**

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim

In [16]:
# Reset the torch random state before the CIFAR-10 run: CPU generator, the
# current CUDA device, then every CUDA device, all with seed 0.
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
  seed_fn(0)

In [17]:
# Use the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [18]:
import torchvision
import torchvision.transforms as transforms

# Build the train split first, then the test split. Both are stored under
# CIFAR10/, downloaded on demand, and converted to tensors by ToTensor().
train_dataset, test_dataset = (
  torchvision.datasets.CIFAR10(
    root="CIFAR10/",
    train=is_train,
    transform=transforms.ToTensor(),
    download=True,
  )
  for is_train in (True, False)
)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to CIFAR10/cifar-10-python.tar.gz


  0%|          | 0/170498071 [00:00<?, ?it/s]

Extracting CIFAR10/cifar-10-python.tar.gz to CIFAR10/
Files already downloaded and verified


In [19]:
batch_size = 128

# Only the training loader shuffles; the test loader keeps the dataset order.
train_dataloader = torch.utils.data.DataLoader(
  dataset=train_dataset,
  batch_size=batch_size,
  shuffle=True,
)
test_dataloader = torch.utils.data.DataLoader(
  dataset=test_dataset,
  batch_size=batch_size,
  shuffle=False,
)

In [20]:
class CNN(nn.Module):
  """Small convolutional classifier for 3-channel 32x32 images.

  Two stages of (5x5 unpadded conv -> ReLU -> 2x2 max-pool) take the spatial
  size 32 -> 28 -> 14 -> 10 -> 5 and the channels 3 -> 6 -> 16. The flattened
  16*5*5 features go through hidden layers of 120 and 84 units and are then
  projected to 10 output scores.
  """

  def __init__(self):
    super().__init__()
    # Parameterised layers are created in forward order; under a fixed seed
    # the creation order decides which random draws each layer receives.
    self.conv1 = nn.Conv2d(in_channels=3, out_channels=6, kernel_size=5)
    self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
    self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
    self.fc1 = nn.Linear(in_features=16*5*5, out_features=120)
    self.fc2 = nn.Linear(in_features=120, out_features=84)
    self.fc3 = nn.Linear(in_features=84, out_features=10)
    self.activation = nn.ReLU()

  def forward(self, x):
    """Return the raw scores (no softmax applied), one row of 10 per sample."""
    # Convolutional feature extractor: both stages share ReLU and pooling.
    for conv in (self.conv1, self.conv2):
      x = self.pool(self.activation(conv(x)))
    hidden = torch.flatten(x, start_dim=1)  # keep the batch dimension
    # Hidden fully connected layers, each followed by ReLU.
    for fc in (self.fc1, self.fc2):
      hidden = self.activation(fc(hidden))
    return self.fc3(hidden)

In [21]:
# Instantiate the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [22]:
# Adam over all of the model's parameters with a learning rate of 1e-3.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [23]:
# Cross-entropy computed from raw scores, averaged over the batch
# ('mean' is the default reduction, spelled out here).
criterion = nn.CrossEntropyLoss(reduction='mean')

In [24]:
epochs = 30

# Nothing inside the loop switches the model out of training mode, so a single
# call before the first epoch is enough.
model.train()
total_batch_num = len(train_dataloader)  # constant across epochs

for epoch in range(epochs):
  avg_cost = 0.0

  for b_x, b_y in train_dataloader:
    b_x, b_y = b_x.to(device), b_y.to(device)

    optimizer.zero_grad()
    logits = model(b_x) # forward propagation
    loss = criterion(logits, b_y) # get cost
    loss.backward() # backward propagation
    optimizer.step() # update parameters

    # Accumulate a plain Python float. Adding the loss tensor itself would
    # keep autograd history attached to the running average for the whole
    # epoch.
    avg_cost += loss.item() / total_batch_num
  print('Epoch : {} / {}, cost : {}'.format(epoch+1, epochs, avg_cost))

Epoch : 1 / 30, cost : 1.8605352640151978
Epoch : 2 / 30, cost : 1.540860891342163
Epoch : 3 / 30, cost : 1.427911639213562
Epoch : 4 / 30, cost : 1.342151165008545
Epoch : 5 / 30, cost : 1.2803467512130737
Epoch : 6 / 30, cost : 1.2282757759094238
Epoch : 7 / 30, cost : 1.1907529830932617
Epoch : 8 / 30, cost : 1.1504677534103394
Epoch : 9 / 30, cost : 1.1273761987686157
Epoch : 10 / 30, cost : 1.096234679222107
Epoch : 11 / 30, cost : 1.0672800540924072
Epoch : 12 / 30, cost : 1.0443137884140015
Epoch : 13 / 30, cost : 1.0183218717575073
Epoch : 14 / 30, cost : 0.9997025728225708
Epoch : 15 / 30, cost : 0.9771366119384766
Epoch : 16 / 30, cost : 0.9679811596870422
Epoch : 17 / 30, cost : 0.9411746859550476
Epoch : 18 / 30, cost : 0.9289048910140991
Epoch : 19 / 30, cost : 0.9131205081939697
Epoch : 20 / 30, cost : 0.8981179594993591
Epoch : 21 / 30, cost : 0.8878688216209412
Epoch : 22 / 30, cost : 0.8775702118873596
Epoch : 23 / 30, cost : 0.8681841492652893
Epoch : 24 / 30, cost : 

In [25]:
correct = 0
total = 0

model.eval()
# Evaluation needs no gradients, so the whole pass runs with autograd disabled.
with torch.no_grad():
  for b_x, b_y in test_dataloader:
    b_y = b_y.to(device)
    logits = model(b_x.to(device))

    # Softmax is monotonic, so the argmax of the raw scores is already the
    # predicted class; no probabilities need to be computed.
    predicts = torch.argmax(logits, dim=1)

    total += len(b_y)
    correct += (predicts == b_y).sum().item()

# Integer (floor) percentage of correctly classified test samples.
print(f'Accuracy of the network on test images: {100 * correct // total} %')

Accuracy of the network on test images: 63 %
