In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm

In [None]:
# Load the 70,000-sample MNIST dataset from OpenML.
# as_frame=True pins the return type to pandas objects (DataFrame / Series)
# rather than relying on the version-dependent default of fetch_openml.
# cache=False means the dataset is downloaded again on every run.
mnist = fetch_openml('mnist_784', cache=False, as_frame=True)
mnist.data.shape

(70000, 784)

In [None]:
class MyDataset(Dataset):
  """Map-style dataset pairing each sample in `x` with the label at the same index in `y`.

  Args:
    x: indexable, sized collection of samples.
    y: indexable, sized collection of labels, one per sample.

  Raises:
    ValueError: if `x` and `y` do not have the same length.
  """

  def __init__(self, x, y):
    # Fail fast on misaligned inputs: otherwise a mismatch only shows up later
    # as an IndexError mid-epoch or as samples that are silently never used.
    if len(x) != len(y):
      raise ValueError(
          "x and y must have the same length, got {} and {}".format(len(x), len(y)))
    self.data = x
    self.labels = y

  def __len__(self):
    """Return the number of (sample, label) pairs."""
    return len(self.labels)

  def __getitem__(self, index):
    """Return the `(sample, label)` tuple stored at position `index`."""
    return self.data[index], self.labels[index]

In [None]:
# preprocess data
# Scale pixel values by 1/255 and turn every flat 784-vector into a
# (channel, height, width) = (1, 28, 28) image. This is done in one vectorised
# step instead of converting the rows one at a time in a Python loop.
data = np.asarray(mnist.data, dtype=np.float32) / 255.0
label = mnist.target.astype('int64')
X = torch.from_numpy(data).reshape(-1, 1, 28, 28)  # X[i] is one (1, 28, 28) float32 image
y = np.asarray(label)                              # y[i] is a numpy.int64 class index
dataset = MyDataset(X, y)

# Hold out everything beyond the first 50,000 randomly chosen samples for
# validation. The generator is seeded so the partition is the same on every run.
n_train = 50000
n_val = len(dataset) - n_train
split_rng = torch.Generator().manual_seed(0)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [n_train, n_val], generator=split_rng)

In [None]:
# Pull a single (image, label) pair out of the training split and report the
# Python type of each element, one per line.
image, label = train_dataset[100]
print(type(image), type(label), sep="\n")

<class 'torch.Tensor'>
<class 'numpy.int64'>


In [None]:
# Check whether the sample tensor is stored on a CUDA device.
image.is_cuda

False

In [None]:
import matplotlib.pyplot as plt
# X supports plain positional indexing and has no pandas-style `.iloc`
# accessor (using it raised AttributeError). Each entry is already an image
# tensor, so no torch.tensor(...) conversion is needed either.
a = X[2].reshape(1, 28, 28)
# Drop the channel axis: imshow expects a 2-D (height, width) array here.
plt.imshow(a.squeeze())

AttributeError: ignored

In [None]:
# Both loaders share the same batch size and worker count; only the training
# loader is asked to shuffle its samples.
loader_kwargs = dict(batch_size=512, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_dataset, shuffle=False, **loader_kwargs)

In [None]:
# Use the first CUDA GPU when one is available; otherwise fall back to the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
class Net(nn.Module):
    """Small CNN that maps (N, 1, 28, 28) images to 10 unnormalised class scores.

    Two conv -> max-pool -> ReLU stages feed a three-layer fully connected
    head. Dropout is applied after the first conv stage (p=0.10) and after the
    first dense layer (p=0.50).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=10, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=10, out_channels=10, kernel_size=3)
        # Dense head. For 28x28 inputs the second pooling stage yields
        # 10 channels of 5x5, i.e. 250 flattened features.
        self.fc1 = nn.Linear(in_features=250, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.fc3 = nn.Linear(in_features=60, out_features=10)
        # Regularisation.
        self.dropout1 = nn.Dropout(p=0.10)
        self.dropout2 = nn.Dropout(p=0.50)

    def forward(self, x):
        """Return the raw logits, shape (N, 10), for a batch of images `x`."""
        pooled = F.max_pool2d(self.conv1(x), (2, 2))
        features = self.dropout1(F.relu(pooled))

        pooled = F.max_pool2d(self.conv2(features), 2)
        features = F.relu(pooled)

        # Keep the batch axis, flatten everything else.
        flat = torch.flatten(features, 1)

        hidden = self.dropout2(F.relu(self.fc1(flat)))
        hidden = F.relu(self.fc2(hidden))
        # No softmax here: the loss function used for training applies it itself.
        return self.fc3(hidden)

# Instantiate the network directly on the selected device and create the
# cross-entropy criterion that is applied to its raw outputs.
net = Net().to(device)
criterion = nn.CrossEntropyLoss()

In [None]:
# Train the network with SGD + momentum and show the running mean training
# loss of the current epoch in the progress bar.
NUM_EPOCHS = 100

optimizer = optim.SGD(net.parameters(), lr=0.01, momentum=0.5)
net = net.float()
for epoch in range(NUM_EPOCHS):  # loop over the dataset multiple times
    print("\nStarting epoch {}".format(epoch+1))

    # Make sure dropout is active, even if the model was put in eval mode
    # by another cell since the last run.
    net.train()

    total = 0           # number of samples seen so far in this epoch
    running_loss = 0.0  # sum of per-sample losses seen so far in this epoch

    # to make a beautiful progress bar
    progress = tqdm(train_loader)
    for batch in progress:
        # get the data points
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.to(device)
        # zero the parameter gradients (else, they are accumulated)
        optimizer.zero_grad()

        # forward the data through the network
        outputs = net(inputs.float())
        # calculate the loss given the output of the network and the target labels
        loss = criterion(outputs, labels)
        # calculate the gradients of the network w.r.t. its parameters
        loss.backward()
        # Let the optimiser take an optimization step using the calculated gradients
        optimizer.step()

        # `loss` is the mean over the batch, so weight it by the batch size to
        # get a true per-sample average. `.item()` converts it to a Python
        # float; adding the tensor itself would accumulate autograd history.
        batch_size = labels.size(0)
        running_loss += loss.item() * batch_size
        total += batch_size

        progress.set_description("loss: {:.5f}".format(running_loss/total))

print('Finished Training')


Starting epoch 1


loss: 0.00021:  92%|█████████▏| 90/98 [00:01<00:00, 74.86it/s]


KeyboardInterrupt: ignored

In [None]:
# Shape of the last `inputs` batch assigned by the training loop.
inputs.size()

torch.Size([512, 1, 28, 28])

In [None]:
# Shape of the last `labels` batch assigned by the training loop.
labels.size()

torch.Size([512])

In [None]:
# Shape of the network output for the last batch: one row per sample, one column per class.
outputs.data.size()

torch.Size([512, 10])

In [None]:
# Switch the network to evaluation mode before measuring accuracy.
net.eval()
class Accuracy:
    """A class to keep track of the accuracy while training.

    Accumulates the number of correct predictions and the number of samples
    over any number of `update` calls; `compute` returns their ratio.
    """
    def __init__(self):
        # Single source of truth for the initial state.
        self.reset()

    def reset(self):
        """Resets the internal state"""
        self.correct = 0
        self.total = 0

    def update(self, output, labels):
        """
        Updates the internal state to later compute the overall accuracy

        output: the output of the network for a batch, one row of class scores per sample
        labels: the target labels (class indices), one per sample
        """
        # The index of the largest score in each row is the predicted class.
        _, predicted = torch.max(output, dim=1)

        self.total += labels.size(0)
        self.correct += (predicted == labels).sum().item() # .item() gets the number, not the tensor

    def compute(self):
        """Return the fraction of correct predictions seen since the last reset.

        Returns 0.0 when no samples have been recorded yet, instead of raising
        ZeroDivisionError.
        """
        if self.total == 0:
            return 0.0
        return self.correct/self.total

def _evaluate_accuracy(model, loader, metric):
    """Return the accuracy of `model` over every batch produced by `loader`.

    model:  the network to evaluate; it is switched to eval mode
    loader: iterable yielding (inputs, labels) batches
    metric: object with reset() / update(outputs, labels) / compute(); it is
            reset first, so the result covers `loader` only
    """
    metric.reset()
    # Evaluation mode, so that dropout does not perturb the predictions.
    model.eval()
    # No gradients are needed for evaluation, so skip recording them.
    with torch.no_grad():
        for inputs, labels in tqdm(loader):
            # get the data points onto the same device as the model
            inputs, labels = inputs.to(device), labels.to(device)
            # forward the data through the network
            outputs = model(inputs.float())
            metric.update(outputs, labels)
    return metric.compute()


accuracy = Accuracy()

train_accuracy = _evaluate_accuracy(net, train_loader, accuracy)
print("Training Accuracy: {:.2f}%".format(100 * train_accuracy))

# Reported as "Testing" accuracy, measured on the held-out validation loader.
val_accuracy = _evaluate_accuracy(net, val_loader, accuracy)
print("\nTesting Accuracy: {:.2f}%".format(100 * val_accuracy))

100%|██████████| 98/98 [00:00<00:00, 107.59it/s]


Training Accuracy: 98.52%


100%|██████████| 40/40 [00:00<00:00, 73.56it/s]


Testing Accuracy: 97.97%



