In [1]:
import time
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn.functional as F
import torch

In [2]:
# Hyperparameters
RANDOM_SEED = 1
BATCH_SIZE = 100
NUM_EPOCHS = 100
DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [38]:
# Load dataswet
train_dataset = datasets.MNIST(
    root="data", train=True, transform=transforms.ToTensor(), download=True)

test_dataset = datasets.MNIST(root='data', 
                              train=False, 
                              transform=transforms.ToTensor())

train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# Check the datasets
for images, labels in train_loader:
    print("Image batch dimensions: ", images.shape)
    print("Image label dimensions: ", labels.shape)
    break

Image batch dimensions:  torch.Size([100, 1, 28, 28])
Image label dimensions:  torch.Size([100])


In [48]:
class MLP(torch.nn.Module):

    def __init__(self, num_features, num_hidden, num_classes):
        super().__init__()
        
        self.num_classes = num_classes
        
        ### 1st hidden layer
        self.linear_1 = torch.nn.Linear(num_features, num_hidden)

        ### Output layer
        self.linear_out = torch.nn.Linear(num_hidden, num_classes)

    def forward(self, x):
        out = self.linear_1(x)
        out = torch.sigmoid(out)
        logits = self.linear_out(out)
        #probas = torch.softmax(logits, dim=1)
        return logits#, probas


# Model initialization
torch.manual_seed(RANDOM_SEED)

model = MLP(num_features=28*28, num_hidden=100, num_classes=10)

model = model.to(DEVICE)

optimizer = torch.optim.SGD(model.parameters(), lr=0.1)



In [49]:
# compute loss (helper function)
def compute_loss(net, data_loader):
    curr_loss = 0. 
    with torch.no_grad():
        for batch_num, (features, targets) in enumerate(data_loader):
            features = features.view(-1, 28*28).to(DEVICE)
            targets = targets.to(DEVICE)
            logits = net(features)
            loss = F.cross_entropy(logits, targets)
            curr_loss += loss
        return curr_loss/batch_num

In [50]:

# Train
start_time = time.time()
minibatch_cost = []
epoch_cost = []

for epoch in range(NUM_EPOCHS):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        features = features.view(-1, 28*28).to(DEVICE)
        targets = targets.to(DEVICE)
        
        # Model and backprop
        logit_preds = model(features)
        cost = F.cross_entropy(logit_preds, targets) 
        
        cost.backward() # compute gradient
        
        # Update weights
        optimizer.step()
        
        # Logging 
        minibatch_cost.append(cost.item())
        if not batch_idx % 50:
            print(f"Epoch : {epoch+1} | Batch {batch_idx}/{len(train_loader)} | Cost {cost.item():.3f}")
    
    # Compute train loss
    cost = compute_loss(model, train_loader)
    epoch_cost.append(cost)
    print(f"Epoch: {epoch + 1}/{NUM_EPOCHS} | Train cost: {cost:.2f}")
    print(f"Time elapsed: {(time.time() - start_time)/60}")
    
print(f"Total training time: {(time.time() - start_time)/60}")
    

            
            

Epoch : 1 | Batch 0/600 | Cost 2.337
Epoch : 1 | Batch 50/600 | Cost 1.092
Epoch : 1 | Batch 100/600 | Cost 0.329
Epoch : 1 | Batch 150/600 | Cost 0.630
Epoch : 1 | Batch 200/600 | Cost 1.015
Epoch : 1 | Batch 250/600 | Cost 0.868
Epoch : 1 | Batch 300/600 | Cost 0.348
Epoch : 1 | Batch 350/600 | Cost 0.390
Epoch : 1 | Batch 400/600 | Cost 0.464
Epoch : 1 | Batch 450/600 | Cost 0.810
Epoch : 1 | Batch 500/600 | Cost 0.994
Epoch : 1 | Batch 550/600 | Cost 0.918
Epoch: 1/100 | Train cost: 0.88
Time elapsed: 0.17378560304641724
Epoch : 2 | Batch 0/600 | Cost 0.864
Epoch : 2 | Batch 50/600 | Cost 1.141
Epoch : 2 | Batch 100/600 | Cost 1.400
Epoch : 2 | Batch 150/600 | Cost 1.706
Epoch : 2 | Batch 200/600 | Cost 0.982
Epoch : 2 | Batch 250/600 | Cost 1.076
Epoch : 2 | Batch 300/600 | Cost 0.983
Epoch : 2 | Batch 350/600 | Cost 0.795
Epoch : 2 | Batch 400/600 | Cost 0.681
Epoch : 2 | Batch 450/600 | Cost 1.610
Epoch : 2 | Batch 500/600 | Cost 2.001
Epoch : 2 | Batch 550/600 | Cost 3.421
Epoc