In [2]:
# necessary imports
import torch,torchvision
import torch.cuda
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
from torchvision import datasets
import matplotlib.pyplot as plt
import numpy as np



In [3]:
mydataset = datasets.FashionMNIST(
    root="data",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor()
)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to data/FashionMNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/26421880 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/29515 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/train-labels-idx1-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/4422102 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to data/FashionMNIST/raw

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/5148 [00:00<?, ?it/s]

Extracting data/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to data/FashionMNIST/raw



In [4]:
torch.cuda.is_available()

True

In [5]:
# Realization of LeNet 
import torch.nn as nn

class LeNet(nn.Module):
    def __init__(self):
        super().__init__()

        # convolution layers
        self._body = nn.Sequential(
            # First convolution Layer
            # input size = (32, 32), output size = (28, 28)

            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5),
            # ReLU activation

            nn.ReLU(),

            # Max pool 2-d
            
            nn.MaxPool2d(kernel_size=2),
            
            # Second convolution layer
            # input size = (14, 14), output size = (10, 10)
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2),
            # output size = (5, 5)
        )
        
        # Fully connected layers
        self._head = nn.Sequential(
            # First fully connected layer
            # in_features = total number of weights in last conv layer = 16 * 5 * 5
            nn.Linear(in_features=256, out_features=120), 
            
            # ReLU activation
            nn.ReLU(inplace=True),
            
            # second fully connected layer
            # in_features = output of last linear layer = 120 
            nn.Linear(in_features=120, out_features=84), 
            
            # ReLU activation
            nn.ReLU(inplace=True),
            
            # Third fully connected layer which is also output layer
            # in_features = output of last linear layer = 84
            # and out_features = number of classes = 10 (MNIST data 0-9)
            nn.Linear(in_features=84, out_features=10)
        )

    def forward(self, x):
        # apply feature extractor
        x = self._body(x)
        # flatten the output of conv layers
        # dimension should be batch_size * number_of weight_in_last conv_layer
        x = x.view(x.size()[0], -1)
        # apply classification head
        x = self._head(x)
        return x

In [7]:
import time

# device = torch.device('cpu')
device = torch.device('cuda:0')

# Create model and move model to device
mynn = LeNet()
mynn = mynn.to(device = device)

# Hyper parameters for training
lr = .04
batch_size = 64
N_epochs = 1

# Create dataloaders for training and validation
train_dataloader = DataLoader(mydataset, batch_size = batch_size, shuffle = True)

# Create optimizer
optimizer = torch.optim.SGD(mynn.parameters(), lr = lr) # this line creates a optimizer, and we tell optimizer we are optimizing the parameters in mymodel

losses = [] # training losses of each epoch
validate_losses = [] # validation losses of each epoch
num_batches = len(train_dataloader)

for epoch in range(N_epochs):
    batch_loss = []
    
    per_batch_time = 0.0

    for batch_id, (x_batch, y_batch) in enumerate(train_dataloader):
        start_time = time.time()
        # data to device
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        # pass input data to get the prediction outputs by the current model
        prediction = mynn(x_batch)

        # compare prediction and the actual output label and compute the loss
        loss = nn.functional.cross_entropy(prediction,y_batch)

        # compute the gradient
        optimizer.zero_grad()
        loss.backward()

        # update parameters
        optimizer.step()

        end_time = time.time()
        per_batch_time += (end_time - start_time)
        # add this loss to batch_loss for later computation
        batch_loss.append(loss.detach().clone().to("cpu").numpy())
    
    losses.append(np.mean(np.array(batch_loss)))
    per_batch_time = per_batch_time/num_batches
    print(f"Epoch = {epoch}, device = {device}, per_batch_time = {per_batch_time}, train_loss = {losses[-1]}")




Epoch = 0, device = cuda:0, per_batch_time = 0.0019713782552462904, train_loss = 1.4621922969818115
