# Deep Learning Development

## Data Preparation

### Data loading

In [2]:
from torchvision.datasets import CIFAR10

train_data = CIFAR10(root='./data/CIFAR10/train/', train=True, download=True)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/CIFAR10/train/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:27<00:00, 6195728.92it/s]


Extracting ./data/CIFAR10/train/cifar-10-python.tar.gz to ./data/CIFAR10/train/


In [3]:
print("info: ", train_data)
print("len: ", len(train_data))
print("shape: ", train_data.data.shape)
print("targets: ", train_data.targets)
print("classes: ", train_data.classes)
print("class_to_idx ", train_data.class_to_idx)

info:  Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data/CIFAR10/train/
    Split: Train
len:  50000
shape:  (50000, 32, 32, 3)
targets:  [6, 9, 9, 4, 1, 1, 2, 7, 8, 3, 4, 7, 7, 2, 9, 9, 9, 3, 2, 6, 4, 3, 6, 6, 2, 6, 3, 5, 4, 0, 0, 9, 1, 3, 4, 0, 3, 7, 3, 3, 5, 2, 2, 7, 1, 1, 1, 2, 2, 0, 9, 5, 7, 9, 2, 2, 5, 2, 4, 3, 1, 1, 8, 2, 1, 1, 4, 9, 7, 8, 5, 9, 6, 7, 3, 1, 9, 0, 3, 1, 3, 5, 4, 5, 7, 7, 4, 7, 9, 4, 2, 3, 8, 0, 1, 6, 1, 1, 4, 1, 8, 3, 9, 6, 6, 1, 8, 5, 2, 9, 9, 8, 1, 7, 7, 0, 0, 6, 9, 1, 2, 2, 9, 2, 6, 6, 1, 9, 5, 0, 4, 7, 6, 7, 1, 8, 1, 1, 2, 8, 1, 3, 3, 6, 2, 4, 9, 9, 5, 4, 3, 6, 7, 4, 6, 8, 5, 5, 4, 3, 1, 8, 4, 7, 6, 0, 9, 5, 1, 3, 8, 2, 7, 5, 3, 4, 1, 5, 7, 0, 4, 7, 5, 5, 1, 0, 9, 6, 9, 0, 8, 7, 8, 8, 2, 5, 2, 3, 5, 0, 6, 1, 9, 3, 6, 9, 1, 3, 9, 6, 6, 7, 1, 0, 9, 5, 8, 5, 2, 9, 0, 8, 8, 0, 6, 9, 1, 1, 6, 3, 7, 6, 6, 0, 6, 6, 1, 7, 1, 5, 8, 3, 6, 6, 8, 6, 8, 4, 6, 6, 1, 3, 8, 3, 4, 1, 7, 1, 3, 8, 5, 1, 1, 4, 0, 9, 3, 7, 4, 9, 9, 2, 4, 9, 9, 1, 0, 5, 9, 

In [4]:
print("type: ", type(train_data[0]))
print("len: ", len(train_data[0]))
data, label = train_data[0]
print("type of data: ", type(data))
print(data)

type:  <class 'tuple'>
len:  2
type of data:  <class 'PIL.Image.Image'>
<PIL.Image.Image image mode=RGB size=32x32 at 0x158D9CB20>


In [7]:
print(type(label))
print(label)
print(train_data.classes[label])

<class 'int'>
6
frog


In [8]:
test_data = CIFAR10(root='./data/CIFAR10/test/', 
                    train=False,
                    download=True)
print(test_data)
print("len: ", len(test_data))
print("shape: ", test_data.data.shape)

Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/CIFAR10/test/cifar-10-python.tar.gz


100%|██████████| 170498071/170498071 [00:26<00:00, 6437060.83it/s] 


Extracting ./data/CIFAR10/test/cifar-10-python.tar.gz to ./data/CIFAR10/test/
Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./data/CIFAR10/test/
    Split: Test
len:  10000
shape:  (10000, 32, 32, 3)


### Data Transforms

In [9]:
from torchvision import transforms

train_transfroms = transforms.Compose([
    transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2023, 0.1994, 0.2010)
    )
])

train_data = CIFAR10(root='./data/CIFAR10/train/',
                     train=True,
                     download=True,
                     transform=train_transfroms)

Files already downloaded and verified


In [10]:
print(train_data)

Dataset CIFAR10
    Number of datapoints: 50000
    Root location: ./data/CIFAR10/train/
    Split: Train
    StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )


In [11]:
print(train_data.transforms)

StandardTransform
Transform: Compose(
               RandomCrop(size=(32, 32), padding=4)
               RandomHorizontalFlip(p=0.5)
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )


In [12]:
data, label = train_data[0]

print("type: ", type(data))
print("size: ", data.size())

type:  <class 'torch.Tensor'>
size:  torch.Size([3, 32, 32])


In [13]:
test_transforms = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(
        (0.4914, 0.4822, 0.4465),
        (0.2023, 0.1994, 0.2010)
    )
])

In [14]:
test_data = CIFAR10(
    root='./data/CIFAR10/test/',
    train=False,
    transform=test_transforms
)
print(test_data)

Dataset CIFAR10
    Number of datapoints: 10000
    Root location: ./data/CIFAR10/test/
    Split: Test
    StandardTransform
Transform: Compose(
               ToTensor()
               Normalize(mean=(0.4914, 0.4822, 0.4465), std=(0.2023, 0.1994, 0.201))
           )


### Data Batching

In [15]:
import torch

trainloader = torch.utils.data.DataLoader(
    train_data,
    batch_size=16,
    shuffle=True
)

In [16]:
data_batch, labels_batch = next(iter(trainloader))
print(data_batch.size())

torch.Size([16, 3, 32, 32])


In [17]:
print(labels_batch.size())

torch.Size([16])


In [18]:
testloader = torch.utils.data.DataLoader(
    test_data,
    batch_size=16,
    shuffle=False
)

### General Data Preparation

#### DataLoad Classes

In [21]:
torch.utils.data.DataLoader(
    train_data,
    batch_size=1,
    shuffle=False, 
    sampler=None,
    batch_sampler=None,
    num_workers=0,      # used to increase the number of CPU processor that generate batches in parallel
    collate_fn=None,
    pin_memory=False,
    drop_last=False,
    timeout=0,
    worker_init_fn=None,
    multiprocessing_context=None,
    generator=None
)

<torch.utils.data.dataloader.DataLoader at 0x159e68fa0>

# Model Development

## Using existing and pretrained models

In [22]:
from torchvision import models

vgg16 = models.vgg16(pretrained=True)

Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /Users/woojin/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:10<00:00, 53.4MB/s] 


In [24]:
print(vgg16.classifier)

Sequential(
  (0): Linear(in_features=25088, out_features=4096, bias=True)
  (1): ReLU(inplace=True)
  (2): Dropout(p=0.5, inplace=False)
  (3): Linear(in_features=4096, out_features=4096, bias=True)
  (4): ReLU(inplace=True)
  (5): Dropout(p=0.5, inplace=False)
  (6): Linear(in_features=4096, out_features=1000, bias=True)
)


In [29]:
# wagelow = torch.hub.load(
#     'nvidia/DeepLearningExamples:torchhub',
#     'nvidia_wagelow'
# )

In [28]:
torch.hub.list(
    'nvidia/DeepLearningExamples:torchhub'
)

Using cache found in /Users/woojin/.cache/torch/hub/nvidia_DeepLearningExamples_torchhub


['nvidia_convnets_processing_utils',
 'nvidia_efficientnet',
 'nvidia_efficientnet_b0',
 'nvidia_efficientnet_b4',
 'nvidia_efficientnet_widese_b0',
 'nvidia_efficientnet_widese_b4',
 'nvidia_fastpitch',
 'nvidia_gpunet',
 'nvidia_hifigan',
 'nvidia_resneXt',
 'nvidia_resnet50',
 'nvidia_resnext101_32x4d',
 'nvidia_se_resnext101_32x4d',
 'nvidia_ssd',
 'nvidia_ssd_processing_utils',
 'nvidia_tacotron2',
 'nvidia_textprocessing_utils',
 'nvidia_tft',
 'nvidia_tft_data_utils',
 'nvidia_tts_utils',
 'nvidia_waveglow']

### The PyTorch NN module

In [31]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleNet(nn.Module):
    
    def __init__(self):
        super(SimpleNet, self).__init__()
        self.fc1 = nn.Linear(2048, 256)
        self.fc2 = nn.Linear(256, 64)
        self.fc3 = nn.Linear(64, 2)
        
    def foraward(self, x):
        x = x.view(-1, 2048)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.softmax(self.fc3(x), dim=1)
        return x

In [33]:
simplenet = SimpleNet()

print(simplenet)

SimpleNet(
  (fc1): Linear(in_features=2048, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=2, bias=True)
)


## Training

### Fundamental training loop

In [35]:
from torch import nn
import torch.nn.functional as F

In [43]:
class LeNet5(nn.Module):
    def __init__(self):
        super(LeNet5, self).__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)     # Conv2d(in_channel, out_channel, kernal_size) -> out_channel = number of convolutional filters
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
    
    def forward(self, x):
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        x = x.view(-1, int(x.nelement() / x.shape[0]))
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [44]:
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
model = LeNet5().to(device=device)

In [45]:
from torch import optim
from torch import nn

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), 
                      lr = 0.001,
                      momentum = 0.9)

In [61]:
# training loop
N_EPOCHS = 10
for epcoch in range(N_EPOCHS):              # outer training loop: loop over 10 epochs
    
    with torch.no_grad():
        epoch_loss = 0.0
        for inputs, labels in trainloader:      
            inputs = inputs.to(device)          # Move inputs and labels to GPU
            labels = labels.to(device)
            
            optimizer.zero_grad()               # zero out gradients before each backpropagation pass, or they'll accumualte
            
            outputs = model(inputs)             # perform forward pass
            loss = criterion(outputs, labels)   # compute loss
            loss.backward()                     # perform backpropagation: compute gradients
            optimizer.step()                    # adjust parameters based on gradients
            
            epoch_loss += loss.item()           # Accumualte batch loss so we can average over the epoch
    
    print("Epoch: {} Loss: {}".format(epcoch, epoch_loss/len(trainloader)))

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

# Validation

In [51]:
# we need to split out training dataset into a training dataset and a validation dataset.

from torch.utils.data import random_split

train_set, val_set = random_split(
    train_data,
    [40000, 10000]
)

trainloader = torch.utils.data.DataLoader(
    train_set,
    batch_size=16,
    shuffle=True
)

valloader = torch.utils.data.DataLoader(
    val_set,
    batch_size=16,
    shuffle=True
)

In [52]:
# we define out model, loss function, and optimizer.

from torch import optim
from torch import nn

model = LeNet5().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), 
                      lr=0.001,
                      momentum=0.9)

In [53]:
N_EPOCHS = 10

for epoch in range(N_EPOCHS):
    
    # Training
    train_loss = 0.0
    model.train()       # configure the model for training
    for inputs, labels in trainloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        optimizer.zero_grad()
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
    # Validation
    val_loss = 0.0
    model.eval()        # Configure the model for testing
    for inputs, labels in valloader:
        inputs = inputs.to(device)
        labels = labels.to(device)
        
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        
        val_loss += loss.item()
        
    print(
        "Epoch: {} Train Loss: {} Val Loss: {}".format(
            epoch,
            train_loss/len(trainloader),
            val_loss/len(valloader)
        )
    )

Epoch: 0 Train Loss: 2.0125209325790405 Val Loss: 1.7608585536956787
Epoch: 1 Train Loss: 1.6992714359998704 Val Loss: 1.6621589928627014
Epoch: 2 Train Loss: 1.5642587807178496 Val Loss: 1.5214239137649537
Epoch: 3 Train Loss: 1.4744960788488388 Val Loss: 1.4405505835533141
Epoch: 4 Train Loss: 1.4080534720897675 Val Loss: 1.3835458879470826
Epoch: 5 Train Loss: 1.348703851914406 Val Loss: 1.37481858959198
Epoch: 6 Train Loss: 1.3036948616981507 Val Loss: 1.3268832291603088
Epoch: 7 Train Loss: 1.2651420257210733 Val Loss: 1.2583676339149474
Epoch: 8 Train Loss: 1.2290809517145156 Val Loss: 1.2357687570095062
Epoch: 9 Train Loss: 1.2035881046772003 Val Loss: 1.1894946933746338


# Testing

In [57]:
num_correct = 0.0
for x_test_batch, y_test_batch in testloader:
    model.eval()        # set the model to evaluation mode for testing
    y_test_batch = y_test_batch.to(device)
    x_test_batch = x_test_batch.to(device)
    y_pred_batch = model(x_test_batch)      # predict the outcomes for each batch
    _, predicted = torch.max(y_pred_batch, 1)   # select the class index with the hightest probability
    num_correct += (predicted == y_test_batch).float().sum()    # compare the prediction to the true label and count the number of correct predictions
    
accuracy = num_correct / (len(testloader) * testloader.batch_size)

In [58]:
print(len(testloader), testloader.batch_size)
print("Test Accuracy: {}".format(accuracy))

625 16
Test Accuracy: 0.6172000169754028


# Model Deployment

## Saving Models

In [59]:
torch.save(model.state_dict(), "./models/lenet5_model.pt")

In [60]:
model = LeNet5().to(device)
model.load_state_dict(torch.load('./models/lenet5_model.pt'))

<All keys matched successfully>