## CNN for Ambient Brightness Recognition

## 0. Import Packages

In [11]:
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim

from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms

In [12]:
# Report whether CUDA is usable and select the matching compute device.
cuda_available = torch.cuda.is_available()
print(f"Nvidia GPU: {cuda_available}")
device = torch.device("cuda") if cuda_available else torch.device("cpu")
print(f"Current Device: {device}")

Nvidia GPU: True
Current Device: cuda


## 1. Data Preparation

In [19]:
# Load the training and test image folders; every image is converted to a tensor.
to_tensor = transforms.ToTensor()
train_path, test_path = "./Image/Train", "./Image/Test"

train_data = ImageFolder(root=train_path, transform=to_tensor)
print(train_data)

test_data = ImageFolder(root=test_path, transform=to_tensor)
print(test_data)

Dataset ImageFolder
    Number of datapoints: 4
    Root location: ./Image/Train
    StandardTransform
Transform: ToTensor()


NameError: name 'test_data' is not defined

In [20]:
# Inspect the data: first one raw dataset item, then one collated batch.

# Peek at the first dataset item (nothing is printed if the dataset is empty).
sample = next(iter(train_data), None)
if sample is not None:
    x, y = sample
    print(x.shape)
    print(y)

train_loader = DataLoader(train_data, shuffle=True, batch_size=10)

# Peek at the first batch produced by the loader.
batch = next(iter(train_loader), None)
if batch is not None:
    x, y = batch
    print(x.shape)
    print(y)

# Dataset sizes.
num_train_images = len(train_data)
print("Num of Img in Training Set:", num_train_images)
num_test_images = len(test_data)
print("Num of Img in Test Set:", num_test_images)

torch.Size([3, 1080, 1920])
0
torch.Size([4, 3, 1080, 1920])
tensor([2, 1, 3, 0])
Num of Img in Training Set: 4


NameError: name 'valid_data' is not defined

## 5. CNN Model

In [11]:
class CNN(nn.Module):
    """Three-stage convolutional classifier for ambient brightness levels.

    Each stage is ``Conv2d(3x3, padding=1) -> ReLU -> AvgPool2d(2) ->
    BatchNorm2d`` and doubles the channel count (16 -> 32 -> 64). The final
    feature map is flattened and passed to one linear layer with one output
    per brightness level.

    Args:
        input_size: ``(height, width)`` of the input images. Defaults to
            ``(1080, 1920)``, i.e. a 1920x1080 picture in tensor layout.
        brightness_levels: Number of output classes. Defaults to 4.

    Raises:
        ValueError: If ``input_size`` is too small to survive three 2x2
            poolings (either dimension below 8).

    Note:
        ``forward`` returns raw, unnormalized class scores (logits), which is
        what ``nn.CrossEntropyLoss`` expects. Apply
        ``torch.softmax(logits, dim=1)`` to obtain probabilities; the argmax
        (predicted class) is the same either way.
    """

    def __init__(self, input_size=(1080, 1920), brightness_levels=4) -> None:
        super().__init__()

        height, width = input_size
        # Each of the three 2x2 average poolings halves both spatial
        # dimensions (rounding down), so the total reduction is a factor of 8.
        # 1080x1920 -> 135x240.
        pooled_height, pooled_width = height // 8, width // 8
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                "input_size must be at least 8x8 to survive three 2x2 poolings, "
                "got {}".format((height, width))
            )

        # Level 1: (0 + 24) / 2 = 12
        # Level 2: (25 + 49) / 2 = 37
        # Level 3: (50 + 74) / 2 = 62
        # Level 4: (75 + 100) / 2 = 87
        self.brightness_levels = brightness_levels

        self.pool = nn.AvgPool2d(kernel_size=2, stride=2)

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(16)

        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(32)

        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.batch_norm3 = nn.BatchNorm2d(64)

        # Channel count of the last convolutional stage (kept as a public attribute).
        self.out_channels = 64

        self.fc = nn.Linear(self.out_channels * pooled_height * pooled_width,
                            self.brightness_levels)

    def forward(self, x):
        """Return class logits of shape ``(batch, brightness_levels)`` for image batch ``x``."""
        x = self.batch_norm1(self.pool(torch.relu(self.conv1(x))))
        x = self.batch_norm2(self.pool(torch.relu(self.conv2(x))))
        x = self.batch_norm3(self.pool(torch.relu(self.conv3(x))))

        # Collapse everything except the batch dimension. Unlike
        # view(-1, features), this never regroups samples: an input of the
        # wrong spatial size fails at the linear layer instead of silently
        # changing the batch size.
        x = torch.flatten(x, start_dim=1)

        # No softmax here: nn.CrossEntropyLoss applies log-softmax itself, and
        # feeding it probabilities would apply softmax twice.
        return self.fc(x)

## 6. Training

In [14]:
def get_accuracy(model, data, batch_size=32):
    """Return the fraction of samples in ``data`` that ``model`` classifies correctly.

    Args:
        model: Module that maps a batch of images to per-class scores with the
            class dimension at index 1.
        data: Dataset of ``(image, label)`` pairs accepted by ``DataLoader``.
        batch_size: Number of samples evaluated per forward pass. No gradients
            are stored during evaluation, so this can be larger than the
            training batch size.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` when ``data`` is empty.

    Note:
        The model is switched to evaluation mode and left in that mode.
    """
    loader = DataLoader(data, batch_size=batch_size)

    # Evaluation mode: layers such as BatchNorm use their running statistics
    # instead of the statistics of the current batch.
    model.eval()

    # Feed batches on the device the model's parameters live on. A model
    # without parameters gives no hint, so batches are left where they are.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    correct = 0
    total = 0

    # Gradients are not needed for scoring; disabling autograd avoids building
    # a graph for every batch.
    with torch.no_grad():
        for imgs, labels in loader:
            if target_device is not None:
                imgs = imgs.to(target_device)
                labels = labels.to(target_device)

            # The predicted class is the index of the highest score.
            pred = model(imgs).argmax(dim=1)
            correct += (pred == labels).sum().item()
            total += imgs.shape[0]

    return correct / total if total else 0.0

In [7]:
def train(model, train_data, valid_data, batch_size=32, weight_decay=0.0,
          optimizer="sgd", momentum=0.9, learning_rate=0.001, num_epochs=7,
          shuffle_data=True, checkpoint_path=None, print_acc=True):
    """Train ``model`` on ``train_data`` and plot loss and accuracy curves.

    After every epoch the loss of the last batch and the accuracy on both
    ``train_data`` and ``valid_data`` are recorded, and the model weights are
    optionally checkpointed. When training finishes, two learning-curve plots
    are shown and the final accuracies are printed.

    Args:
        model: The ``nn.Module`` to train; it is updated in place.
        train_data: Dataset of ``(image, label)`` pairs used for training.
        valid_data: Dataset of ``(image, label)`` pairs used for validation.
        batch_size: Samples per training batch. A trailing batch smaller than
            this is dropped.
        weight_decay: L2 penalty passed to the optimizer.
        optimizer: ``"sgd"`` or ``"adam"``.
        momentum: Momentum for SGD (ignored for Adam).
        learning_rate: Optimizer learning rate.
        num_epochs: Number of passes over ``train_data``.
        shuffle_data: Whether to reshuffle the training data every epoch.
        checkpoint_path: If given, the model ``state_dict`` is saved to this
            path after every epoch (overwriting the previous checkpoint).
        print_acc: Whether to print the metrics after every epoch.

    Raises:
        ValueError: If ``optimizer`` is not ``"sgd"`` or ``"adam"``, or if an
            epoch yields no batch because ``train_data`` holds fewer than
            ``batch_size`` samples.
    """
    # Validate with an exception rather than assert, which is stripped under -O.
    if optimizer not in ("sgd", "adam"):
        raise ValueError("optimizer must be 'sgd' or 'adam', got {!r}".format(optimizer))

    # Shuffled data, last batch dropped if smaller than the rest.
    train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=shuffle_data, drop_last=True)

    # Loss function
    # This is a multi-class classification problem, so cross-entropy is used;
    # with a squared loss the gradient signal would be too small.
    # nn.CrossEntropyLoss combines log-softmax with the negative log-likelihood
    # and, by default, averages the loss over the batch.
    criterion = nn.CrossEntropyLoss()

    # Optimizer
    # SGD is the default since it tends to generalize better than Adam,
    # although Adam converges faster.
    if optimizer == "sgd":
        opt = optim.SGD(model.parameters(),
                        lr=learning_rate,
                        momentum=momentum,
                        weight_decay=weight_decay)
    else:
        opt = optim.Adam(model.parameters(),
                         lr=learning_rate,
                         weight_decay=weight_decay)

    # Batches must live on the same device as the model. The optimizer above
    # already rejects a model without parameters, so there is a first one.
    model_device = next(model.parameters()).device

    iters, losses, train_acc, val_acc, n = [], [], [], [], 0

    print("\n ----- Start Training ----- \n")

    for _ in range(num_epochs):

        # get_accuracy() leaves the model in evaluation mode, so training mode
        # has to be re-enabled at the start of every epoch.
        model.train()

        last_loss = None
        for imgs, labels in train_loader:
            imgs = imgs.to(model_device)
            labels = labels.to(model_device)

            # Clear gradients before the backward pass so that nothing left
            # over from earlier use of the model leaks into this step.
            opt.zero_grad()
            output = model(imgs)
            loss = criterion(output, labels)
            loss.backward()
            opt.step()

            last_loss = loss.item()
            n += 1

        if last_loss is None:
            raise ValueError(
                "No training batch was produced: train_data holds fewer samples "
                "than batch_size={} and incomplete batches are dropped".format(batch_size)
            )

        iters.append(n)
        # The criterion already averages over the batch, so the value is a
        # per-sample loss and needs no further division by batch_size.
        losses.append(last_loss)
        train_acc.append(get_accuracy(model, train_data, batch_size))
        val_acc.append(get_accuracy(model, valid_data, batch_size))

        if checkpoint_path is not None:
            torch.save(model.state_dict(), checkpoint_path)

        if print_acc:
            print("Iter %d. [Val Acc %.0f%%] [Train Acc %.0f%%, Loss %f]" % (
                    n, val_acc[-1] * 100, train_acc[-1] * 100, losses[-1]))

    # Plot the loss curve.
    plt.title("Learning Curve")
    plt.plot(iters, losses, label="Train")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")
    plt.show()

    # Plot the training and validation accuracy curves.
    plt.title("Learning Curve")
    plt.plot(iters, train_acc, label="Train")
    plt.plot(iters, val_acc, label="Validation")
    plt.xlabel("Iterations")
    plt.ylabel("Training Accuracy")
    plt.legend(loc='best')
    plt.show()

    # With num_epochs == 0 nothing was recorded, so there is nothing to report.
    if train_acc:
        print("Final Training Accuracy: {}".format(train_acc[-1]))
        print("Final Validation Accuracy: {}".format(val_acc[-1]))