# Image Classification with Custom ImageFolder Dataset in PyTorch

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision import datasets, transforms

# Prefer the GPU whenever CUDA is usable; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

print(f'Using device: {device}')

Using device: cuda


# ImageFolder Dataset 
* convert images to tensors
* Images have 3 channels (Red, Green, Blue, RGB)
* normalize RGB pixel values for each channel with a mean of (0.485, 0.456, 0.406)
* normalize RGB pixel values for each channel with a std of (0.229, 0.224, 0.225)
* These mean and std values were computed from the ImageNet dataset

In [2]:
# Preprocessing applied to every image, in order:
#   1. resize to 28x28 pixels,
#   2. convert the image to a tensor,
#   3. normalize each RGB channel with the per-channel mean/std below.
transform = transforms.Compose(
    [
        transforms.Resize(size=(28, 28)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=(0.485, 0.456, 0.406),
            std=(0.229, 0.224, 0.225),
        ),
    ]
)

### Weather Image Recognition Dataset
* Download: https://www.kaggle.com/datasets/jehanbhathena/weather-dataset

In [3]:
# Root directory of the downloaded dataset.
data_root_folder = './data/Weather Image Recognition/'

# Build the dataset from the folder tree; every image goes through `transform`.
full_dataset = datasets.ImageFolder(data_root_folder, transform=transform)

### See all classes
* Each class name is mapped to an integer label (its index in this list)
* (Remember models don't work with strings)

In [4]:
# Display the class names of the loaded ImageFolder dataset.
full_dataset.classes

['dew',
 'fogsmog',
 'frost',
 'glaze',
 'hail',
 'lightning',
 'rain',
 'rainbow',
 'rime',
 'sandstorm',
 'snow']

In [5]:
# Number of target classes; passed to the model later as its output size.
NUM_CLASSES = len(full_dataset.classes)
NUM_CLASSES

11

### Train/Test Split

In [6]:
split_ratio = 0.80
split_seed = 42  # fixed seed so the same images land in train/test on every run

total_size = len(full_dataset)
train_size = int(split_ratio * total_size)  # 80% for training
test_size = total_size - train_size  # remaining 20% for testing

# Use a dedicated, seeded generator: an unseeded random_split produces a different
# test set after every kernel restart, which makes results irreproducible and lets
# a model restored from a checkpoint be "tested" on images it was trained on.
train_dataset, test_dataset = torch.utils.data.random_split(
    full_dataset,
    [train_size, test_size],
    generator=torch.Generator().manual_seed(split_seed),
)

print(f'Train len: {len(train_dataset)}')
print(f'Test len: {len(test_dataset)}')

Train len: 5489
Test len: 1373


In [7]:
# Inspect a single training example: the dataset yields (image, target) pairs.
ex_img, ex_target = train_dataset[0] # img tensor, integer class index

print(ex_img.shape)
print(ex_target)

torch.Size([3, 28, 28])
3


# Dataloader

In [8]:
batch_size = 64

# Shuffle the training data so that each epoch sees the samples in a new order;
# without shuffling, SGD receives the exact same sequence of batches every epoch.
train_dataloader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

# Evaluation does not depend on sample order, so keep the test loader unshuffled.
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
)

### Get sample batch data

In [9]:
# Pull the first batch from the training loader to check the batched tensor shapes.
ex_img_batch, ex_target_batch = next(iter(train_dataloader))
print(ex_img_batch.shape)
print(ex_target_batch.shape)

torch.Size([64, 3, 28, 28])
torch.Size([64])


# Model

In [10]:
class ConvNet(nn.Module):
    """Small CNN classifier: two 3x3 convolutions, one 2x2 max-pool, two linear layers.

    Args:
        input_channels: Number of channels of the input images (3 for RGB).
        num_classes: Number of output classes, i.e. the size of the logits vector.
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 28x28, which
            yields the 9216 flattened features that were previously hard-coded.

    Raises:
        ValueError: If ``input_size`` is too small to survive both convolutions
            and the pooling step (each side must be at least 6 pixels).
    """

    def __init__(self, input_channels, num_classes, input_size=(28, 28)):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=input_channels, out_channels=32, kernel_size=3)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3)

        self.max_pool = nn.MaxPool2d(kernel_size=2)

        self.relu = nn.ReLU()

        if isinstance(input_size, int):
            input_size = (input_size, input_size)
        height, width = input_size

        # Each unpadded 3x3 convolution removes 2 pixels per side length (4 in total
        # for the two layers); the 2x2 max-pool then halves the result (floor).
        pooled_height = (height - 4) // 2
        pooled_width = (width - 4) // 2
        if pooled_height < 1 or pooled_width < 1:
            raise ValueError(
                f'input_size {input_size} is too small; each side must be at least 6'
            )

        # 64 feature maps of pooled_height x pooled_width (64 * 12 * 12 = 9216 for 28x28).
        flat_features = 64 * pooled_height * pooled_width

        self.fc1 = nn.Linear(flat_features, 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Return the raw class logits of shape (N, num_classes) for a batch ``x``.

        ``x`` is expected to have shape (N, input_channels, H, W) with the same
        H and W that were given as ``input_size``.
        """
        #######################
        # Convolutional Part
        #######################
        x = self.conv1(x)  # (N, C, H, W) -> (N, 32, H-2, W-2)
        x = self.relu(x)  # no dim change
        x = self.conv2(x)  # (N, 32, H-2, W-2) -> (N, 64, H-4, W-4)
        x = self.relu(x)  # no dim change
        x = self.max_pool(x)  # (N, 64, H-4, W-4) -> (N, 64, (H-4)//2, (W-4)//2)

        #######################
        ## Fully Connected Part
        #######################
        x = torch.flatten(x, 1)  # keep the batch dim, flatten the rest -> (N, flat_features)
        x = self.fc1(x)  # (N, flat_features) -> (N, 128)
        x = self.relu(x)  # no dim change
        logits = self.fc2(x)  # (N, 128) -> (N, num_classes)

        return logits

### Dummy Input for Dimensional Testing

In [11]:
# RGB images have 3 input channels; the network emits one logit per class.
model = ConvNet(input_channels=3, num_classes=NUM_CLASSES)

In [12]:
# One random fake image shaped like a real sample: (batch=1, channels=3, 28, 28).
dummy_input = torch.randn(1, 3, 28, 28)

In [13]:
# Run the dummy input through the model to confirm the layers fit together;
# the result should hold one logit per class for the single input sample.
dummy_preds = model(dummy_input)
dummy_preds.shape

torch.Size([1, 11])

## Print Model Parameters

In [14]:
# Show the shape of every learnable tensor (weights and biases) in the model.
for p in model.parameters():
    print(p.shape)

torch.Size([32, 3, 3, 3])
torch.Size([32])
torch.Size([64, 32, 3, 3])
torch.Size([64])
torch.Size([128, 9216])
torch.Size([128])
torch.Size([11, 128])
torch.Size([11])


## Print with Names

In [15]:
# Same listing as above, but with each tensor's name; a blank line follows each entry.
for param_name, param in model.named_parameters():
    print(f'name: {param_name} and parameter data: {param.shape}', end='\n\n')

name: conv1.weight and parameter data: torch.Size([32, 3, 3, 3])

name: conv1.bias and parameter data: torch.Size([32])

name: conv2.weight and parameter data: torch.Size([64, 32, 3, 3])

name: conv2.bias and parameter data: torch.Size([64])

name: fc1.weight and parameter data: torch.Size([128, 9216])

name: fc1.bias and parameter data: torch.Size([128])

name: fc2.weight and parameter data: torch.Size([11, 128])

name: fc2.bias and parameter data: torch.Size([11])



# Optimizer & Loss

In [16]:
learning_rate = 0.02

# Move the model to the selected device before handing its parameters to the optimizer.
model = model.to(device)

# Plain stochastic gradient descent over all model parameters.
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Cross-entropy on raw logits (log-softmax followed by negative log-likelihood).
criterion = nn.CrossEntropyLoss()

# Training

In [17]:
def train(model, train_loader, optimizer, criterion, epoch):
    """Train ``model`` for one full pass over ``train_loader``.

    Args:
        model: The ``nn.Module`` to optimize; it is put into training mode.
        train_loader: Iterable yielding ``(img, target)`` batches.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Loss function called as ``criterion(preds, target)``.
        epoch: Current epoch number; kept for interface compatibility, not used here.

    Returns:
        The mean of the per-batch loss values of this pass (a Python float).

    Raises:
        ZeroDivisionError: If ``train_loader`` yields no batches.
    """
    model.train()

    # Send batches to wherever the model lives instead of relying on a notebook-level
    # `device` variable; fall back to the CPU for a model without parameters.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device('cpu')

    loss_history = []

    for img, target in train_loader:
        # Move to the model's device (GPU if available)
        img = img.to(model_device)
        target = target.to(model_device)

        # Forward pass
        preds = model(img)
        # Compute the loss (error) of the predictions
        loss = criterion(preds, target)

        # In PyTorch gradients are accumulated, so reset them in every iteration
        optimizer.zero_grad()
        # Backward pass: compute gradients
        loss.backward()
        # Update parameters (weights and biases)
        optimizer.step()

        loss_history.append(loss.item())

    avg_loss = sum(loss_history) / len(loss_history)
    return avg_loss

# Testing
* No training in testing code
* Disable Autograd
* No optimizer

In [18]:
@torch.no_grad()
def test(model, test_loader, criterion):
    model.eval()
    
    loss_history = []
    acc_history = []
    
    for img, target in test_loader:
        # Move to GPU (if available)
        img = img.to(device)
        target = target.to(device)

        # Forward pass
        preds = model(img)
        # Compute error
        loss = criterion(preds, target)
        
        # Compute accuracy
        _, predicted = torch.max(preds, 1)
        accuracy = (predicted == target).sum().item() / target.size(0)

        loss_history.append(loss.item())
        acc_history.append(accuracy)
    
    avg_loss = sum(loss_history)/len(loss_history)
    avg_acc = sum(acc_history)/len(acc_history)
    return avg_loss, avg_acc

### Start Training
* Training consists of two steps: forward and backward propagation
* In forward propagation, we input the data into the model and measure the error (with loss function)
* In backward propagation, we adjust the internal parameters of the model so that the model makes better predictions next time
* One complete cycle of the dataset is called "epoch" (one loop cycle of all data)

In [19]:
def start_training(model, train_dataloader, test_dataloader, optimizer, criterion, num_epochs, print_interval):
    """Run the full train/evaluate loop and periodically print the metrics.

    Every epoch performs one training pass (``train``) followed by one evaluation
    pass (``test``). Metrics are printed for every epoch whose 1-based number is a
    multiple of ``print_interval``.

    Args:
        model: The model to train.
        train_dataloader: Loader providing the training batches.
        test_dataloader: Loader providing the evaluation batches.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Loss function shared by training and evaluation.
        num_epochs: Number of complete passes over the training data.
        print_interval: Print the metrics every ``print_interval`` epochs.

    Returns:
        None.
    """
    # Loop over all epochs (1-based). Use the `num_epochs` argument rather than a
    # notebook-level constant so the function honours what the caller passed in.
    for epoch in range(1, num_epochs + 1):
        avg_train_loss = train(model, train_dataloader, optimizer, criterion, epoch)
        avg_test_loss, avg_test_acc = test(model, test_dataloader, criterion)

        # `epoch` is already 1-based: adding 1 again would label epoch k as k+1 and
        # the metrics of the final epoch would never be printed.
        if epoch % print_interval == 0:
            print(f'Epoch: [{epoch}/{num_epochs}], Avg train loss: {avg_train_loss:.4f}, test loss: {avg_test_loss:.4f}, test_acc: {avg_test_acc*100.0:.2f}%')

### NOTE: this dataset is larger, training takes longer...

In [20]:
NUM_EPOCHS = 20
print_interval = 2

# Kick off the train/evaluate loop; arguments are passed by name for readability.
start_training(
    model=model,
    train_dataloader=train_dataloader,
    test_dataloader=test_dataloader,
    optimizer=optimizer,
    criterion=criterion,
    num_epochs=NUM_EPOCHS,
    print_interval=print_interval,
)

Epoch: [2/20], Avg train loss: 1.9248, test loss: 1.5932, test_acc: 49.64%
Epoch: [4/20], Avg train loss: 1.3963, test loss: 1.3502, test_acc: 56.18%
Epoch: [6/20], Avg train loss: 1.1920, test loss: 1.2085, test_acc: 59.61%
Epoch: [8/20], Avg train loss: 1.0545, test loss: 1.1053, test_acc: 63.39%
Epoch: [10/20], Avg train loss: 0.9547, test loss: 1.0780, test_acc: 64.40%
Epoch: [12/20], Avg train loss: 0.8762, test loss: 1.0672, test_acc: 64.55%
Epoch: [14/20], Avg train loss: 0.8051, test loss: 1.0637, test_acc: 66.14%
Epoch: [16/20], Avg train loss: 0.7340, test loss: 1.0641, test_acc: 66.00%
Epoch: [18/20], Avg train loss: 0.6606, test loss: 1.0644, test_acc: 66.93%
Epoch: [20/20], Avg train loss: 0.5860, test loss: 1.0682, test_acc: 67.01%


# Save/Load Model

In [21]:
# Persist everything needed to resume training or run inference later:
# model weights, optimizer state, and the class names of the dataset.
torch.save(
    obj={
        'model_state_dict': model.state_dict(),
        'optimizer_state_dict': optimizer.state_dict(),
        'class_str_names': full_dataset.classes,
    },
    f='convnet_ImageFolder_checkpoint.pt',
)