In [1]:
import os
import glob
import torch
import torchvision
import pathlib
import torch.nn as nn
from torchvision import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

In [3]:
# Target size that every image is resized to before it enters the network.
# A much smaller size such as (16, 16) is handy for a quick smoke run.
resize = (150, 150)

# Preprocessing pipeline: resize -> random horizontal flip (p=0.5) -> tensor
# conversion -> per-channel normalisation with mean 0.5 and std 0.5.
transformer = transforms.Compose(
    [
        transforms.Resize(resize),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
    ]
)

In [4]:
# Data loaders
batch_size = 32
train_path, test_path = "./seg_train/seg_train/", "./seg_test/seg_test/"

# Evaluation should be deterministic, so the test pipeline applies the same
# resize / tensor conversion / normalisation as `transformer` but leaves out
# the random horizontal flip (which is a training-only augmentation).
eval_transformer = transforms.Compose([
    transforms.Resize(resize),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5],
                         [0.5, 0.5, 0.5])])

# Training batches are reshuffled every epoch.
train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=batch_size,
    shuffle=True,
)
# Shuffling has no effect on accuracy, so the test set is read in a fixed order.
test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform=eval_transformer),
    batch_size=batch_size,
    shuffle=False,
)

In [5]:
# Class names are the visible sub-directories of the training root, sorted
# alphabetically. Plain files (e.g. a stray README) and dot-prefixed entries
# are skipped so that len(classes) reflects only real class folders.
root = pathlib.Path(train_path)
classes = sorted(
    entry.name
    for entry in root.iterdir()
    if entry.is_dir() and not entry.name.startswith(".")
)

In [6]:
# Show the discovered class names (a bare last expression is rendered as the cell output).
classes

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

In [7]:
class ConvNet(nn.Module):
    """Small three-convolution CNN that maps an RGB image batch to class logits.

    Layer order:
        conv1 -> bn1 -> ReLU -> 2x2 max-pool -> conv2 -> ReLU
        -> conv3 -> bn3 -> ReLU -> flatten -> fully connected.

    Shape bookkeeping (N = batch size, H x W = input image size):
        * Every convolution uses kernel 3, stride 1, padding 1, so it keeps the
          spatial size:  out = (in + 2*padding - kernel) / stride + 1 = in.
        * The single 2x2 max-pool halves each spatial dimension, rounding down.
        * The classifier therefore sees 32 * (H // 2) * (W // 2) features.

    Args:
        num_classes: Number of output logits (one per class).
        image_size: ``(height, width)`` of the images fed to the network.
            When ``None`` (the default) the notebook-level ``resize`` tuple is
            used, which matches the original behaviour of this class.
    """

    def __init__(self, num_classes=6, image_size=None):
        super().__init__()
        if image_size is None:
            # Backward-compatible default: follow the notebook's global setting.
            image_size = resize
        height, width = image_size

        # (N, 3, H, W) -> (N, 12, H, W)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        self.relu1 = nn.ReLU()
        # (N, 12, H, W) -> (N, 12, H // 2, W // 2)
        self.pool = nn.MaxPool2d(kernel_size=2)

        # (N, 12, H // 2, W // 2) -> (N, 20, H // 2, W // 2)
        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

        # (N, 20, H // 2, W // 2) -> (N, 32, H // 2, W // 2)
        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.relu3 = nn.ReLU()

        # Each dimension must be halved separately. The unparenthesised form
        # 32*H//2*W//2 is evaluated left to right and only gives the right
        # answer when H and W are even.
        self.flat_features = 32 * (height // 2) * (width // 2)
        self.fc = nn.Linear(in_features=self.flat_features, out_features=num_classes)

    def forward(self, inp):
        """Return raw (un-normalised) class logits of shape (N, num_classes).

        Args:
            inp: Image batch of shape (N, 3, H, W), where (H, W) is the
                ``image_size`` the network was built for.
        """
        output = self.conv1(inp)
        output = self.bn1(output)
        output = self.relu1(output)
        output = self.pool(output)

        output = self.conv2(output)
        output = self.relu2(output)

        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)

        # Keep the batch dimension fixed and flatten the rest. If the input
        # size does not match `image_size`, the linear layer then raises a
        # shape error instead of the batch dimension silently changing.
        output = output.view(output.size(0), -1)
        output = self.fc(output)
        return output

In [8]:
# Build the network with one output logit per class, then place its
# parameters on the selected device.
model = ConvNet(num_classes=len(classes))
model = model.to(device)

In [9]:
# Optimisation set-up: Adam with weight decay, cross-entropy loss on the logits.
optimizer = Adam(model.parameters(),lr=0.001, weight_decay=0.0001)
loss_func = nn.CrossEntropyLoss()
num_epoch = 10

# Denominators for the per-epoch loss/accuracy averages. They are taken from
# the datasets the loaders actually iterate over, so they always match the
# number of samples seen (a '*.jpg' glob would miss any other image extension
# that the dataset accepts).
train_count = len(train_loader.dataset)
test_count = len(test_loader.dataset)

In [10]:
# Train for `num_epoch` epochs. After every epoch the model is scored on the
# test loader, and its weights are checkpointed whenever that score improves.
# NOTE(review): the checkpoint is selected on the test set, so the reported
# best test accuracy is optimistic; a separate validation split would avoid this.
best_acc = 0.

for epoch in range(num_epoch):
    # ---- training pass ----
    model.train()
    train_acc = 0.
    train_loss = 0.
    for i, (images, labels) in enumerate(train_loader):
        # The model lives on `device`, so each batch has to be moved there too;
        # otherwise the forward pass fails as soon as a GPU is used.
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = loss_func(outputs, labels)
        loss.backward()
        optimizer.step()
        if not i%50: print(f"Batch {i}")

        # The loss is a batch mean; weighting it by the batch size makes the
        # later division by `train_count` a true per-sample average even when
        # the last batch is smaller.
        train_loss += loss.item() * images.size(0)
        prediction = outputs.detach().argmax(dim=1)
        train_acc += int((prediction == labels).sum())

    train_acc = train_acc / train_count
    train_loss = train_loss / train_count

    # ---- evaluation pass ----
    model.eval()
    test_acc = 0
    # No gradients are needed for scoring; disabling autograd saves memory and time.
    with torch.no_grad():
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            prediction = outputs.argmax(dim=1)
            test_acc += int((prediction == labels).sum())
    test_acc = test_acc / test_count
    print(f"Epoch: {epoch}, train loss: {train_loss}, train acc: {train_acc}, test acc {test_acc}")

    # Keep only the weights of the best-scoring epoch so far.
    if test_acc > best_acc:
        torch.save(model.state_dict(),"best_checkpoint.model")
        best_acc = test_acc

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


Batch 0
Batch 50
Batch 100
Batch 150
Batch 200
Batch 250
Batch 300
Batch 350
Batch 400
Epoch: 0, train loss: 5.438430309295654, train acc: 0.5840102607952117, test acc 0.6743333333333333
Batch 0
Batch 50
Batch 100
Batch 150
Batch 200
Batch 250
Batch 300
Batch 350
Batch 400
Epoch: 1, train loss: 0.7532532811164856, train acc: 0.75502351432236, test acc 0.7366666666666667
Batch 0
Batch 50
Batch 100
Batch 150
Batch 200
Batch 250
Batch 300
Batch 350
Batch 400
Epoch: 2, train loss: 0.4815417230129242, train acc: 0.8415277183981759, test acc 0.735
Batch 0
Batch 50
Batch 100
Batch 150
Batch 200
Batch 250
Batch 300
Batch 350
Batch 400
Epoch: 3, train loss: 0.34936562180519104, train acc: 0.8857773977483255, test acc 0.7356666666666667
Batch 0
Batch 50
Batch 100
Batch 150
Batch 200
Batch 250
Batch 300
Batch 350
Batch 400
Epoch: 4, train loss: 0.2692165970802307, train acc: 0.917272338606242, test acc 0.718
Batch 0
Batch 50
Batch 100
Batch 150
Batch 200
Batch 250
Batch 300
Batch 350
Batch 400
Ep