In [1]:
#Load libraries
import os
import numpy as np
import torch
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib
import matplotlib.pyplot as plt

In [2]:
#checking for device
device=torch.device('cuda')

In [3]:
print(device)

cuda


In [4]:
#Transforms
transformer=transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  #0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # 0-1 to [-1,1] , formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [5]:
help(torchvision.datasets.ImageFolder)

Help on class ImageFolder in module torchvision.datasets.folder:

class ImageFolder(DatasetFolder)
 |  ImageFolder(root: str, transform: Optional[Callable] = None, target_transform: Optional[Callable] = None, loader: Callable[[str], Any] = <function default_loader at 0x00000232BFC4EAC0>, is_valid_file: Optional[Callable[[str], bool]] = None)
 |  
 |  A generic data loader where the images are arranged in this way by default: ::
 |  
 |      root/dog/xxx.png
 |      root/dog/xxy.png
 |      root/dog/[...]/xxz.png
 |  
 |      root/cat/123.png
 |      root/cat/nsdf3.png
 |      root/cat/[...]/asd932_.png
 |  
 |  This class inherits from :class:`~torchvision.datasets.DatasetFolder` so
 |  the same methods can be overridden to customize the dataset.
 |  
 |  Args:
 |      root (string): Root directory path.
 |      transform (callable, optional): A function/transform that  takes in an PIL image
 |          and returns a transformed version. E.g, ``transforms.RandomCrop``
 |      target_tr

In [6]:
#Dataloader
train_path= r"C:\Users\caren\OneDrive\Desktop\customised dataset 25000 pictures\archive\seg_train\seg_train"
test_path= r"C:\Users\caren\OneDrive\Desktop\customised dataset 25000 pictures\archive\seg_test\seg_test"
train_loader=DataLoader(
    torchvision.datasets.ImageFolder(train_path,transform=transformer),
    batch_size=64, shuffle=True
)
test_loader=DataLoader(
    torchvision.datasets.ImageFolder(test_path,transform=transformer),
    batch_size=32, shuffle=True
)

In [7]:
#categories
root=pathlib.Path(train_path)
classes=sorted([j.name.split('/')[-1] for j in root.iterdir()])

In [8]:
print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [9]:
class ConvNet(nn.Module):
    def __init__(self,num_classes=6):
        super(ConvNet,self).__init__()
        
        #Output size after convolution filter
        #((w-f+2P)/s) +1
        
        #Input shape= (256,3,150,150)
        
        self.conv1=nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #Shape= (256,12,150,150)
        self.bn1=nn.BatchNorm2d(num_features=12)
        #Shape= (256,12,150,150)
        self.relu1=nn.ReLU()
        #Shape= (256,12,150,150)
        
        self.pool=nn.MaxPool2d(kernel_size=2)
        #Reduce the image size be factor 2
        #Shape= (256,12,75,75)
        
        
        self.conv2=nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #Shape= (256,20,75,75)
        self.relu2=nn.ReLU()
        #Shape= (256,20,75,75)
        
         
        self.conv3=nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #Shape= (256,32,75,75)
        self.bn3=nn.BatchNorm2d(num_features=32)
        #Shape= (256,32,75,75)
        self.relu3=nn.ReLU()
        #Shape= (256,32,75,75)
        
        
        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
            #Above output will be in matrix form, with shape (256,32,75,75)
            
        output=output.view(-1,32*75*75)
            
            
        output=self.fc(output)
            
        return output   
            

In [10]:
model=ConvNet(num_classes=6).to(device)

In [11]:
#Optmizer and loss function
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [12]:
num_epochs=10

In [13]:
#calculating the size of training and testing images
train_count=len(glob.glob(train_path+'/**/*.jpg'))
test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [14]:
print(train_count,test_count)

14034 3000


In [15]:
#Model training and saving best model

best_accuracy=0.0

for epoch in range(num_epochs):
    
    #Evaluation and training on training dataset
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpointmodel.pth')
        best_accuracy=test_accuracy
    #plots


Epoch: 0 Train Loss: tensor(5.9605) Train Accuracy: 0.5907795354139946 Test Accuracy: 0.6613333333333333
Epoch: 1 Train Loss: tensor(1.8766) Train Accuracy: 0.7069972922901525 Test Accuracy: 0.7046666666666667
Epoch: 2 Train Loss: tensor(0.6331) Train Accuracy: 0.8103890551517743 Test Accuracy: 0.6573333333333333
Epoch: 3 Train Loss: tensor(0.4854) Train Accuracy: 0.8487245261507766 Test Accuracy: 0.7213333333333334
Epoch: 4 Train Loss: tensor(0.3627) Train Accuracy: 0.8870599971497791 Test Accuracy: 0.744
Epoch: 5 Train Loss: tensor(0.2615) Train Accuracy: 0.9164172723386063 Test Accuracy: 0.709
Epoch: 6 Train Loss: tensor(0.2193) Train Accuracy: 0.9326635314236853 Test Accuracy: 0.7626666666666667
Epoch: 7 Train Loss: tensor(0.2351) Train Accuracy: 0.9295282884423542 Test Accuracy: 0.7296666666666667
Epoch: 8 Train Loss: tensor(0.1733) Train Accuracy: 0.9487672794641585 Test Accuracy: 0.7373333333333333
Epoch: 9 Train Loss: tensor(0.1375) Train Accuracy: 0.9583867749750605 Test Accur