In [42]:
import torch
import numpy as np 
import os
import glob
import torch.nn as nn
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable
import torchvision
import pathlib

In [43]:
#checking for device
#if using CPU training will be slower
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.is_available())

False


In [44]:
#Transforming the Size of All Images
#All for all images we add the possibiliy of horizontally flipping our images randomly to add VARIATION to our sets
transformer = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(), #changes to pixel range of each color channel from 0-255 to 0-1 and changes data type from numpy to tensors
    #because pyTorch framework takes Tensor as input
    transforms.Normalize([.5,.5,.5], #new pixels will be calculate as (x-mean)/standard deviation where x is the old pixel value
                         [.5,.5,.5]) #changes range from 0-1 to -1-1
    #column represents RGB channel and row represents mean and divisoin, so they are .5 for all channels
])

In [45]:
#DataLoader helps in reading the data in feeding it into the model for training and matching
train_path = "archive\seg_train\seg_train"
test_path = "archive\seg_test\seg_test"
train_loader = DataLoader(torchvision.datasets.ImageFolder(train_path,transform=transformer), #feed in our train directory path and transformer
                          batch_size=256,shuffle = True) #batch size should be loaded to the size of my CPU memory
test_loader = DataLoader(torchvision.datasets.ImageFolder(test_path,transform=transformer), 
                          batch_size=256,shuffle = True) 

In [46]:
#categories
root = pathlib.Path(train_path)
classes = sorted([j.name.split('/')[-1] for j in root.iterdir()])#fetching all class names and categories and putting them in a list

In [47]:
print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [73]:
#CNN Network
class ConvNet(nn.Module):
    def __init__(self, num_classes=6):
        super(ConvNet,self).__init__() #specifying all layers in neural network

        #output size after convolution filter: (150-3+2(1))/(1) +1

        #Input shape= (256,3,150,150) as 256 is the batch size, 3 is the number of channels(RGB), along with height and width
        self.conv1 = nn.Conv2d(in_channels=3,out_channels=12,kernel_size=3,stride=1,padding=1)
        #shape = (256,12,150,150) since output channels = 12
        self.bn1 = nn.BatchNorm2d(num_features=12) # adding batch normalization where we are adding the number of features that is equal to number of channels
        #shape remains the same
        self.reul1=nn.ReLU() #brings non linearity 
        #shapes remains the same

        self.pool=nn.MaxPool2d(kernel_size=2) #reduces height and width of our convolution output by a factor of 2
        #shape = (256,12,75,75)

        #add another convolution layer
        self.conv2 = nn.Conv2d(in_channels=12,out_channels=20,kernel_size=3,stride=1,padding=1)
        #shape = (256,20,75,75) since output channels = 20
        self.reul2=nn.ReLU() #brings non linearity 
        
        #add last layer
        self.conv3 = nn.Conv2d(in_channels=20,out_channels=32,kernel_size=3,stride=1,padding=1)
        #shape = (256,30,75,75) since output channels = 30
        self.bn3 = nn.BatchNorm2d(num_features=32)
        self.reul3=nn.ReLU() #brings non linearity

         #add fully connected layer where we feed the number of input features 
        self.fc=nn.Linear(in_features=32*75*75,out_features=num_classes)

        #feed foward function
    def forward(self,input): #pass in all inputs from above layers
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.reul1(output)

        output=self.pool(output)

        output=self.conv2(output)
        output=self.reul2(output)
        
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.reul3(output)

        #Output will be in Matrix form with shape of (256,32,75,75)

        output=output.view(-1,32*75*75) #in order to feed into full connected layer we reshape into a vector

        output=self.fc(output) #feed into fully connected layer 

        return output

In [74]:
model = ConvNet(num_classes=6).to(device)

In [79]:
#Call Optimizer and loss we use corss entropy
optimizer = Adam(model.parameters(),lr=.001,weight_decay=.0001)
loss_function = nn.CrossEntropyLoss()

In [80]:
num_epochs = 10

In [81]:
#calculating number of images
train_count = len(glob.glob(train_path+'/**/*.jpg'))
test_count = len(glob.glob(test_path+'/**/*.jpg'))
print(train_count,test_count)

14034 3000


In [83]:
#Training our network on our data network loader
#Calculate the training loss and accuracy
#Save model that gives best accuracy
best_accuracy = 0.0
for epoch in range(num_epochs):
    #Eval and training on training dataset
    model.train() #sets in training mode and model keeps some layers that will behave diff
    train_accuracy,train_loss = 0.0,0.0
    for i, (images,labels) in enumerate(train_loader): #looping inside the batches inside train loaders
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(images.cuda())
        optimizer.zero_grad()#back propogation accumulates gradiance we have to zero them out at the begininning of each new batch
        outputs = model(images) # gives prediciton
        loss = loss_function(outputs,labels) #predicts error
        loss.backward()#performs back propogation
        optimizer.step() #updates variance and bias

        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)

        train_accuracy+=int(torch.sum(prediction==labels.data)) #prediction vs actual value
        
    train_accuracy=train_accuracy/train_count #gives us final accuracy for partricular epoch
    train_loss=train_loss/train_count

    #Eval on testing dataset
    model.eval() #once this is called our model deactives such layer so our model ouput is as expected
    test_accuracy = 0.0
    for i, (images,labels) in enumerate(test_loader): #looping inside the batches inside test loaders
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(images.cuda())
        outputs = model(images) # gives prediciton
        _,prediction=torch.max(outputs.data,1) #get the id 
        test_accuracy+=int(torch.sum(prediction==labels.data))
    test_accuracy=test_accuracy/test_count #gives us test accuracy
    print('Epoch: '+str(epoch)+'Train Loss: '+str(train_loss) + 'Train Accuracy ' +str(train_accuracy) + "Test Accuracy "+str(test_accuracy))

    #Save Best Model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy = test_accuracy


Epoch: 0Train Loss: tensor(7.6228)Train Accuracy0.5559355850078381Test Accuracy0.5326666666666666
Epoch: 1Train Loss: tensor(1.3853)Train Accuracy0.7151204218326921Test Accuracy0.6683333333333333
Epoch: 2Train Loss: tensor(1.3030)Train Accuracy0.758016246259085Test Accuracy0.63
Epoch: 3Train Loss: tensor(0.6851)Train Accuracy0.8425965512327205Test Accuracy0.7386666666666667
Epoch: 4Train Loss: tensor(0.3777)Train Accuracy0.8978195810175289Test Accuracy0.7033333333333334
Epoch: 5Train Loss: tensor(0.3008)Train Accuracy0.9142796066695169Test Accuracy0.7256666666666667
Epoch: 6Train Loss: tensor(0.1961)Train Accuracy0.9424967935014964Test Accuracy0.7323333333333333
Epoch: 7Train Loss: tensor(0.1510)Train Accuracy0.9545389767706998Test Accuracy0.7343333333333333
Epoch: 8Train Loss: tensor(0.1063)Train Accuracy0.9719253242126265Test Accuracy0.7
Epoch: 9Train Loss: tensor(0.1054)Train Accuracy0.9686475701866895Test Accuracy0.7503333333333333
