In [7]:
import os
import numpy as np 
import torch
import glob
import torch.nn as nn 
from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable 
import torchvision
import pathlib


In [8]:
#checking if cuda is availaible or its running on cpu
device=torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)

cuda


In [9]:
transformer = transforms.Compose([
    transforms.Resize((150, 150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),    #resevaluate color range of 0 to 255 >> 0 to 1 and numpy to tensor
    transforms.Normalize([0.5, 0.5, 0.5],
                         [0.5, 0.5, 0.5]) #to normalize range to -1 to , column represent RGB and row represent mean anddeviation f= (x - mean)/std_dev
])

In [18]:
#can't feed all images at once due to memory overload, we need to feed in batches for both test and train

train_path = '../data/seg_train/seg_train'
test_path = '../data/seg_test/seg_test'

train_loader = DataLoader(
    torchvision.datasets.ImageFolder(train_path, transform=transformer),
    batch_size=256, 
    shuffle=True 
)

test_loader = DataLoader(
    torchvision.datasets.ImageFolder(test_path, transform=transformer),
    batch_size=256, 
    shuffle=True 
)

In [19]:
#categories

root=pathlib.Path(train_path)
classes = sorted([j.name.split('/') for j in root.iterdir()])

In [21]:
print(classes)

[['buildings'], ['forest'], ['glacier'], ['mountain'], ['sea'], ['street']]


In [None]:
class Conv_net(nn.Module):
    def __init__(self, num_classes=6):
        super(Conv_net, self).__init__()

        #Input shape = (256, 3, 150, 150)
        #256 = batch size
        #3 = color scheme == RGB
        #150, 150 = image size

        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)

        #new shape after convolution will be (256, 12, 150, 150)

        self.bn1 = nn.BatchNorm2d(num_features = 12)

        self.relu1 = nn.ReLU()

        self.pool = nn.MaxPool2d(kernel_size=2)

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)

        self.bn2 = nn.BatchNorm2d(num_features = 12)

        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)

        self.bn3 = nn.BatchNorm2d(num_features = 12)

        self.relu3 = nn.ReLU()
        self.fc=nn.Linear(in_features=75 * 75 * 32,out_features=num_classes)
        
        
        
        #Feed forwad function
        
    def forward(self,input):
        output=self.conv1(input)
        output=self.bn1(output)
        output=self.relu1(output)
            
        output=self.pool(output)
            
        output=self.conv2(output)
        output=self.relu2(output)
            
        output=self.conv3(output)
        output=self.bn3(output)
        output=self.relu3(output)
            
            
        output=output.view(-1,32*75*75)
            
            
        output=self.fc(output)
            
        return output        


In [None]:
model=ConvNet(num_classes=6).to(device)

In [None]:
optimizer=Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function=nn.CrossEntropyLoss()

In [None]:
num_epochs=10

In [None]:
train_count=len(glob.glob(train_path+'/**/*.jpg'))
test_count=len(glob.glob(test_path+'/**/*.jpg'))

In [None]:
print(train_count,test_count)

In [None]:

best_accuracy=0.0

for epoch in range(num_epochs):
    
    model.train()
    train_accuracy=0.0
    train_loss=0.0
    
    for i, (images,labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        optimizer.zero_grad()
        
        outputs=model(images)
        loss=loss_function(outputs,labels)
        loss.backward()
        optimizer.step()
        
        
        train_loss+= loss.cpu().data*images.size(0)
        _,prediction=torch.max(outputs.data,1)
        
        train_accuracy+=int(torch.sum(prediction==labels.data))
        
    train_accuracy=train_accuracy/train_count
    train_loss=train_loss/train_count
    
    
    # Evaluation on testing dataset
    model.eval()
    
    test_accuracy=0.0
    for i, (images,labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images=Variable(images.cuda())
            labels=Variable(labels.cuda())
            
        outputs=model(images)
        _,prediction=torch.max(outputs.data,1)
        test_accuracy+=int(torch.sum(prediction==labels.data))
    
    test_accuracy=test_accuracy/test_count
    
    
    print('Epoch: '+str(epoch)+' Train Loss: '+str(train_loss)+' Train Accuracy: '+str(train_accuracy)+' Test Accuracy: '+str(test_accuracy))
    
    #Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(),'best_checkpoint.model')
        best_accuracy=test_accuracy