In [20]:
#loading important libraries
import os
import numpy as np

from PIL import Image

import torch
import glob
import torch.nn as nn

import torchvision
import pathlib

from torchvision.transforms import transforms
from torch.utils.data import DataLoader
from torch.optim import Adam
from torch.autograd import Variable

In [2]:
# checking for device existence
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')   
print(device)

cpu


In [3]:
# Transforms
transformer = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(), #from 0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # from 0-1 to [-1,1], formula (x-mean)/std
                        [0.5,0.5,0.5])
    
])

In [4]:

TRAIN_DIRECTORY = "../datasets/scenary_detection_data/scenary_train/"
TEST_DIRECTORY = "../datasets/scenary_detection_data/scenary_test/"
PREDICTION_DATA = "../datasets/scenary_detection_data/scenary_pred/"

In [5]:
#DATALOADER
train_loader = DataLoader(
    torchvision.datasets.ImageFolder(TRAIN_DIRECTORY, transform = transformer),
    batch_size = 256, shuffle = True
)

test_loader = DataLoader(
    torchvision.datasets.ImageFolder(TEST_DIRECTORY, transform = transformer),
    batch_size = 256, shuffle = True
)

In [6]:
# categories
root = pathlib.Path(TRAIN_DIRECTORY)
classes = sorted([i.name.split('/')[-1] for i in root.iterdir()])
print(classes)

['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']


In [7]:
# CNN network 
class ConvNet(nn.Module):
    def __init__(self, num_classes = 6):
        super(ConvNet, self).__init__()

        # Input shape = (256,3,150,150)
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=12, kernel_size=3, stride=1, padding=1)
        # shape = (256,12,150,150)
        self.bn1 = nn.BatchNorm2d(num_features=12)
        # shape = (256,12,150,150)
        self.relu1=  nn.ReLU()
        # shape = (256,12,150,150)

        self.pool = nn.MaxPool2d(kernel_size=2)
        # shape = (256,12,75,75)

        self.conv2 = nn.Conv2d(in_channels=12, out_channels=20, kernel_size=3, stride=1, padding=1)
        # shape = (256,20,75,75)
        self.relu2=  nn.ReLU()
        # shape = (256,20,75,75)

        self.conv3 = nn.Conv2d(in_channels=20, out_channels=32, kernel_size=3, stride=1, padding=1)
        # shape = (256,32,75,75)
        self.bn3 = nn.BatchNorm2d(num_features=32)
        # shape = (256,32,75,75)
        self.relu3=  nn.ReLU()
        # shape = (256,32,75,75)

        self.fc = nn.Linear(in_features=32*75*75, out_features=num_classes)

    
    # Feed forward function
    def forward(self, input):
        output = self.conv1(input)
        output = self.bn1(output)
        output = self.relu1(output)

        output = self.pool(output)

        output = self.conv2(output)
        output = self.relu2(output)

        output = self.conv3(output)
        output = self.bn3(output)
        output = self.relu3(output)

        # this output is in matrix form with shape of (256,32,75,75)

        output = output.view(-1,32*75*75)
        output = self.fc(output)

        return output

            


In [8]:
model = ConvNet(num_classes=6).to(device)

In [9]:
#Optimizer and loss function
optimizer = Adam(model.parameters(),lr=0.001,weight_decay=0.0001)
loss_function = nn.CrossEntropyLoss()

In [10]:
num_epochs = 5

In [11]:
# calculating the size of training and testing images
train_count = len(glob.glob(TRAIN_DIRECTORY+'/**/*.jpg'))
test_count = len(glob.glob(TEST_DIRECTORY+'/**/*.jpg'))

In [12]:
print(train_count,test_count)

14034 3000


In [13]:
# Model training and saving the model
best_accuracy = 0.0

for epoch in range(num_epochs):

    # Evalution and training on training dataset
    model.train()
    train_accuracy = 0.0
    train_loss = 0.0

    for i, (images, labels) in enumerate(train_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

        optimizer.zero_grad()

        outputs = model(images)
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()

        train_loss += loss.cpu().data*images.size(0)
        _,prediction = torch.max(outputs.data, 1)

        train_accuracy += int(torch.sum(prediction == labels.data))

    train_accuracy = train_accuracy/train_count
    train_loss = train_loss/train_count

    # Evalution on testing data
    model.eval()

    test_accuracy = 0.0
    for i, (images, labels) in enumerate(test_loader):
        if torch.cuda.is_available():
            images = Variable(images.cuda())
            labels = Variable(labels.cuda())

        outputs = model(images)
        _,prediction = torch.max(outputs.data, 1)
        test_accuracy += int(torch.sum(prediction == labels.data))

    test_accuracy = test_accuracy/test_count

    print('Epoch:' +str(epoch)+' -->'+' Train_Loss: ' +str(int(train_loss))+','+' Train Accuracy: ' +str(train_accuracy)+','+' Test Accuracy: ' +str(test_accuracy))

    # Save the best model
    if test_accuracy>best_accuracy:
        torch.save(model.state_dict(), 'best_checkpoint.model')
        best_accuracy = test_accuracy

Epoch: 0 Train_Loss: 18 Train Accuracy: 0.49401453612654983 Test Accuracy: 0.633
Epoch: 1 Train_Loss: 1 Train Accuracy: 0.7245974062989882 Test Accuracy: 0.6563333333333333
Epoch: 2 Train_Loss: 0 Train Accuracy: 0.7902949978623344 Test Accuracy: 0.7273333333333334
Epoch: 3 Train_Loss: 0 Train Accuracy: 0.854353712412712 Test Accuracy: 0.739
Epoch: 4 Train_Loss: 0 Train Accuracy: 0.8852786090922047 Test Accuracy: 0.7376666666666667


## Inferences

In [14]:
checkpoint_m = torch.load('best_checkpoint.model')
model = ConvNet(num_classes=6)
model.load_state_dict(checkpoint_m)
model.eval()

ConvNet(
  (conv1): Conv2d(3, 12, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn1): BatchNorm2d(12, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu1): ReLU()
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(12, 20, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (relu2): ReLU()
  (conv3): Conv2d(20, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu3): ReLU()
  (fc): Linear(in_features=180000, out_features=6, bias=True)
)

In [15]:
# Transforms
transformer_pred = transforms.Compose([
    transforms.Resize((150,150)),
    transforms.ToTensor(), #from 0-255 to 0-1, numpy to tensors
    transforms.Normalize([0.5,0.5,0.5], # from 0-1 to [-1,1], formula (x-mean)/std
                        [0.5,0.5,0.5])
])

In [16]:
# prediction function
def predict_model(img_path, transformer_pred):
    image = Image.open(img_path)
    image_tensor = transformer_pred(image).float()
    image_tensor = image_tensor.unsqueeze_(0)

    if torch.cuda.is_available():
        image_tensor.cuda()

    input = Variable(image_tensor)

    output = model(input)
    index = output.data.numpy().argmax()
    pred = classes[index]
    return pred 
    

In [17]:
images_path = glob.glob(PREDICTION_DATA+'/*.jpg')

In [21]:
pred_dict = {}

for i in images_path: 
    pred_dict[i[i.rfind('/')+1:]] = predict_model(i, transformer_pred)

In [23]:
len(pred_dict)

7301

In [26]:
print(dict(list(pred_dict.items())[5:15]))

{'4149.jpg': 'sea', '12550.jpg': 'buildings', '4986.jpg': 'glacier', '2595.jpg': 'glacier', '6682.jpg': 'street', '13550.jpg': 'sea', '4668.jpg': 'sea', '23856.jpg': 'street', '4166.jpg': 'glacier', '9433.jpg': 'forest'}
