In [None]:
!pip install timm

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import torch
from torch import nn
import torch.nn.functional as F
import torchvision.transforms as transforms  
import torchvision
import os
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from torch.utils.data import random_split

import cv2
import pandas as pd
import torchvision.transforms as transforms 
from torchvision.transforms import ToTensor,Normalize, RandomHorizontalFlip, Resize
from torch.utils.data import DataLoader
from torch.utils.data.sampler import SubsetRandomSampler
from torch.autograd import Variable

import timm
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session



The cell below sets the dataset paths, defines the image transforms, loads the training data, and splits it into training and validation sets.

In [None]:
# Locations of the Intel image-classification dataset splits.
data_dir_Train = "/kaggle/input/intel-image-classification/seg_train"
data_dir_Test = "/kaggle/input/intel-image-classification/seg_test"
data_dir_pred = "/kaggle/input/intel-image-classification/seg_pred/seg_pred"

# The class folders sit one level below the split directory.
train_dir = data_dir_Train + "/seg_train"

# Augmentation pipeline with a random flip and colour jitter (defined here for
# the CNN; the dataset below is built with attention_transform).
CNN_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ColorJitter(0.3, 0.4, 0.4, 0.2),
    transforms.ToTensor(),
    transforms.Normalize((0.425, 0.415, 0.405), (0.205, 0.205, 0.205))
])

# Lighter pipeline: resize, occasional horizontal flip, normalise.
attention_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(p=0.3),
    transforms.ToTensor(),
    transforms.Normalize((0.425, 0.415, 0.405), (0.205, 0.205, 0.205))
])

# NOTE(review): train_ds and val_ds share this dataset object, so the random
# flip is also applied to validation images.
train = torchvision.datasets.ImageFolder(train_dir, transform=attention_transform)

# Hold out 20% of the images for validation.
val_size = int(len(train) * 0.2)
null_size = int(len(train) * 0.2)  # not used by any cell visible in this notebook
train_size = len(train) - val_size

# Seed the split so the same images land in the validation set on every run.
# Without a fixed seed, a checkpoint reloaded after a kernel restart would be
# validated on images it had already been trained on.
SPLIT_SEED = 42
train_ds, val_ds = random_split(
    train,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

The code below is for checking whether enlarging an image is valid.

The method demonstrated is a typical way of loading an image for display.

In [None]:
from torchvision.io import read_image
img_path = "../input/intel-image-classification/seg_pred/seg_pred/10004.jpg"
test_image = Image.open(img_path).convert("RGB")
# Leave the PIL image as the cell's last expression so Jupyter renders it inline.
# Image.show() hands the picture to an external viewer program instead, which
# typically displays nothing on a headless notebook kernel.
test_image

In [None]:
# Worker / memory settings shared by both loaders.
loader_kwargs = dict(num_workers=2, pin_memory=True)

# Training batches are reshuffled every epoch; validation batches keep their order.
train_loader = DataLoader(train_ds, batch_size=64, shuffle=True, **loader_kwargs)
valid_loader = DataLoader(val_ds, batch_size=128, **loader_kwargs)


The cell below selects the compute device.

In [None]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
device_use = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

The following cells set up the models.

Below is the code for the training loops.

In [None]:
# Show the model names timm knows about, to pick an architecture for the next
# cell. NOTE(review): the output is presumably very long — consider filtering.
timm.list_models()

In [None]:
# Vision Transformer (ViT-Base, 16x16 patches, 224x224 input) created without
# pretrained weights; the classification head is swapped for a 6-class layer
# before the fine-tuned checkpoint is loaded.
# (To inspect the architecture alone, timm.create_model('vit_base_patch16_224',
# num_classes=6) can be displayed instead.)
vit_checkpoint_path = '../input/model-states/VIT_model_state.pt'
# Alternative checkpoint: '../input/model-states/vit_normal_model_state.pt'

model = timm.create_model('vit_base_patch16_224', pretrained=False)
head_in_features = model.head.in_features
model.head = nn.Linear(head_in_features, 6)

# Map the stored tensors onto the CPU first so the checkpoint also loads on a
# machine without a GPU; the model is moved to the chosen device right after.
model.load_state_dict(
    torch.load(vit_checkpoint_path, map_location=torch.device('cpu'))
)

model.to(device_use)
model

In [None]:
# Convolutional alternative: Wide ResNet-50-2 created without pretrained weights.
# NOTE: this rebinds `model`, replacing whatever the previous cell assigned.
cnn_checkpoint_path = '../input/model-states/cnn_model_state.pt'

model = torchvision.models.wide_resnet50_2(pretrained=False)

# (To freeze the backbone, set `requires_grad = False` on each parameter here.)

# Replace the final fully connected layer with a 6-class output layer.
num_ftrt = model.fc.in_features
model.fc = nn.Linear(num_ftrt, 6)

# Load the fine-tuned weights onto the CPU, then move the model to the device.
model.load_state_dict(
    torch.load(cnn_checkpoint_path, map_location=torch.device('cpu'))
)
model.to(device_use)
model

In [None]:
# Loss, optimizer and learning-rate schedule used by the training loop.
import torch.optim as optim

# Multi-class classification loss.
criterion = nn.CrossEntropyLoss()

# Adam over every model parameter, starting at lr = 1e-3.
optimizer = optim.Adam(model.parameters(), lr=1e-3)

# Multiply the learning rate by 0.1 at milestones 3 and 5
# (counted in scheduler.step() calls).
scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[3, 5], gamma=0.1)

In [None]:
# Resume from the checkpoint written by the training cell, if one exists.
# On a fresh session the file has not been created yet, so skip loading instead
# of failing with FileNotFoundError (which would break "Run All").
resume_path = './model_state.pt'
if os.path.exists(resume_path):
    # map_location lets a checkpoint saved on a GPU be restored on a CPU-only machine.
    model.load_state_dict(torch.load(resume_path, map_location=device_use))
else:
    print("No checkpoint found at {}; keeping the current weights.".format(resume_path))
model.to(device_use)


In [None]:
@torch.no_grad()
def evaluate(model, loader=None, loss_fn=None, device=None, log_every=5):
    """Return the summed (not averaged) loss of ``model`` over a data loader.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        loader: iterable of ``(data, target)`` batches. Defaults to the
            notebook-level ``valid_loader``.
        loss_fn: callable ``loss_fn(output, target)`` returning a scalar mean
            loss. Defaults to the notebook-level ``criterion``.
        device: device the batches are moved to. Defaults to the notebook-level
            ``device_use``.
        log_every: print the running total every this many batches; a falsy
            value disables the progress output.

    Returns:
        float: sum over batches of ``batch_loss * batch_size``. Divide by the
        number of samples to get the mean per-sample loss.
    """
    # Fall back to the notebook globals only when the caller passes nothing,
    # so existing ``evaluate(model)`` calls behave exactly as before.
    loader = valid_loader if loader is None else loader
    loss_fn = criterion if loss_fn is None else loss_fn
    device = device_use if device is None else device

    valid_loss = 0.0
    model.eval()
    for batch_idx, (data, target) in enumerate(loader):
        # .to() is a no-op when the tensors already live on ``device``, so no
        # CUDA-availability guard is needed.
        data, target = data.to(device), target.to(device)
        # forward pass: compute the validation predictions
        output = model(data)
        # the loss is a batch mean; weight it by the batch size so that
        # differently sized batches contribute proportionally
        loss = loss_fn(output, target)
        valid_loss += loss.item() * data.size(0)
        if log_every and batch_idx % log_every == 0:
            print(batch_idx, valid_loss)
    return valid_loss

In [None]:
# Best validation loss seen so far; starts at +infinity so the first epoch
# always counts as an improvement. (np.Inf was removed in NumPy 2.0 — np.inf is
# the spelling that works on every NumPy version.)
valid_loss_min = np.inf
# Per-epoch history of the mean validation and training losses.
val_loss = []
tn_loss = []

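Warning: running the training cell repeatedly is cumulative — each run continues training the same model. Fortunately, the checkpoint is only saved when the validation loss improves, so an over-trained model won't be saved.
Keep track here of how many times the cell below has been run: after $x$ runs, the model has been trained for $x \times$ (the number of epochs recorded per run) epochs in total.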

Note: while escaping a local minimum, the model's validation loss can stay high for up to 2–3 epochs (with lr=1e-2). To be safe, use an early-stopping patience of 5 epochs.
Patience is also required, as 10 epochs of training can take 30 minutes — but the result is worth it.

Training so far consisted of:
20 epochs with lr=1e-2 (overfitting after 15 epochs)

10 epochs with lr=5e-4

This reached an accuracy of 0.85.

In [None]:
# Display the best validation loss recorded so far.
valid_loss_min

In [None]:
# Train for a fixed number of epochs, validating after each one and saving a
# checkpoint whenever the validation loss does not get worse.
import time

epochs = 7
torch.cuda.empty_cache()

for epoch in range(epochs):

    # running sum of (batch loss * batch size) for this epoch
    train_loss = 0.0
    time_start = time.time()

    # Train the model
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):

        # Move the batch to the selected device. The original guard,
        # `if torch.cuda.is_available:`, was missing its call parentheses and
        # was therefore always true; .to() is already a no-op on the CPU.
        data, target = data.to(device_use), target.to(device_use)
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predictions by passing inputs through the model
        output = model(data)
        # calculate batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss w.r.t. the parameters
        loss.backward()
        # update parameters with a single optimization step
        optimizer.step()

        # update training loss
        train_loss += loss.item() * data.size(0)
        if batch_idx % 10 == 0:
            print(batch_idx, train_loss)

    # evaluate() returns the summed validation loss; average both losses per sample
    valid_loss = evaluate(model)
    train_loss = train_loss / len(train_loader.sampler)
    valid_loss = valid_loss / len(valid_loader.sampler)
    val_loss.append(valid_loss)
    tn_loss.append(train_loss)
    # update learning rate (once per epoch)
    scheduler.step()
    # Print the train and validation loss statistics
    print('Epoch: {} \t Training Loss: {:.3f} \t Validation Loss: {:.3f}'.format(epoch, train_loss, valid_loss))

    # save model if the validation loss did not increase
    if valid_loss <= valid_loss_min:
        print("Validation loss decreased {:.4f}--->{:.4f}  Saving model...".format(valid_loss_min, valid_loss))
        # save current model
        torch.save(model.state_dict(), 'model_state.pt')
        valid_loss_min = valid_loss
    # Scientific notation: with four fixed decimals a rate of 1e-5 (reached
    # after the second milestone) would be printed as 0.0000.
    print('Learning Rate ------------->{:.2e}'.format(optimizer.param_groups[0]['lr']))

    time_end = time.time()
    print("training time for epoch {} is: {}".format(epoch, time_end - time_start))

In [None]:
# below is the code segments for model evaluation. place them inside a function!!!


@torch.no_grad()
def accuracy_examine(net=None, loader=None, device=None):
    """Compute and print the classification accuracy over a data loader.

    The accuracy is ``correct predictions / total samples``. Averaging the
    per-batch accuracies instead would give the (usually smaller) last batch the
    same weight as a full batch and bias the result.

    Args:
        net: network to evaluate; defaults to the notebook-level ``model``.
            It is switched to eval mode and left there.
        loader: iterable of ``(data, target)`` batches; defaults to the
            notebook-level ``valid_loader``.
        device: device the batches are moved to; defaults to the notebook-level
            ``device_use``.

    Returns:
        float: fraction of samples whose arg-max prediction equals the target.

    Raises:
        ValueError: if the loader yields no samples.
    """
    # Fall back to the notebook globals so a bare accuracy_examine() still works.
    net = model if net is None else net
    loader = valid_loader if loader is None else loader
    device = device_use if device is None else device

    net.eval()
    correct = 0
    total = 0
    for data, target in loader:
        # .to() works for any device, unlike the CUDA-only .cuda()
        data, target = data.to(device), target.to(device)
        # predicted class = index of the largest output per sample
        predictions = net(data).argmax(dim=1)
        correct += (predictions == target).sum().item()
        total += target.size(0)

    if total == 0:
        raise ValueError("accuracy_examine: the loader produced no samples")

    accuracy = correct / total
    print("correct: {} / {}  accuracy: {:.4f}".format(correct, total, accuracy))
    return accuracy

# Report the accuracy of the current `model` on the validation loader.
accuracy_examine()

From here on, we implement a saliency map based on the model trained above.

The general idea: run a forward pass on an image, then take the gradient with respect to the input image from the backward pass.

In [None]:
# Saliency maps: for one test image per class, show the image next to the
# magnitude of the gradient of its class score with respect to the input pixels.
model.eval()

# Un-normalised version of the image, used only for display.
img_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# Model input for the saliency pass: same resize and normalisation as
# attention_transform but WITHOUT the random horizontal flip, so the saliency
# map is always aligned with the image shown above it.
saliency_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize((0.425, 0.415, 0.405), (0.205, 0.205, 0.205)),
])

# one sample image per class
sample_building = "../input/intel-image-classification/seg_test/seg_test/buildings/20064.jpg"
sample_forest = "../input/intel-image-classification/seg_test/seg_test/forest/20062.jpg"
sample_glacier = "../input/intel-image-classification/seg_test/seg_test/glacier/20111.jpg"
sample_mountain = "../input/intel-image-classification/seg_test/seg_test/mountain/20120.jpg"
sample_sea = "../input/intel-image-classification/seg_test/seg_test/sea/20106.jpg"
sample_street = "../input/intel-image-classification/seg_test/seg_test/street/20070.jpg"

samples = [sample_building, sample_forest, sample_glacier, sample_mountain, sample_sea, sample_street]
# class index of each sample, in the same order as `samples`
target_sample = [0, 1, 2, 3, 4, 5]

for sample, class_idx in zip(samples, target_sample):
    img = Image.open(sample).convert("RGB")

    # (3, H, W) tensor in [0, 1] for display
    img_show = img_transform(img)

    # Batch of one on the model's device; it is a leaf tensor, so gradients can
    # be requested for it directly (no deprecated Variable wrapper needed).
    img_input = saliency_transform(img).unsqueeze(dim=0).to(device_use)
    img_input.requires_grad_(True)

    model_out = model(img_input)
    print(model_out)

    # Gradient of the true-class score w.r.t. the input pixels.
    # torch.autograd.grad returns it directly and leaves the model parameters'
    # .grad buffers untouched (loss.backward() would accumulate into them).
    class_score = model_out[0, class_idx]
    images_grads, = torch.autograd.grad(class_score, img_input)

    # Saliency = largest absolute gradient across the colour channels.
    saliency, _ = images_grads.abs().max(dim=1)
    saliency = saliency.cpu().numpy()

    fig, (ax_image, ax_saliency) = plt.subplots(2, 1, figsize=(12, 5))
    # imshow expects (H, W, 3); indexing img_show[0] would display only the
    # red channel as a false-colour map.
    ax_image.imshow(img_show.permute(1, 2, 0).numpy())
    ax_image.axis('off')
    ax_saliency.imshow(saliency[0], cmap=plt.cm.hot)
    ax_saliency.axis('off')
    plt.show()

