In [1]:
from Ewa_loading_data_scene_kinds import MultiPartitioningClassifier
import yaml
from argparse import Namespace
import torch

# Read the experiment configuration from disk.
# NOTE(review): yaml.load with FullLoader can construct more than plain
# scalars/lists/dicts; if this file is ever sourced from an untrusted
# location, consider yaml.safe_load instead.
with open('../config/baseM_Ewa.yml') as f:
    config = yaml.load(f, Loader=yaml.FullLoader)

# The "model_params" section is handed to the classifier as an argparse-style
# namespace (attribute access instead of dict lookups).
model_params = config["model_params"]
hparams = Namespace(**model_params)
tmp_model = MultiPartitioningClassifier(hparams=hparams)

# The classifier instance is only used here as a factory for the data loaders;
# the training loader is built first, then the validation loader.
train_data_loader, val_data_loader = (
    tmp_model.train_dataloader(),
    tmp_model.val_dataloader(),
)

In [2]:
# Pull a single validation batch to sanity-check tensor layouts.
it = iter(val_data_loader)
first_batch = next(it)

# Element 1 of the batch is printed as stored, then after moving its last
# axis to position 1 (the same permutation the model applies in forward()).
second_item = first_batch[1]
print(second_item.shape)
print(second_item.permute(0, 3, 1, 2).shape)

torch.Size([16, 224, 224, 3])
torch.Size([16, 3, 224, 224])


In [3]:
import os
import pandas as pd
import numpy as np
import torch
import torchvision
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms,models
import torch.nn as nn
from torch.nn import functional as F
from torch.optim import lr_scheduler
from sklearn.metrics import accuracy_score, confusion_matrix
from torchsummary import summary
import random
from sklearn.metrics import f1_score, confusion_matrix, precision_score, recall_score
from torchsummary import summary
import warnings
# Suppress only warnings whose message matches this specific pattern.
warnings.filterwarnings("ignore", message="numerical errors at iteration 0")


# --- Optimisation settings -------------------------------------------------
num_epochs, learning_rate = 600, 0.001

# --- Label-space sizes -----------------------------------------------------
# Only num_classes_coarse is consumed by the model defined in this cell
# (output width of its final linear layer).
num_classes_coarse, num_classes_middle, num_classes_fine = 3298, 7202, 12893

# --- Attention MLP widths --------------------------------------------------
# Output widths of the four stacked linear layers in the attention network;
# the last one is the number of attention "hops".
(
    multi_hop_dim_1,
    multi_hop_dim_2,
    multi_hop_dim_3,
    multi_hop_dim_4,
) = (200, 120, 80, 50)


class GeoClassification(nn.Module):
    """Two-stream (RGB + segmentation) image classifier with attention fusion.

    Pipeline, as implemented in ``forward``:

    1. Each input is passed through a truncated ResNet (ResNet-34 for RGB,
       ResNet-18 for the segmentation image; the last two children of each
       torchvision model are dropped) to get a 512-channel feature map.
    2. A shared multi-hop attention MLP (``W_s1``..``W_s4``) produces
       ``multi_hop_dim_4`` attention distributions over the spatial positions
       of each stream; the attended features are flattened and projected to a
       6000-d embedding per stream.
    3. A shared scoring MLP (``atmf_1``..``atmf_5``) assigns one scalar score
       per stream and sample; a softmax over the two streams (+1) yields
       per-sample fusion weights that rescale the two embeddings.
    4. The rescaled embeddings are concatenated and classified into
       ``num_classes_coarse`` logits.

    Relies on the module-level constants ``multi_hop_dim_1`` ..
    ``multi_hop_dim_4`` and ``num_classes_coarse``.
    """

    def __init__(self):
        super(GeoClassification, self).__init__()

        self.rgb_model = models.resnet34(pretrained=True)  ## load pre-trained weights ##
        self.rgb_features = nn.Sequential(*list(self.rgb_model.children())[:-2])

        self.seg_model = models.resnet18(pretrained=True)  ## load pre-trained weights ##
        self.seg_features = nn.Sequential(*list(self.seg_model.children())[:-2])

        # Multi-hop attention MLP, shared by both streams.
        self.W_s1 = nn.Linear(512, multi_hop_dim_1)
        self.W_s2 = nn.Linear(multi_hop_dim_1, multi_hop_dim_2)
        self.W_s3 = nn.Linear(multi_hop_dim_2, multi_hop_dim_3)
        self.W_s4 = nn.Linear(multi_hop_dim_3, multi_hop_dim_4)

        # Per-stream projection of the flattened attended features.
        self.rgb_linear_1 = nn.Linear(multi_hop_dim_4 * 512, 12000)
        self.rgb_linear_2 = nn.Linear(12000, 6000)

        self.seg_linear_1 = nn.Linear(multi_hop_dim_4 * 512, 12000)
        self.seg_linear_2 = nn.Linear(12000, 6000)

        # Modality-scoring MLP (one scalar per stream embedding), shared.
        self.atmf_1 = nn.Linear(6000,400)
        self.atmf_2 = nn.Linear(400,80)
        self.atmf_3 = nn.Linear(80,30)
        self.atmf_4 = nn.Linear(30,10)
        self.atmf_5 = nn.Linear(10,1)

        # Classifier head over the concatenated (2 x 6000) embedding.
        self.fc_concat1 = nn.Linear(12000,10000)
        self.fc_concat2 = nn.Linear(10000,6000)
        self.fc_concat3 = nn.Linear(6000, num_classes_coarse)

        self.dropout = nn.Dropout(p=0.20)
        self.relu = torch.nn.LeakyReLU()

    def attention_net(self, features):
        """Return attention weights over the spatial positions of ``features``.

        Args:
            features: tensor of shape (batch, positions, 512).

        Returns:
            Tensor of shape (batch, multi_hop_dim_4, positions); each row is a
            softmax distribution over positions.
        """
        hidden = features
        for layer in (self.W_s1, self.W_s2, self.W_s3):
            hidden = self.dropout(self.relu(layer(hidden)))
        attn_weight_matrix = self.W_s4(hidden).permute(0, 2, 1)
        return F.softmax(attn_weight_matrix, dim=2)

    def _modality_score(self, embedding):
        """Map a (batch, 6000) stream embedding to a (batch, 1) raw score."""
        hidden = embedding
        for layer in (self.atmf_1, self.atmf_2, self.atmf_3, self.atmf_4):
            hidden = self.dropout(self.relu(layer(hidden)))
        return self.atmf_5(hidden)

    def _attend(self, feature_map):
        """Apply multi-hop attention to a (batch, 512, H, W) feature map.

        Returns a (batch, multi_hop_dim_4 * 512) tensor.
        """
        # (batch, 512, H, W) -> (batch, H*W, 512); flatten(2) avoids
        # hard-coding the number of spatial positions.
        regions = feature_map.flatten(2).permute(0, 2, 1)
        weights = self.attention_net(regions)
        return torch.bmm(weights, regions).flatten(1)

    def forward(self, rgb_image, seg_image):
        """Compute class logits for a batch of RGB / segmentation image pairs.

        Args:
            rgb_image: channels-first image batch, (batch, 3, H, W).
            seg_image: channels-last image batch, (batch, H, W, 3); it is
                permuted to channels-first here.

        Returns:
            Logits of shape (batch, num_classes_coarse).
        """
        seg_image = seg_image.permute(0, 3, 1, 2)

        att_rgb_features = self._attend(self.rgb_features(rgb_image))
        att_seg_features = self._attend(self.seg_features(seg_image))

        att_rgb_features = self.rgb_linear_2(self.dropout(self.relu(self.rgb_linear_1(att_rgb_features))))
        att_seg_features = self.seg_linear_2(self.dropout(self.relu(self.seg_linear_1(att_seg_features))))

        s_rgb = self._modality_score(att_rgb_features)
        s_seg = self._modality_score(att_seg_features)

        # Per-sample softmax over the two modalities. Concatenating along
        # dim=1 gives (batch, 2); concatenating along dim=0 and reading
        # entries 0/1 would only be correct for batch size 1. The weights stay
        # tensors (no .item()) so gradients reach the atmf_* layers.
        s_comb = F.softmax(torch.cat((s_rgb, s_seg), dim=1), dim=1) + 1
        att_rgb_features = att_rgb_features * s_comb[:, 0:1]
        att_seg_features = att_seg_features * s_comb[:, 1:2]

        concat_embed = torch.cat((att_rgb_features, att_seg_features), 1)

        concat_embed = self.relu(self.dropout(self.fc_concat1(concat_embed)))
        concat_embed = self.relu(self.dropout(self.fc_concat2(concat_embed)))

        output = self.fc_concat3(concat_embed)

        return (output)
    

# Prefer the GPU when one is visible, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Build the network on the chosen device, then wrap it for multi-GPU use.
model = nn.DataParallel(GeoClassification().to(device))

# Loss, optimiser and learning-rate schedule (x0.1 every 7 scheduler steps).
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
step_lr_scheduler = lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

# Layer-by-layer overview; inputs are (C, H, W) for RGB and (H, W, C) for the
# segmentation image, matching what forward() expects.
input_shapes = [(3, 224, 224), (224, 224, 3)]
print(summary(model, input_shapes))

    There is an imbalance between your GPUs. You may want to exclude GPU 0 which
    has less than 75% of the memory or cores of GPU 1. You can do so by setting
    the device_ids argument to DataParallel, or by setting the CUDA_VISIBLE_DEVICES
    environment variable.


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 64, 112, 112]           9,408
            Conv2d-2         [-1, 64, 112, 112]           9,408
            Conv2d-3         [-1, 64, 112, 112]           9,408
       BatchNorm2d-4         [-1, 64, 112, 112]             128
            Conv2d-5         [-1, 64, 112, 112]           9,408
       BatchNorm2d-6         [-1, 64, 112, 112]             128
              ReLU-7         [-1, 64, 112, 112]               0
              ReLU-8         [-1, 64, 112, 112]               0
       BatchNorm2d-9         [-1, 64, 112, 112]             128
        MaxPool2d-10           [-1, 64, 56, 56]               0
        MaxPool2d-11           [-1, 64, 56, 56]               0
      BatchNorm2d-12         [-1, 64, 112, 112]             128
             ReLU-13         [-1, 64, 112, 112]               0
             ReLU-14         [-1, 64, 1

In [4]:
import warnings
warnings.filterwarnings("ignore")

n_total_steps = len(train_data_loader)

batch_wise_loss = []
batch_wise_micro_f1 = []
batch_wise_macro_f1 = []
epoch_wise_macro_f1 = []
epoch_wise_micro_f1 = []

for epoch in range(num_epochs):
    # ------------------------------------------------------------------
    # Training pass
    # ------------------------------------------------------------------
    model.train()
    for i, (rgb_image, seg_image, label, _, _, _) in enumerate(train_data_loader):
        rgb_image = rgb_image.type(torch.float32).to(device)
        seg_image = seg_image.type(torch.float32).to(device)

        # label is a sequence; only its first entry is used as the target.
        label = label[0].to(device)

        # Forward pass
        outputs = model(rgb_image, seg_image)
        loss = criterion(outputs, label)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if (i+1) % 20 == 0:
            print (f'Epoch [{epoch+1}/{num_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')

    # StepLR counts scheduler steps as epochs: advance it once per epoch.
    # Stepping it per batch would shrink the learning rate by 10x every
    # 7 batches.
    step_lr_scheduler.step()

    # ------------------------------------------------------------------
    # Validation pass
    # ------------------------------------------------------------------
    target_total_test = []
    predicted_total_test = []
    n_correct = 0
    n_samples = 0

    model.eval()
    with torch.no_grad():
        for i, (rgb_image, seg_image, label, _, _, _) in enumerate(val_data_loader):
            rgb_image = rgb_image.type(torch.float32).to(device)
            seg_image = seg_image.type(torch.float32).to(device)

            label = label[0].to(device)

            # Forward pass
            outputs = model(rgb_image, seg_image)
            # max returns (value ,index)
            _, predicted = torch.max(outputs, 1)

            n_samples += label.size(0)
            n_correct += (predicted == label).sum().item()

            # Keep per-batch results on the CPU so they do not pin GPU memory
            # for the whole epoch.
            target_total_test.append(label.cpu())
            predicted_total_test.append(predicted.cpu())

    # Concatenate (not stack) so a smaller final batch is handled, and do it
    # once after the loop instead of on every batch.
    target_inter = torch.cat(target_total_test).numpy().ravel()
    predicted_inter = torch.cat(predicted_total_test).numpy().ravel()

    current_macro = f1_score(target_inter, predicted_inter, average="macro")
    current_micro = f1_score(target_inter, predicted_inter, average="micro")
    epoch_wise_macro_f1.append(current_macro)
    epoch_wise_micro_f1.append(current_micro)

    # The default average='binary' raises on multiclass targets; report
    # macro-averaged precision/recall to match the macro F1 above.
    current_precision = precision_score(target_inter, predicted_inter, average="macro", zero_division=0)
    current_recall = recall_score(target_inter, predicted_inter, average="macro", zero_division=0)

    best_macro = max(epoch_wise_macro_f1)
    best_micro = max(epoch_wise_micro_f1)

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy of the network on the test set after Epoch {epoch+1} is: {acc} %')
    print(f' Micro F1 on the testing: {current_micro}')
    print(f' Macro F1 on the testing: {current_macro}')
    print(f' Precision on the testing: {current_precision}')
    print(f' Recall on the testing: {current_recall}')
    print(confusion_matrix(target_inter, predicted_inter))
    print(f'Best Macro F1 on test set till this epoch: {best_macro} Found in Epoch No: {epoch_wise_macro_f1.index(best_macro)+1}')
    print(f'Best Micro F1 on test set till this epoch: {best_micro} Found in Epoch No: {epoch_wise_micro_f1.index(best_micro)+1}')

RuntimeError: CUDA out of memory. Tried to allocate 458.00 MiB (GPU 0; 23.65 GiB total capacity; 21.51 GiB already allocated; 362.56 MiB free; 21.78 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [18]:
import numpy as np
from sklearn.metrics import top_k_accuracy_score
import torch

def topk_accuracy(target, output, k):
    """Fraction of samples whose true label is among the k highest scores.

    Args:
        target: array-like of integer class indices, one per sample
            (shape (n_samples,) or (n_samples, 1)).
        output: array-like of per-class scores, shape (n_samples, n_classes).
        k: number of top-scoring classes to consider. Values larger than
            n_classes are treated as n_classes; k == 0 yields 0.0.

    Returns:
        The top-k accuracy as a NumPy float in [0, 1] (nan for zero samples).

    Raises:
        ValueError: if k is negative.
    """
    if k < 0:
        raise ValueError(f"k must be non-negative, got {k}")

    output = np.asarray(output)
    target = np.asarray(target).reshape(-1)

    n_classes = output.shape[1]
    k = min(k, n_classes)

    # Slice from an explicit start index: "-k:" with k == 0 would select the
    # whole row and report 100% accuracy.
    topn = np.argsort(output, axis=1)[:, n_classes - k:]

    hits = (topn == target[:, None]).any(axis=1)
    return np.mean(hits)

# Small worked example: four samples, three classes, top-2 accuracy.
y_score = np.array([
    [0.5, 0.2, 0.2],  # 0 is in top 2
    [0.3, 0.4, 0.2],  # 1 is in top 2
    [0.2, 0.4, 0.3],  # 2 is in top 2
    [0.7, 0.2, 0.1],  # 2 isn't in top 2
])
y_true = np.array([0, 1, 2, 2])

# Three of the four true labels fall inside the two best-scored classes.
print(topk_accuracy(y_true, y_score, k = 2))

0.75
