In [1]:
# importing the libraries
import numpy as np

# for evaluating the model
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# PyTorch libraries and modules
import torch
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

import pickle
def load_data(in_dir):
    """Load the pickled train/validation splits from a single file.

    Args:
        in_dir: Path of the pickle file to read. Despite the name it is
            handed straight to ``open`` and therefore must point at a file.

    Returns:
        The 4-tuple ``(train_data, train_label, valid_data, valid_label)``
        exactly as it was stored in the pickle.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If the unpickled object does not unpack into four items.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file;
    # only use this on pickles that come from a trusted source.
    # The context manager guarantees the handle is closed even if unpickling
    # or unpacking raises (the previous version never closed it).
    with open(in_dir, 'rb') as f:
        train_data, train_label, valid_data, valid_label = pickle.load(f)
    return train_data, train_label, valid_data, valid_label

# Pickle file holding the four arrays unpacked below; the path is relative to
# the notebook's working directory.
data_path = 'adress_spectograms.pkl'
# Directory name reserved for model checkpoints.
# NOTE(review): not referenced anywhere else in the visible cells.
checkpoint = 'checkpoint/'

# Load the train/validation inputs and labels in one go.
train_data,train_label,valid_data,valid_label = load_data(data_path)

In [2]:
# Wrap the training inputs in a torch tensor (torch.from_numpy requires a
# NumPy array and shares its memory).
train_x = torch.from_numpy(train_data)

# Flatten the training labels to 1-D. reshape(-1) adapts to however many
# samples the pickle contains instead of hard-coding the sample count.
train_y = train_label
train_y = train_y.reshape(-1).astype(float)
train_y = torch.from_numpy(train_y)

from torch.utils.data import TensorDataset, DataLoader

# Use the GPU when one is present and fall back to the CPU otherwise, matching
# the torch.cuda.is_available() guard used when the model is created.
CTX = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# The whole training set is moved to CTX up front as float32. Labels are kept
# as float here and cast to int64 right before the loss is computed.
train_dataset = TensorDataset(
    train_x.to(device=CTX, dtype=torch.float),
    train_y.to(device=CTX, dtype=torch.float),
)

 # create your dataloader


In [3]:
# Wrap the validation inputs in a torch tensor.
val_x = torch.from_numpy(valid_data)

# Flatten the validation labels to 1-D. reshape(-1) adapts to however many
# samples the pickle contains instead of hard-coding the sample count.
val_y = valid_label
val_y = val_y.reshape(-1).astype(float)
val_y = torch.from_numpy(val_y)

# Unlike the training set, these tensors stay where torch.from_numpy put them
# (CPU, original dtypes); validation_step moves and casts each batch.
val_dataset = TensorDataset(val_x, val_y)

In [4]:
from torch.utils.data.dataloader import DataLoader
from torch.utils.data import random_split

# Mini-batch size used for training; validation batches are twice as large.
batch_size = 16

# Training batches are reshuffled every epoch; validation batches keep the
# dataset order. num_workers=0 loads data in the main process.
train_dl = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
)
val_dl = DataLoader(
    val_dataset,
    batch_size=batch_size * 2,
    num_workers=0,
)

In [5]:
import torch.nn as nn
import torch.nn.functional as F

class ImageClassificationBase(nn.Module):
    """Base class adding training/validation step helpers to a classifier.

    Subclasses implement ``forward``; its output is fed to
    ``F.cross_entropy`` together with integer class labels.
    """

    def _prepare_batch(self, batch):
        """Unpack ``(images, labels)`` and put both on the model's device.

        Images are cast to float32 and labels to int64 (the dtype
        ``F.cross_entropy`` needs for class indices). The target device is
        taken from the model's own parameters rather than from a global or a
        hard-coded ``.cuda()`` call, so the same code runs on CPU and GPU.
        """
        images, labels = batch
        first_param = next(self.parameters(), None)
        # A parameter-free model gives no device hint; leave data in place.
        device = first_param.device if first_param is not None else images.device
        images = images.to(device=device, dtype=torch.float)
        labels = labels.to(device=device, dtype=torch.int64)
        return images, labels

    def training_step(self, batch):
        """Return the cross-entropy loss (with autograd graph) for one batch."""
        images, labels = self._prepare_batch(batch)
        out = self(images)                      # Generate predictions
        return F.cross_entropy(out, labels)     # Calculate loss

    def validation_step(self, batch):
        """Return ``{'val_loss', 'val_acc'}`` tensors for one batch."""
        images, labels = self._prepare_batch(batch)
        out = self(images)                      # Generate predictions
        loss = F.cross_entropy(out, labels)     # Calculate loss
        acc = accuracy(out, labels)             # Calculate accuracy
        return {'val_loss': loss.detach(), 'val_acc': acc}

    def validation_epoch_end(self, outputs):
        """Average the per-batch validation results into Python floats.

        NOTE: this is an unweighted mean over batches, so a smaller final
        batch counts as much as a full one.
        """
        batch_losses = [x['val_loss'] for x in outputs]
        epoch_loss = torch.stack(batch_losses).mean()   # Combine losses
        batch_accs = [x['val_acc'] for x in outputs]
        epoch_acc = torch.stack(batch_accs).mean()      # Combine accuracies
        return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

    def epoch_end(self, epoch, result):
        """Print one summary line; ``result`` must also contain 'train_loss'."""
        print("Epoch [{}], train_loss: {:.6f}, val_loss: {:.4f}, val_acc: {:.4f}".format(
            epoch, result['train_loss'], result['val_loss'], result['val_acc']))
        
        

import torch
import math
import torch.nn as nn
import torch.nn.functional as F

# importing the libraries
import numpy as np

# for evaluating the model
from sklearn.metrics import accuracy_score
from tqdm import tqdm

# PyTorch libraries and modules
import torch
from torch.autograd import Variable
from torch.nn import Linear, ReLU, CrossEntropyLoss, Sequential, Conv2d, MaxPool2d, Module, Softmax, BatchNorm2d, Dropout
from torch.optim import Adam, SGD

class BasicConv(nn.Module):
    """Conv2d followed by an optional BatchNorm2d and an optional ReLU.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels (also stored as ``out_channels``).
        kernel_size, stride, padding, dilation, groups, bias: Forwarded to
            ``nn.Conv2d`` unchanged.
        relu: When true, a ReLU is applied last.
        bn: When true, batch normalisation (eps=1e-5, momentum=0.01, affine)
            is applied right after the convolution.
    """

    def __init__(self, in_planes, out_planes, kernel_size, stride=1, padding=0, dilation=1, groups=1, relu=True, bn=True, bias=False):
        super().__init__()
        self.out_channels = out_planes
        self.conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=padding,
            dilation=dilation,
            groups=groups,
            bias=bias,
        )
        # Disabled stages are stored as None and skipped in forward().
        if bn:
            self.bn = nn.BatchNorm2d(out_planes, eps=1e-5, momentum=0.01, affine=True)
        else:
            self.bn = None
        if relu:
            self.relu = nn.ReLU()
        else:
            self.relu = None

    def forward(self, x):
        """Run conv, then whichever of bn / relu are enabled, in that order."""
        out = self.conv(x)
        for stage in (self.bn, self.relu):
            if stage is not None:
                out = stage(out)
        return out

class Flatten(nn.Module):
    """Collapse every dimension after the first into a single one."""

    def forward(self, x):
        """Return a view of ``x`` with shape ``(x.size(0), -1)``."""
        leading = x.shape[0]
        return x.view(leading, -1)

class ChannelGate(nn.Module):
    """Channel attention: rescale each channel by a learned sigmoid gate.

    For every entry in ``pool_types`` the input is pooled over its two
    trailing dimensions, pushed through a shared two-layer MLP
    (``gate_channels -> gate_channels // reduction_ratio -> gate_channels``),
    and the MLP outputs are summed. The sigmoid of that sum is broadcast over
    the spatial dimensions and multiplied into the input.

    Args:
        gate_channels: Number of channels of the input feature map.
        reduction_ratio: Divisor for the MLP's hidden width.
        pool_types: Iterable of pooling names drawn from
            ``'avg'``, ``'max'``, ``'lp'``, ``'lse'``.

    Raises:
        ValueError: If ``pool_types`` is empty or contains an unknown name.
    """

    _POOL_TYPES = ('avg', 'max', 'lp', 'lse')

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max']):
        super(ChannelGate, self).__init__()
        # Copy so the shared default list (or a caller's list) is never
        # aliased by this instance.
        pool_types = list(pool_types)
        if not pool_types:
            raise ValueError("pool_types must contain at least one pooling type")
        unknown = [p for p in pool_types if p not in self._POOL_TYPES]
        if unknown:
            raise ValueError(
                "unknown pool type(s) {}; expected a subset of {}".format(unknown, self._POOL_TYPES))
        self.gate_channels = gate_channels
        self.mlp = nn.Sequential(
            Flatten(),
            nn.Linear(gate_channels, gate_channels // reduction_ratio),
            nn.ReLU(),
            nn.Linear(gate_channels // reduction_ratio, gate_channels)
            )
        self.pool_types = pool_types

    @staticmethod
    def _pool(x, pool_type):
        """Pool ``x`` over its full spatial extent using the named method."""
        spatial = (x.size(2), x.size(3))
        if pool_type == 'avg':
            return F.avg_pool2d(x, spatial, stride=spatial)
        if pool_type == 'max':
            return F.max_pool2d(x, spatial, stride=spatial)
        if pool_type == 'lp':
            return F.lp_pool2d(x, 2, spatial, stride=spatial)
        if pool_type == 'lse':
            # LSE pool only
            return logsumexp_2d(x)
        # Previously an unknown name fell through every branch and either
        # reused the previous iteration's result or hit an unbound local.
        raise ValueError("unknown pool type {!r}".format(pool_type))

    def forward(self, x):
        """Return ``x`` multiplied by its per-channel attention scale."""
        channel_att_sum = None
        for pool_type in self.pool_types:
            channel_att_raw = self.mlp(self._pool(x, pool_type))
            if channel_att_sum is None:
                channel_att_sum = channel_att_raw
            else:
                channel_att_sum = channel_att_sum + channel_att_raw

        if channel_att_sum is None:
            # Only reachable if pool_types was emptied after construction.
            raise ValueError("pool_types must contain at least one pooling type")

        # torch.sigmoid computes the same values as F.sigmoid, which emits a
        # deprecation warning on older PyTorch releases.
        scale = torch.sigmoid(channel_att_sum).unsqueeze(2).unsqueeze(3).expand_as(x)
        return x * scale

def logsumexp_2d(tensor):
    """Log-sum-exp over everything after the first two dimensions.

    Args:
        tensor: Tensor with at least two dimensions; all trailing dimensions
            are merged and reduced.

    Returns:
        Tensor of shape ``(tensor.size(0), tensor.size(1), 1)``.
    """
    # reshape (rather than view) also accepts non-contiguous inputs.
    tensor_flatten = tensor.reshape(tensor.size(0), tensor.size(1), -1)
    # torch.logsumexp is numerically stabilised like the former hand-rolled
    # max-shift, but returns -inf instead of NaN when a row is all -inf.
    return torch.logsumexp(tensor_flatten, dim=2, keepdim=True)

class ChannelPool(nn.Module):
    """Reduce dimension 1 to two maps: its maximum and its mean."""

    def forward(self, x):
        """Return ``cat([max over dim 1, mean over dim 1], dim=1)``."""
        channel_max = x.max(dim=1, keepdim=True)[0]
        channel_mean = x.mean(dim=1, keepdim=True)
        return torch.cat((channel_max, channel_mean), dim=1)

class SpatialGate(nn.Module):
    """Spatial attention: rescale every location by a learned sigmoid gate.

    The input is squeezed to two maps by ``ChannelPool`` and a 7x7
    ``BasicConv`` (no ReLU) turns them into a single-channel map whose
    sigmoid multiplies the input.
    """

    def __init__(self):
        super().__init__()
        ksize = 7
        # (k - 1) // 2 padding keeps the spatial size unchanged at stride 1.
        same_pad = (ksize - 1) // 2
        self.compress = ChannelPool()
        self.spatial = BasicConv(2, 1, ksize, stride=1, padding=same_pad, relu=False)

    def forward(self, x):
        """Return ``x`` multiplied by its spatial attention scale."""
        gate_logits = self.spatial(self.compress(x))
        # The single-channel scale broadcasts across all channels of x.
        return x * F.sigmoid(gate_logits)

class CBAM(nn.Module):
    """Channel gate followed, unless disabled, by a spatial gate.

    Args:
        gate_channels, reduction_ratio, pool_types: Forwarded to ``ChannelGate``.
        no_spatial: When true, the spatial gate is neither created nor applied.
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super().__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.no_spatial = no_spatial
        if not self.no_spatial:
            self.SpatialGate = SpatialGate()

    def forward(self, x):
        """Apply the channel gate, then the spatial gate when it is enabled."""
        refined = self.ChannelGate(x)
        if self.no_spatial:
            return refined
        return self.SpatialGate(refined)

class SAM(nn.Module):
    """Residual attention combining a channel branch and a spatial branch.

    Both branches see the same input; the sigmoid of their sum is used as a
    mask and the result is ``x + x * mask``.

    Args:
        gate_channels, reduction_ratio, pool_types: Forwarded to ``ChannelGate``.
        no_spatial: Accepted but ignored; the spatial branch is always built.

    NOTE(review): ChannelGate and SpatialGate each return the input already
    multiplied by their own sigmoid scale, so the two terms summed below are
    gated feature maps rather than raw attention maps. Confirm this is the
    intended formulation.
    """

    def __init__(self, gate_channels, reduction_ratio=16, pool_types=['avg', 'max'], no_spatial=False):
        super().__init__()
        self.ChannelGate = ChannelGate(gate_channels, reduction_ratio, pool_types)
        self.SpatialGate = SpatialGate()
        self.sigmoid = torch.nn.Sigmoid()

    def forward(self, x):
        """Return ``x + x * sigmoid(ChannelGate(x) + SpatialGate(x))``."""
        channel_branch = self.ChannelGate(x)
        spatial_branch = self.SpatialGate(x)
        mask = self.sigmoid(channel_branch + spatial_branch)
        return x + x * mask
    


In [6]:
class Att_Net(ImageClassificationBase):
    """Light-weight CNN + SAM attention + linear head producing 2 logits.

    The LW_CNN module utilizes three convolutions (C), two max-pooling (MP),
    one average-pooling (AP), and one batch normalization (BN) layer.
    """

    def __init__(self):
        super().__init__()

        # Layers are listed in execution order; Sequential indices (and thus
        # state_dict keys) follow this order.
        feature_layers = [
            # C1: 120 kernels of size (11 x 11) with a (4 x 4) stride and no
            # padding, to extract initial hidden patterns from the input.
            Conv2d(2, 120, kernel_size=(11, 11), stride=(4, 4), padding=0),
            # stride=None makes the pooling stride equal to the kernel size.
            MaxPool2d(kernel_size=(3, 3), stride=None),
            Conv2d(120, 256, kernel_size=(5, 5), stride=(1, 1), padding='same'),
            MaxPool2d(kernel_size=(3, 3)),
            Conv2d(256, 384, kernel_size=(3, 3), padding='same'),
            # A (1, 1) average pool leaves the feature map shape unchanged.
            torch.nn.AvgPool2d(kernel_size=(1, 1)),
            BatchNorm2d(384),
            ReLU(inplace=True),
        ]
        self.LW_CNN = Sequential(*feature_layers)

        # 384 channels x 9 x 1 positions: the flattened LW_CNN output size
        # for a (2, 64, 344) input. Other input sizes need a different value.
        flat_features = 384 * 9 * 1
        # NOTE(review): there is no activation between these Linear layers,
        # so the head composes to a single affine map.
        self.linear_layers = Sequential(
            Linear(flat_features, 256),
            Linear(256, 64),
            Linear(64, 2),
        )

        self.attention = SAM(gate_channels=384)

    def forward(self, x):
        """Features -> attention -> flatten -> linear head; returns logits."""
        features = self.attention(self.LW_CNN(x))
        flat = features.view(features.size(0), -1)
        return self.linear_layers(flat)

In [7]:
# Build the network.
model = Att_Net()

# Optimizer and loss objects.
# NOTE(review): fit() builds its own optimizer from opt_func/lr and the step
# methods call F.cross_entropy directly, so neither object appears to be used
# by the training loop in this notebook.
optimizer = Adam(model.parameters(), lr=0.07)
criterion = CrossEntropyLoss()

# Move the model and loss to the GPU when one is available.
if torch.cuda.is_available():
    model, criterion = model.cuda(), criterion.cuda()

print(model)

Att_Net(
  (LW_CNN): Sequential(
    (0): Conv2d(2, 120, kernel_size=(11, 11), stride=(4, 4))
    (1): MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=0, dilation=1, ceil_mode=False)
    (2): Conv2d(120, 256, kernel_size=(5, 5), stride=(1, 1), padding=same)
    (3): MaxPool2d(kernel_size=(3, 3), stride=(3, 3), padding=0, dilation=1, ceil_mode=False)
    (4): Conv2d(256, 384, kernel_size=(3, 3), stride=(1, 1), padding=same)
    (5): AvgPool2d(kernel_size=(1, 1), stride=(1, 1), padding=0)
    (6): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU(inplace=True)
  )
  (linear_layers): Sequential(
    (0): Linear(in_features=3456, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=64, bias=True)
    (2): Linear(in_features=64, out_features=2, bias=True)
  )
  (attention): SAM(
    (ChannelGate): ChannelGate(
      (mlp): Sequential(
        (0): Flatten()
        (1): Linear(in_features=384, out_features=24, bias=True

In [8]:
from torchsummary import summary
# Print a per-layer table of output shapes and parameter counts for a single
# input of shape (2, 64, 344); the batch dimension is not part of the tuple.
summary(model, (2, 64, 344))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1          [-1, 120, 14, 84]          29,160
         MaxPool2d-2           [-1, 120, 4, 28]               0
            Conv2d-3           [-1, 256, 4, 28]         768,256
         MaxPool2d-4            [-1, 256, 1, 9]               0
            Conv2d-5            [-1, 384, 1, 9]         885,120
         AvgPool2d-6            [-1, 384, 1, 9]               0
       BatchNorm2d-7            [-1, 384, 1, 9]             768
              ReLU-8            [-1, 384, 1, 9]               0
           Flatten-9                  [-1, 384]               0
           Linear-10                   [-1, 24]           9,240
             ReLU-11                   [-1, 24]               0
           Linear-12                  [-1, 384]           9,600
          Flatten-13                  [-1, 384]               0
           Linear-14                   



In [9]:
def accuracy(outputs, labels):
    """Fraction of rows whose highest-scoring column equals the label.

    Args:
        outputs: 2-D tensor of scores, one row per sample.
        labels: Tensor of class indices, one per row of ``outputs``.

    Returns:
        A 0-dim CPU tensor holding ``correct / len(outputs)``.
    """
    predicted = torch.max(outputs, dim=1)[1]
    n_correct = (predicted == labels).sum().item()
    return torch.tensor(n_correct / len(predicted))

  
@torch.no_grad()
def evaluate(model, val_loader):
    """Run one validation pass with gradients disabled.

    Puts ``model`` in eval mode, calls ``model.validation_step`` on every
    batch of ``val_loader`` and returns whatever
    ``model.validation_epoch_end`` makes of the collected results.
    """
    model.eval()
    step_results = []
    for batch in val_loader:
        step_results.append(model.validation_step(batch))
    return model.validation_epoch_end(step_results)

  
def fit(epochs, lr, model, train_loader, val_loader, opt_func = torch.optim.SGD):
    """Train ``model`` for ``epochs`` epochs, validating after each one.

    Args:
        epochs: Number of passes over ``train_loader``.
        lr: Learning rate handed to ``opt_func``.
        model: Object providing ``parameters``, ``train``, ``training_step``
            and ``epoch_end``, plus whatever ``evaluate`` requires.
        train_loader: Iterable of training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate``.
        opt_func: Optimizer factory called as ``opt_func(params, lr)``.

    Returns:
        A list with one result dict per epoch: the dict returned by
        ``evaluate`` extended with the mean ``'train_loss'`` of that epoch
        (NaN if ``train_loader`` yielded no batches).
    """
    history = []
    optimizer = opt_func(model.parameters(), lr)
    for epoch in range(epochs):

        model.train()
        train_losses = []
        for batch in train_loader:
            loss = model.training_step(batch)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            # Keep only the value: storing the attached loss would hold a
            # reference to its autograd history for the whole epoch.
            train_losses.append(loss.detach())

        result = evaluate(model, val_loader)
        if train_losses:
            result['train_loss'] = torch.stack(train_losses).mean().item()
        else:
            # torch.stack raises on an empty list; report NaN instead.
            result['train_loss'] = float('nan')
        model.epoch_end(epoch, result)
        history.append(result)

    return history

In [10]:
# CTX = torch.device('cuda')
# train_dl.to(CTX)  #train_dataset.train_data is a Tensor(input data)
# # train_dl.train_labels.to(CTX)
# # val_dl.train_data.to(CTX)  #train_dataset.train_data is a Tensor(input data)
# # val_dl.train_labels.to(CTX)
# Training configuration.
num_epochs = 400
opt_func = torch.optim.Adam
lr = 0.001

# Train on the training loader and keep the per-epoch metrics.
history = fit(
    epochs=num_epochs,
    lr=lr,
    model=model,
    train_loader=train_dl,
    val_loader=val_dl,
    opt_func=opt_func,
)


Epoch [0], train_loss: 0.7890, val_loss: 0.7011, val_acc: 0.5613
Epoch [1], train_loss: 0.7147, val_loss: 0.7241, val_acc: 0.5206
Epoch [2], train_loss: 0.7110, val_loss: 0.7041, val_acc: 0.5457
Epoch [3], train_loss: 0.6897, val_loss: 0.6760, val_acc: 0.5620
Epoch [4], train_loss: 0.6840, val_loss: 0.6794, val_acc: 0.5831
Epoch [5], train_loss: 0.6802, val_loss: 0.7106, val_acc: 0.5008
Epoch [6], train_loss: 0.6746, val_loss: 0.9277, val_acc: 0.4783
Epoch [7], train_loss: 0.6739, val_loss: 1.0432, val_acc: 0.4783
Epoch [8], train_loss: 0.6615, val_loss: 0.7177, val_acc: 0.5636
Epoch [9], train_loss: 0.6446, val_loss: 0.6757, val_acc: 0.6008
Epoch [10], train_loss: 0.6416, val_loss: 0.6804, val_acc: 0.5817
Epoch [11], train_loss: 0.6226, val_loss: 0.8687, val_acc: 0.4935
Epoch [12], train_loss: 0.6100, val_loss: 0.6679, val_acc: 0.6199
Epoch [13], train_loss: 0.5898, val_loss: 0.7893, val_acc: 0.5428
Epoch [14], train_loss: 0.5556, val_loss: 0.6801, val_acc: 0.5994
Epoch [15], train_lo

Epoch [124], train_loss: 0.0000, val_loss: 5.8729, val_acc: 0.5765
Epoch [125], train_loss: 0.0000, val_loss: 5.7818, val_acc: 0.5845
Epoch [126], train_loss: 0.0000, val_loss: 6.0353, val_acc: 0.5776
Epoch [127], train_loss: 0.0000, val_loss: 6.1616, val_acc: 0.5717
Epoch [128], train_loss: 0.0000, val_loss: 6.0794, val_acc: 0.5682
Epoch [129], train_loss: 0.0000, val_loss: 6.2709, val_acc: 0.5665
Epoch [130], train_loss: 0.0000, val_loss: 6.3082, val_acc: 0.5682
Epoch [131], train_loss: 0.0000, val_loss: 6.3669, val_acc: 0.5700
Epoch [132], train_loss: 0.0000, val_loss: 6.4474, val_acc: 0.5700
Epoch [133], train_loss: 0.0000, val_loss: 6.4645, val_acc: 0.5700
Epoch [134], train_loss: 0.0000, val_loss: 6.5214, val_acc: 0.5724
Epoch [135], train_loss: 0.0000, val_loss: 6.7639, val_acc: 0.5793
Epoch [136], train_loss: 0.0000, val_loss: 6.7460, val_acc: 0.5776
Epoch [137], train_loss: 0.0000, val_loss: 6.9229, val_acc: 0.5741
Epoch [138], train_loss: 0.0000, val_loss: 6.6310, val_acc: 0.

Epoch [247], train_loss: 0.0001, val_loss: 5.5161, val_acc: 0.5668
Epoch [248], train_loss: 0.0001, val_loss: 5.7647, val_acc: 0.5675
Epoch [249], train_loss: 0.0001, val_loss: 5.8897, val_acc: 0.5692
Epoch [250], train_loss: 0.0001, val_loss: 6.0081, val_acc: 0.5623
Epoch [251], train_loss: 0.0001, val_loss: 6.1667, val_acc: 0.5553
Epoch [252], train_loss: 0.0000, val_loss: 6.1461, val_acc: 0.5623
Epoch [253], train_loss: 0.0000, val_loss: 6.5247, val_acc: 0.5543
Epoch [254], train_loss: 0.0000, val_loss: 6.3586, val_acc: 0.5595
Epoch [255], train_loss: 0.0000, val_loss: 6.4625, val_acc: 0.5595
Epoch [256], train_loss: 0.0000, val_loss: 6.3901, val_acc: 0.5612
Epoch [257], train_loss: 0.0000, val_loss: 6.5311, val_acc: 0.5612
Epoch [258], train_loss: 0.0000, val_loss: 6.6263, val_acc: 0.5595
Epoch [259], train_loss: 0.0000, val_loss: 6.4759, val_acc: 0.5623
Epoch [260], train_loss: 0.0000, val_loss: 6.7901, val_acc: 0.5560
Epoch [261], train_loss: 0.0000, val_loss: 6.7000, val_acc: 0.

Epoch [370], train_loss: 0.0000, val_loss: 7.6929, val_acc: 0.5637
Epoch [371], train_loss: 0.0000, val_loss: 7.5352, val_acc: 0.5516
Epoch [372], train_loss: 0.0000, val_loss: 7.4940, val_acc: 0.5516
Epoch [373], train_loss: 0.0000, val_loss: 7.7806, val_acc: 0.5481
Epoch [374], train_loss: 0.0000, val_loss: 7.8324, val_acc: 0.5481
Epoch [375], train_loss: 0.0000, val_loss: 7.3649, val_acc: 0.5457
Epoch [376], train_loss: 0.0000, val_loss: 7.8382, val_acc: 0.5516
Epoch [377], train_loss: 0.0000, val_loss: 7.8848, val_acc: 0.5481
Epoch [378], train_loss: 0.0000, val_loss: 7.7550, val_acc: 0.5533
Epoch [379], train_loss: 0.0000, val_loss: 7.9631, val_acc: 0.5498
Epoch [380], train_loss: 0.0000, val_loss: 8.0728, val_acc: 0.5533
Epoch [381], train_loss: 0.0000, val_loss: 7.8943, val_acc: 0.5516
Epoch [382], train_loss: 0.0000, val_loss: 7.9707, val_acc: 0.5550
Epoch [383], train_loss: 0.0000, val_loss: 8.1556, val_acc: 0.5481
Epoch [384], train_loss: 0.0000, val_loss: 7.9883, val_acc: 0.