**Import necessary modules**
`torch.nn` provides the neural-network building blocks and `torch.optim` provides the optimizers.


In [28]:
import torch, cv2, os, random, json, time, sys
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import _LRScheduler
import math

In [29]:
# Sanity check for the imported torch library: run a global pooling layer on a
# small random tensor and inspect the result.

# Unbatched input feature map with dimensions [channels, height, width].
# (A batched input would be [batch_size, channels, height, width]; the adaptive
# pooling layers accept both layouts.)
input_feature_map = torch.randn(1, 5, 5)  # Example input: 1 channel, 5 x 5

# Define the global pooling operations (each reduces every channel to 1 x 1)
global_avg_pool = nn.AdaptiveAvgPool2d(1)
global_max_pool = nn.AdaptiveMaxPool2d(1)
convolution = nn.Conv2d(3, 5, 3)

# Apply global MAX pooling to the input. The result is named `pooled_map`
# instead of `map` so the Python builtin `map` is not shadowed in later cells.
pooled_map = global_max_pool(input_feature_map)

print(pooled_map.shape)
print(input_feature_map)
print(pooled_map)

torch.Size([1, 1, 1])
tensor([[[-1.0334,  1.8149, -0.5768, -1.2554, -0.9456],
         [ 0.0864, -0.2413, -0.2717,  1.8801,  0.0804],
         [ 0.6446, -1.8619,  0.6822, -0.8070,  0.7017],
         [ 0.2820,  0.3635, -0.8231, -0.5546, -0.7125],
         [-1.0746,  0.7954,  1.0528,  0.6721,  1.5809]]])
tensor([[[1.8801]]])


# Model Architecture
* Individual Components:
    * CBAM Module
    * Channel Attention
    * Spatial Attention
    * DCNN (Deep Convolutional Neural Network)

Convolutional Block Attention Module:
This allows the neural network to focus on specific aspects of the image and improves
the representation of the regions of interest. The output always has the same dimensions as the input: if, for example, the input feature map tensor dimensions
are 6 x 127 x 127, the output will also be 6 x 127 x 127. The CBAM module re-weights each feature map (channel attention) and each spatial location (spatial attention) so that the informative parts are emphasized.

# Proposed Architecture for this task:
**Channel Attention**


**Spatial Attention**


**CBAM (Convolutional Block Attention Module)**


**CNN Feature Extraction**
Input (512 x 512) ->

6-filter Conv2d (output 6 x 510 x 510) (3 x 3 filter) ->
CBAM (output 6 x 510 x 510) ->
MaxPool (output 6 x 255 x 255) (2 x 2 window) ->

12-filter Conv2d (output 12 x 253 x 253) (3 x 3 filter) ->
CBAM (output 12 x 253 x 253) ->
MaxPool (output 12 x 126 x 126) (2 x 2 window) ->

18-filter Conv2d (output 18 x 124 x 124) (3 x 3 filter) ->
CBAM (output 18 x 124 x 124) ->
MaxPool (output 18 x 62 x 62) (2 x 2 window) ->

24-filter Conv2d (output 24 x 60 x 60) (3 x 3 filter) ->
CBAM (output 24 x 60 x 60) ->
MaxPool (output 24 x 30 x 30) (2 x 2 window) ->

30-filter Conv2d (output 30 x 28 x 28) (3 x 3 filter) ->
CBAM (output 30 x 28 x 28) ->
MaxPool (output 30 x 14 x 14) (2 x 2 window) ->

Flatten the feature maps to serve as inputs to DNN (30 x 14 x 14 = 5880 inputs) ->
DNN (output 4)

**DNN Classification**
L1 (5880) ->
ReLU ->
L2 (1024) ->
ReLU ->
L3 (512) ->
ReLU ->
L4 (256) ->
ReLU ->
L5 (128) ->
ReLU ->
L6 (64) ->
ReLU ->
L7 (32) ->
ReLU ->
L8 (4) ->
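Softmax (note: not part of the `DNN` module below, which returns raw logits; during training those logits are passed directly to `BCEWithLogitsLoss`) ->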

Output: Classification probabilities (4 x 1)

In [30]:
class channel_attention(nn.Module):
    """Channel-attention branch of CBAM.

    Every channel of the input is summarised by its global average and its
    global maximum. Both summaries go through the same bottleneck (two 1x1
    convolutions with a ReLU in between), are added, and are squashed with a
    sigmoid to give one attention score per channel.

    Args:
        in_channels: number of channels of the incoming feature map.
        reduction_ratio: factor by which the bottleneck narrows the channel
            count (rounded up, so the bottleneck never has zero channels).
    """

    def __init__(self, in_channels, reduction_ratio=4):
        super().__init__()

        # Width of the bottleneck shared by both pooled descriptors.
        hidden_channels = math.ceil(in_channels / reduction_ratio)

        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.max_pool = nn.AdaptiveMaxPool2d(1)

        # 1x1 convolutions stand in for the fully connected layers of the CBAM
        # paper; the arithmetic is the same but no flattening is needed.
        squeeze = nn.Conv2d(in_channels, hidden_channels, 1, bias=False)
        excite = nn.Conv2d(hidden_channels, in_channels, 1, bias=False)
        self.fc = nn.Sequential(squeeze, nn.ReLU(), excite)

    def forward(self, x):
        """Return the per-channel attention scores for `x`.

        Shape walk-through for in_channels=6, reduction_ratio=4:
        [batch, 6, H, W] -> pooled [batch, 6, 1, 1] -> bottleneck [batch, 2, 1, 1]
        -> [batch, 6, 1, 1].
        """
        pooled_descriptors = (self.avg_pool(x), self.max_pool(x))
        avg_o, max_o = (self.fc(descriptor) for descriptor in pooled_descriptors)
        # Sum both branches, then map to a score between 0 and 1 per channel.
        return torch.sigmoid(avg_o + max_o)


class spatial_attention(nn.Module):
    """Spatial-attention branch of CBAM.

    Collapses the channel dimension into a mean map and a max map, stacks the
    two, and convolves them into a single-channel map of scores between 0 and 1.

    Args:
        kernel_size: side length of the convolution kernel. Padding is
            `kernel_size // 2`, which keeps height and width unchanged for odd
            kernel sizes.
    """

    def __init__(self, kernel_size=7):
        super().__init__()
        half_kernel = kernel_size // 2
        self.conv = nn.Conv2d(2, 1, kernel_size, padding=half_kernel, bias=False)

    def forward(self, x):
        """Return the attention map for `x`.

        (batch, channels, H, W) -> (batch, 1, H, W)
        """
        # Per-pixel statistics across channels, each (batch, 1, H, W).
        channel_mean = x.mean(dim=1, keepdim=True)
        channel_max = x.max(dim=1, keepdim=True).values

        # Stack the two statistics as a 2-channel image: (batch, 2, H, W).
        stacked = torch.cat((channel_mean, channel_max), dim=1)

        return torch.sigmoid(self.conv(stacked))

class CBAM(nn.Module):
    """Convolutional Block Attention Module: channel attention, then spatial attention.

    The output has the same shape as the input; the input is only rescaled.

    Args:
        in_channels: number of channels of the incoming feature map.
        reduction_ratio: bottleneck ratio passed to the channel-attention branch.
        sa_kernel_size: kernel size passed to the spatial-attention branch.
    """

    def __init__(self, in_channels, reduction_ratio=4, sa_kernel_size=7):
        super().__init__()
        self.channel = channel_attention(in_channels, reduction_ratio)
        self.spatial = spatial_attention(sa_kernel_size)

    def forward(self, x):
        """Rescale `x` by its channel scores, then rescale that result by its spatial scores."""
        channel_refined = x * self.channel(x)
        # The spatial scores are computed from the channel-refined tensor,
        # not from the raw input.
        return channel_refined * self.spatial(channel_refined)

class CNN(nn.Module):
    """Feature extractor: five stages of 3x3 convolution -> CBAM -> 2x2 max-pool.

    The first stage takes a 1-channel input; the channel count then grows
    6 -> 12 -> 18 -> 24 -> 30. The stages are stored as `block1` .. `block5`.
    """

    def __init__(self):
        super().__init__()
        # (input channels, output channels) of each stage, in execution order.
        stage_channels = [(1, 6), (6, 12), (12, 18), (18, 24), (24, 30)]
        for stage_number, (c_in, c_out) in enumerate(stage_channels, start=1):
            stage = nn.Sequential(
                nn.Conv2d(c_in, c_out, 3),
                CBAM(c_out),
                nn.MaxPool2d(2),
            )
            # Assigning a Module attribute registers it as a submodule, so the
            # parameters live under block1 .. block5.
            setattr(self, f"block{stage_number}", stage)

    def forward(self, x):
        """Run `x` through the five stages in order and return the final feature maps."""
        stages = (self.block1, self.block2, self.block3, self.block4, self.block5)
        for stage in stages:
            x = stage(x)
        return x

class DNN(nn.Module):
    """Classification head: fully connected stack 5880 -> 1024 -> ... -> 32 -> 4.

    Every linear layer except the last is followed by a ReLU. The last layer's
    output is returned as-is (no activation is applied to it here).
    """

    def __init__(self):
        super().__init__()
        layer_widths = [5880, 1024, 512, 256, 128, 64, 32, 4]
        width_pairs = list(zip(layer_widths[:-1], layer_widths[1:]))

        layers = []
        for position, (n_in, n_out) in enumerate(width_pairs):
            layers.append(nn.Linear(n_in, n_out))
            # No ReLU after the final (output) layer.
            if position < len(width_pairs) - 1:
                layers.append(nn.ReLU())
        self.classify = nn.Sequential(*layers)

    def forward(self, x):
        """Map a (batch, 5880) tensor to a (batch, 4) tensor."""
        return self.classify(x)

class BrainTumorClassifier(nn.Module):
    """Full model: CNN feature extractor followed by the DNN classification head."""

    def __init__(self):
        super().__init__()
        self.feature_extraction = CNN()
        self.classification = DNN()

    def forward(self, x):
        """Extract feature maps from `x`, flatten them per sample, and classify.

        Returns whatever the classification head returns for the flattened
        features (one row per sample in the batch).
        """
        feature_maps = self.feature_extraction(x)
        batch_size = feature_maps.size(0)
        # Keep the batch dimension and merge all remaining dimensions into one.
        flat = feature_maps.view(batch_size, -1)
        return self.classification(flat)


In [31]:
# Build the model, move it to the best available device, and report its size.
model = BrainTumorClassifier()
model.to(torch.float32)

# `torch.cuda.is_available` has to be CALLED. The bare function object is
# always truthy, which would select "cuda" even on a machine without a GPU
# and make the `.to(device)` below fail.
if torch.cuda.is_available():
    torch.cuda.empty_cache()
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

model = model.to(device)

dvc = next(model.parameters()).device
print("Model is on device:", dvc)

total_params = sum(p.numel() for p in model.parameters())

print(f"Total number of parameters: {total_params}")

# Estimate parameter memory from the measured count, not a hard-coded
# number, so the figure stays correct if the architecture changes.
num_parameters = total_params
size_of_float32 = 4  # 4 bytes for float32
total_memory_bytes = num_parameters * size_of_float32
total_memory_mb = (total_memory_bytes / (2**20))

print(f"Total estimated memory usage: {total_memory_mb:.2f} MB")

Model is on device: cuda:0
Total number of parameters: 6736274
Total estimated memory usage: 25.70 MB


In [32]:
class ImageDataset(Dataset):
    """Dataset wrapper around a pre-built, indexable collection of samples.

    Each entry of `IO_pairs` is indexed at positions 0, 1 and 2, which are
    returned as (class name, image tensor, classification target).
    # NOTE(review): the element types are not visible here — presumably a
    # string, an image tensor and a target tensor; confirm against the code
    # that builds the dataset.
    """

    def __init__(self, IO_pairs):
        self.IO_pairs = IO_pairs

    def __len__(self):
        """Number of stored samples."""
        return len(self.IO_pairs)

    def __getitem__(self, index):
        """Return the first three fields of sample `index` as a tuple."""
        sample = self.IO_pairs[index]
        return sample[0], sample[1], sample[2]

In [7]:
# Training configuration and loop.
num_epochs = 150
pt_path = "./content/train/train_datasets/train_dataset1.pt"
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load dataset files you created yourself.
dataset = torch.load(pt_path)
dataloader = DataLoader(dataset, batch_size=5, shuffle=True)
model.train()

class_loss_function = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)
#scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=2, eta_min=0.00001)
scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.95)

# A batch whose loss is above this threshold is trained on again (after epoch 10).
correction_threshold = 0.5
# Upper bound on the number of extra passes over such a batch.
max_repetitions = 6


def _train_step(inputs, targets):
    """Run one optimisation step on a single batch and return the loss tensor.

    Shared by the regular pass and the extra "correction" passes so both use
    exactly the same zero-grad / forward / loss / backward / step sequence.
    """
    optimizer.zero_grad()
    batch_prediction = model(inputs)
    batch_loss = class_loss_function(batch_prediction, targets)
    batch_loss.backward()
    optimizer.step()
    return batch_loss


num_batches = len(dataloader)

for epoch in range(num_epochs):
    for i, j in enumerate(dataloader):

        # j[0] holds the class names of the batch; it is not needed for training.
        image_tensor = j[1].to(device=device, dtype=torch.float32)
        image_class_tensor = j[2].to(device=device, dtype=torch.float32)

        classification_loss = _train_step(image_tensor, image_class_tensor)

        print(f"Epoch [{epoch+1}/{num_epochs}], Progress: [{i+1}/{num_batches}], Class_Loss: {round(classification_loss.item(), 5)}, Learning Rate: {scheduler.get_last_lr()[0]}")

        # After the first epochs, repeat poorly fitted batches. The number of
        # repetitions grows with the epoch (one per 10 epochs, rounded up) and
        # is capped at max_repetitions. The threshold is checked once, before
        # the repetitions start.
        if round(classification_loss.item(), 5) > correction_threshold and epoch > 10:
            for m in range(min(math.ceil(epoch / 10), max_repetitions)):
                classification_loss = _train_step(image_tensor, image_class_tensor)

                print(f"Epoch [{epoch+1}/{num_epochs}], Progress: [{i+1}/{num_batches}, T{m}], Class_Loss: {round(classification_loss.item(), 5)}, Learning Rate: {scheduler.get_last_lr()[0]}")

    # Decay the learning rate once every 10 epochs (epochs 0, 10, 20, ...).
    if epoch % 10 == 0:
        scheduler.step()

Epoch [1/150], Progress: [1/574], Class_Loss: 0.69743, Learning Rate: 0.003
Epoch [1/150], Progress: [2/574], Class_Loss: 0.69855, Learning Rate: 0.003
Epoch [1/150], Progress: [3/574], Class_Loss: 0.70638, Learning Rate: 0.003
Epoch [1/150], Progress: [4/574], Class_Loss: 0.69974, Learning Rate: 0.003
Epoch [1/150], Progress: [5/574], Class_Loss: 0.69311, Learning Rate: 0.003
Epoch [1/150], Progress: [6/574], Class_Loss: 0.68227, Learning Rate: 0.003
Epoch [1/150], Progress: [7/574], Class_Loss: 0.65484, Learning Rate: 0.003
Epoch [1/150], Progress: [8/574], Class_Loss: 0.63449, Learning Rate: 0.003
Epoch [1/150], Progress: [9/574], Class_Loss: 0.62032, Learning Rate: 0.003
Epoch [1/150], Progress: [10/574], Class_Loss: 0.51267, Learning Rate: 0.003
Epoch [1/150], Progress: [11/574], Class_Loss: 0.85947, Learning Rate: 0.003
Epoch [1/150], Progress: [12/574], Class_Loss: 0.53255, Learning Rate: 0.003
Epoch [1/150], Progress: [13/574], Class_Loss: 0.49546, Learning Rate: 0.003
Epoch [1

KeyboardInterrupt: 

In [34]:
# Save the trained model under a name typed by the user.
models_dir = "./content/models"
# makedirs also creates "./content" when it is missing and is a no-op when
# the directory already exists.
os.makedirs(models_dir, exist_ok=True)

# Keep the typed name: it was previously discarded, and the file name was
# taken from an unrelated variable that is not defined in this notebook.
# NOTE(review): the TypeError recorded for this cell suggests `input` had been
# rebound to a tensor in that kernel session — confirm on a fresh kernel.
model_name = input("Enter the model name: ").strip()
if not model_name:
    raise ValueError("Model name must not be empty.")
torch.save(model, f"{models_dir}/{model_name}.pt")

TypeError: 'Tensor' object is not callable

In [21]:
# Load a saved model and print its running accuracy, one line per sample.
eval_select = input("Which model do you want to evaluate?")
# NOTE(review): torch.load unpickles the file, which can execute arbitrary
# code — only load model/dataset files you created yourself.
loaded_model = torch.load(f"./content/models/{eval_select}.pt")
loaded_model.to(torch.float32)
loaded_model.to(device)
# Inference mode for the module (affects layers such as dropout/batch-norm
# if any are ever added to the model).
loaded_model.eval()
test_set = torch.load("./content/test/test_datasets/test_dataset1.pt")
train_set = torch.load("./content/train/train_datasets/train_dataset1.pt")
# NOTE(review): the loop below scores `train_set`; `test_set` is loaded but
# never used. Confirm whether held-out accuracy was intended here.
correct = 0
tested = 0
# No gradients are needed for evaluation; skipping them saves memory and time.
with torch.no_grad():
    for i in range(len(train_set)):
        current_eval = train_set[i]
        # Add a leading batch dimension of 1: the model expects batched input.
        inputTensor = current_eval[1].to(device=device, dtype=torch.float32).unsqueeze(0)
        target = current_eval[2]
        predicted_output = loaded_model(inputTensor)
        # Compare plain Python ints so the check does not depend on which
        # device each tensor lives on.
        if predicted_output.argmax().item() == target.argmax().item():
            correct += 1
        tested += 1
        print(str(correct / tested))


1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
1.0
0.9615384615384616
0.9629629629629629
0.9642857142857143
0.9655172413793104
0.9666666666666667
0.967741935483871
0.96875
0.9696969696969697
0.9705882352941176
0.9714285714285714
0.9722222222222222
0.972972972972973
0.9736842105263158
0.9487179487179487
0.95
0.9512195121951219
0.9523809523809523
0.9534883720930233
0.9545454545454546
0.9555555555555556
0.9565217391304348
0.9574468085106383
0.9583333333333334
0.9591836734693877
0.96
0.9607843137254902
0.9615384615384616
0.9433962264150944
0.9444444444444444
0.9454545454545454
0.9464285714285714
0.9473684210526315
0.9482758620689655
0.9491525423728814
0.95
0.9508196721311475
0.9516129032258065
0.9523809523809523
0.953125
0.9538461538461539
0.9545454545454546
0.9552238805970149
0.9558823529411765
0.9565217391304348
0.9571428571428572
0.9577464788732394
0.9583333333333334
0.958904109589041
0.9594594594594594
0.96
0.9605263157894737
0.961038961