In [1]:
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Define the paths to the input images folder, output images folder, model configuration file, and model weights file
input_folder = "C:/Users/arock/Image Data/SSD/Train"
output_folder = "C:/Users/arock/Image Data/SSD/Output"
config_file = "C:/Users/arock/Image Data/ssd_mobilenet_v3_large_coco_2020_01_14.pbtxt"
weights_file = "C:/Users/arock/Image Data/frozen_inference_graph.pb"

In [3]:
# Define the transforms to be applied to the images
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Load the dataset
train_dataset = datasets.ImageFolder("C:/Users/arock/Image Data/SSD/Train", transform=transform)
test_dataset = datasets.ImageFolder("C:/Users/arock/Image Data/SSD/Test", transform=transform)

In [4]:
# Define the batch size
batch_size = 32

# Create the data loaders
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Load the pre-trained ResNet50 model
resnet50 = models.resnet50(pretrained=True)

Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to C:\Users\arock/.cache\torch\hub\checkpoints\resnet50-0676ba61.pth


  0%|          | 0.00/97.8M [00:00<?, ?B/s]

In [5]:
# Freeze all the layers except the last few
for param in resnet50.parameters():
    param.requires_grad = False

# Add a Global Average Pooling (GAP) layer
resnet50.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))

# Add a fully connected layer
num_classes = len(train_dataset.classes)
resnet50.fc = nn.Sequential(
    nn.Linear(2048, num_classes),
    nn.Softmax(dim=1)
)

In [6]:
class Attention(nn.Module):
    def __init__(self, in_features, hidden_dim=256):
        super(Attention, self).__init__()
        self.fc1 = nn.Linear(in_features, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = nn.ReLU()(x)
        x = self.fc2(x)
        alpha = self.sigmoid(x)
        attended_x = torch.sum(alpha * x, dim=2)
        return attended_x


# Add the attention mechanism
num_features = resnet50.fc[0].in_features
resnet50.fc[0] = nn.Linear(num_features*2, num_classes)

attention = Attention(in_features=num_features)
resnet50.attention = attention

In [7]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(resnet50.parameters(), lr=0.001)

In [10]:
# Train the model
num_epochs = 10
for epoch in range(num_epochs):
    for images, labels in train_loader:
        optimizer.zero_grad()
        features = resnet50(images)
        attended_features = resnet50.attention(features)
        
        # add a linear layer to reduce the dimensionality of the features tensor
        features_fc = resnet50.fc_features(features)
        
        # concatenate the attended features and the reduced features tensor
        concatenated_features = torch.cat((features_fc, attended_features), dim=1)
        
        output = resnet50.fc(concatenated_features)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

    # Evaluate the model
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            features = resnet50(images)
            attended_features = resnet50.attention(features)
            
            # add a linear layer to reduce the dimensionality of the features tensor
            features_fc = resnet50.fc_features(features)
            
            # concatenate the attended features and the reduced features tensor
            concatenated_features = torch.cat((features_fc, attended_features), dim=1)
            
            output = resnet50.fc(concatenated_features)
            _, predicted = torch.max(output.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1}/{num_epochs}, Test Accuracy: {accuracy:.2f}%')


RuntimeError: mat1 and mat2 shapes cannot be multiplied (32x2048 and 4096x2)