In [None]:
import torch
import torch.nn as nn

class TinyModel(nn.Module):
    """Small two-branch classifier that fuses an image with an embedding vector.

    The network has three parts:

    * ``image_net`` -- two conv/ReLU/max-pool stages followed by a flatten and
      a linear projection to 32 features. The projection expects exactly 400
      flattened values, which is what a 3x640x480 input yields
      (10 channels x 8 x 5 after the second pooling stage).
    * ``embeddings_net`` -- a two-layer MLP that maps a 384-dimensional vector
      to 32 features.
    * ``final_net`` -- a two-layer head applied to the 64 concatenated
      features, ending in a sigmoid so that each of the 2 outputs lies in
      the range [0, 1].
    """

    def __init__(self):
        super().__init__()

        # Image branch: (N, 3, H, W) -> (N, 32). Layers are created in the
        # same order as they are applied.
        image_layers = [
            nn.Conv2d(in_channels=3, out_channels=5, kernel_size=(5, 5), stride=(3, 3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(5, 5), stride=(5, 5)),
            nn.Conv2d(in_channels=5, out_channels=10, kernel_size=(3, 3)),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=(5, 5), stride=(5, 5)),
            nn.Flatten(),
            nn.Linear(in_features=400, out_features=32),
        ]
        self.image_net = nn.Sequential(*image_layers)

        # Embedding branch: (N, 384) -> (N, 32).
        embedding_layers = [
            nn.Linear(in_features=384, out_features=100),
            nn.ReLU(),
            nn.Linear(in_features=100, out_features=32),
            nn.ReLU(),
        ]
        self.embeddings_net = nn.Sequential(*embedding_layers)

        # Fusion head: (N, 32 + 32) -> (N, 2), squashed by a sigmoid.
        head_layers = [
            nn.Linear(in_features=64, out_features=10),
            nn.ReLU(),
            nn.Linear(in_features=10, out_features=2),
            nn.Sigmoid(),
        ]
        self.final_net = nn.Sequential(*head_layers)

    def forward(self, images, embeddings):
        """Run both branches and classify the concatenated features.

        Args:
            images: Tensor of shape (N, 3, H, W) whose spatial size makes the
                image branch flatten to 400 values (e.g. 640x480).
            embeddings: Tensor of shape (N, 384). It must have the same batch
                size as ``images`` because the two feature sets are
                concatenated along dimension 1.

        Returns:
            Tensor of shape (N, 2) holding sigmoid outputs.
        """
        image_features = self.image_net(images)
        embeddings_features = self.embeddings_net(embeddings)
        fused_features = torch.cat([image_features, embeddings_features], dim=1)
        return self.final_net(fused_features)



In [None]:
import torch.optim as optim

# Device on which the toy data AND the model live. Change this single
# constant to run the notebook on another device.
DEVICE = "cpu"
# Fixed seed so the random toy data and the weight initialisation are the
# same on every "Restart Kernel -> Run All".
SEED = 0
torch.manual_seed(SEED)

# Toy dataset: one random RGB image (batch of 1, 3x640x480) that is paired
# with each of two random 384-dimensional embeddings; the two samples carry
# opposite two-element targets.
images = torch.rand((1, 3, 640, 480), dtype=torch.float32, device=DEVICE)
embeddings = torch.rand((2, 384), dtype=torch.float32, device=DEVICE)
labels = torch.tensor([[0, 1], [1, 0]], dtype=torch.float32, device=DEVICE)

# Move the model to the same device as the tensors; otherwise any DEVICE
# other than "cpu" fails with a device mismatch on the first forward pass.
model = TinyModel().to(DEVICE)
# Smoke test: a single forward pass on the first sample; the (1, 2) output is
# displayed as the cell result.
model(images, embeddings[0].unsqueeze(0))

In [None]:
# Binary cross-entropy on the sigmoid outputs, optimised with Adam at its
# default settings.
criterion = nn.BCELoss()
optimizer = optim.Adam(model.parameters())

NUM_EPOCHS = 1000  # full passes over the two-sample toy dataset
LOG_EVERY = 100    # print the loss every LOG_EVERY epochs (and on the last one)

for i in range(NUM_EPOCHS):
    step_losses = []
    # One optimisation step per (embedding, label) pair; the same image is
    # fed with every embedding.
    for embedding, label in zip(embeddings, labels):
        pred = model(images, embedding.unsqueeze(0))
        loss = criterion(pred, label.unsqueeze(0))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # .item() extracts a plain Python float so the log shows a number
        # instead of a tensor repr with its grad_fn.
        step_losses.append(loss.item())

    # Log the mean loss once per reporting interval rather than on every
    # step, which would flood the cell output with 2 * NUM_EPOCHS lines.
    if step_losses and (i % LOG_EVERY == 0 or i == NUM_EPOCHS - 1):
        print(f"Epoch {i}: BCE: {sum(step_losses) / len(step_losses):.6f}")

In [None]:
# Overfit :)
# Index of the larger output for each of the two samples, displayed as a
# tuple. If the model has memorised both samples this matches the argmax of
# `labels`, i.e. (tensor(1), tensor(0)).
tuple(model(images, embeddings[k].unsqueeze(0)).argmax() for k in range(2))