# In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import cv2
import numpy as np
import matplotlib.pyplot as plt


def generate_data(num_samples=200):
    """Build a synthetic single-rectangle localisation dataset.

    Every sample is a 64x64 single-channel image that is zero everywhere
    except for one axis-aligned rectangle filled with 1.0.  The matching
    target is that rectangle's ``[x, y, w, h]`` (top-left corner, width,
    height), each divided by 64.

    Args:
        num_samples: Number of image/target pairs to generate.  Zero (or a
            negative value) yields empty tensors that still carry the full
            trailing dimensions.

    Returns:
        A tuple ``(images, targets)`` of float32 tensors with shapes
        ``(num_samples, 1, 64, 64)`` and ``(num_samples, 4)``.
    """
    size = 64
    count = max(num_samples, 0)

    # Preallocating keeps the output shape correct even when count == 0;
    # stacking an empty Python list would collapse to shape (0,).
    images = np.zeros((count, 1, size, size), dtype=np.float32)
    targets = np.zeros((count, 4), dtype=np.float32)

    for i in range(count):
        # Draw order (x, y, w, h) is kept fixed so that a seeded np.random
        # produces the same rectangles from run to run.
        # randint's upper bound is exclusive: x, y <= 39 and w, h <= 19, so
        # the rectangle always lies fully inside the 64x64 canvas.
        x = np.random.randint(0, 40)
        y = np.random.randint(0, 40)
        w = np.random.randint(10, 20)
        h = np.random.randint(10, 20)

        images[i, 0, y:y + h, x:x + w] = 1.0
        targets[i] = (x / size, y / size, w / size, h / size)

    return torch.from_numpy(images), torch.from_numpy(targets)


class SimpleDetector(nn.Module):
    """Small CNN that regresses four sigmoid-bounded values per image.

    ``features`` is three conv/ReLU/max-pool stages (1 -> 16 -> 32 -> 64
    channels), each halving the spatial size.  ``regressor`` flattens the
    result and maps it through a 128-unit hidden layer to four outputs in
    the open interval (0, 1).  The flatten size ``64 * 8 * 8`` matches a
    64x64 input after the three 2x poolings.
    """

    def __init__(self):
        super().__init__()

        # Each stage: 3x3 conv with padding 1 (size-preserving), ReLU, then
        # a 2x2 stride-2 max-pool.
        stages = []
        for in_channels, out_channels in ((1, 16), (16, 32), (32, 64)):
            stages += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(2, 2),
            ]
        self.features = nn.Sequential(*stages)

        flat_features = 64 * 8 * 8
        head = [
            nn.Flatten(),
            nn.Linear(flat_features, 128),
            nn.ReLU(),
            nn.Linear(128, 4),
            nn.Sigmoid(),
        ]
        self.regressor = nn.Sequential(*head)

    def forward(self, x):
        """Run the feature extractor, then the regression head, on ``x``."""
        return self.regressor(self.features(x))


# Build the synthetic dataset, the network, and the loss/optimiser pair.
print("Generating Data...")
X_train, y_train = generate_data()
model = SimpleDetector()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print("Starting Training...")
loss_history = []
num_epochs = 100
log_every = 10

# Full-batch training: every epoch is a single optimiser step over all of
# X_train, so loss_history ends up with one entry per epoch.
for epoch in range(num_epochs):
    optimizer.zero_grad()

    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    loss.backward()
    optimizer.step()

    loss_history.append(loss.item())

    # Progress line on epochs 0, 10, 20, ...
    if not epoch % log_every:
        print(f"Epoch {epoch}: Loss = {loss.item():.4f}")


print("\nTesting Model...")
# One fresh sample; its ground-truth target is not used below.
test_img, _ = generate_data(1)

# Switch to inference mode before predicting.  This makes no difference for
# conv/ReLU/pool/linear layers, but keeps the evaluation correct if dropout
# or batch-norm layers are ever added to the model.
model.eval()
with torch.no_grad():
    # The network is trained on targets divided by 64, so scale its output
    # back up to pixel units.
    pred = model(test_img)[0] * 64.0

# (1, 64, 64) -> (64, 64, 1), then replicate to three channels so the box
# can be drawn in colour on top of the grayscale image.
img_vis = test_img[0].numpy().transpose(1, 2, 0)
img_vis = np.dstack((img_vis, img_vis, img_vis))

# Round to the nearest pixel: a bare int cast truncates toward zero and
# would shift the box by up to one pixel.
x, y, w, h = pred.round().int().tolist()
# Colour (1, 0, 0) is red once matplotlib shows the float array as RGB.
cv2.rectangle(img_vis, (x, y), (x + w, y + h), (1, 0, 0), 1)

plt.figure(figsize=(10, 4))

# Left panel: loss per training epoch.
plt.subplot(1, 2, 1)
plt.plot(loss_history)
plt.title("Training Loss")

# Right panel: test image with the predicted box outlined.
plt.subplot(1, 2, 2)
plt.imshow(img_vis)
plt.title(f"Prediction: {x},{y},{w},{h}")
plt.show()