In [9]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image

In [17]:
class TinierYOLO(nn.Module):
    def __init__(self):
        super(TinierYOLO, self).__init__()
        self.features = nn.Sequential(
            nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(16, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=1),  # Note: stride 1 to match the output size
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1),
            nn.ReLU()
        )
        self.classifier = nn.Sequential(
            nn.Linear(512 * 5 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 26)  # Number of classes
        )

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.classifier(x)
        return x

# Initialize the model with the corrected input size for the fully connected layer
model = TinierYOLO()

In [18]:
# Define transformations for your dataset
transform = transforms.Compose([
    transforms.Resize((200, 266)),
    transforms.ToTensor(),
])

# Load your custom dataset
train_dataset = datasets.ImageFolder(root='train', transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)

test_dataset = datasets.ImageFolder(root='test', transform=transform)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [20]:
# Initialize the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

# Training loop
for epoch in range(10):  # Number of epochs
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}')

# Save the trained model
torch.save(model.state_dict(), 'new_tinier_yolo_weights.pth')

Epoch 1, Loss: 3.261961979368713
Epoch 2, Loss: 3.261237118141783
Epoch 3, Loss: 3.2615067168978826
Epoch 4, Loss: 3.261639764703856


KeyboardInterrupt: 

In [16]:
import torch

# Initialize the model
model = TinierYOLO()

# Pass a dummy tensor through the convolutional layers to determine the output shape
dummy_input = torch.randn(1, 3, 200, 266)  # Batch size 1, 3 channels, height 200, width 266
output = model.features(dummy_input)
print(output.shape)


torch.Size([1, 512, 5, 7])
