In [4]:
!pip install torch torchvision -U




[notice] A new release of pip is available: 24.0 -> 24.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from PIL import Image
import os

In [6]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cpu


In [38]:
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.Resize((64, 128)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

dataset_path = 'test'  # Replace with your dataset path
dataset = datasets.ImageFolder(root=dataset_path, transform=transform)

# Split the dataset into training and test sets
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [29]:
# Check if the DataLoader is returning batches
for images, labels in train_loader:
    print(images.shape, labels.shape)
    break  # Just check the first batch


torch.Size([32, 1, 64, 128]) torch.Size([32])


In [39]:
class ALPRModel(nn.Module):
    def __init__(self, num_chars=7):
        super(ALPRModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(64 * 32 * 16, 512)
        self.fc2 = nn.Linear(512, num_chars * 36)  # 7 characters, each with 36 possible classes

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 64 * 32 * 16)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        x = x.view(-1, 7, 36)  # Reshape output to (batch_size, num_chars, num_classes)
        return x


In [40]:
model = ALPRModel(num_chars=7).to(device)
print(model)

ALPRModel(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=32768, out_features=512, bias=True)
  (fc2): Linear(in_features=512, out_features=252, bias=True)
)


In [43]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train(model, train_loader, criterion, optimizer, num_epochs=10):
    for epoch in range(num_epochs):
        print(f"Starting Epoch {epoch+1}/{num_epochs}")
        model.train()
        running_loss = 0.0
        
        for i, (images, labels) in enumerate(train_loader):
            # Move images and labels to the GPU if available
            images, labels = images.to(device), labels.to(device)
            
            # Reset the gradients
            optimizer.zero_grad()
            
            # Forward pass
            outputs = model(images)
            
            # Compute loss for each character in the sequence
            loss = 0
            for j in range(7):
                loss += criterion(outputs[:, j, :], labels[:, j])
            
            # Backward pass and optimization
            loss.backward()
            optimizer.step()
            
            # Accumulate running loss
            running_loss += loss.item()
        
        # Calculate average loss for the epoch
        avg_loss = running_loss / len(train_loader)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {avg_loss:.4f}')

train(model, train_loader, criterion, optimizer, num_epochs=10)


Starting Epoch 1/10


IndexError: too many indices for tensor of dimension 1

In [33]:
def evaluate(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    print(f'Accuracy: {100 * correct / total:.2f}%')

# Evaluate the model
evaluate(model, test_loader)


RuntimeError: Input type (torch.FloatTensor) and weight type (torch.cuda.FloatTensor) should be the same or input should be a MKLDNN tensor and weight is a dense tensor

In [34]:
def predict(model, image):
    model.eval()
    with torch.no_grad():
        image = transform(image).unsqueeze(0).to(device)
        outputs = model(image)
        
        predicted_chars = []
        for i in range(7):
            _, predicted = torch.max(outputs[0, i, :], 0)
            predicted_chars.append(predicted.item())
        
        # Convert the predicted indices to corresponding characters (digits or letters)
        alphabet = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
        predicted_label = ''.join([alphabet[i] for i in predicted_chars])
        return predicted_label

# Predict the label from a new image
image_path = 'A4721UT.png'
image = Image.open(image_path)
predicted_label = predict(model, image)
print(f'Predicted Label: {predicted_label}')


Predicted Label: EFDM1HF
