In [1]:
!unzip receipt_validation.zip -d data/

Archive:  receipt_validation.zip
   creating: data/receipt_validation/
   creating: data/receipt_validation/train/
   creating: data/receipt_validation/train/non_receipt/
  inflating: data/receipt_validation/train/non_receipt/1.jpg  
  inflating: data/receipt_validation/train/non_receipt/2.jpg  
  inflating: data/receipt_validation/train/non_receipt/22903.jpg  
  inflating: data/receipt_validation/train/non_receipt/22907.jpg  
  inflating: data/receipt_validation/train/non_receipt/22908.jpg  
  inflating: data/receipt_validation/train/non_receipt/22911.jpg  
  inflating: data/receipt_validation/train/non_receipt/22912.jpg  
  inflating: data/receipt_validation/train/non_receipt/22913.jpg  
  inflating: data/receipt_validation/train/non_receipt/22915.jpg  
  inflating: data/receipt_validation/train/non_receipt/22917.jpg  
  inflating: data/receipt_validation/train/non_receipt/22919.jpg  
  inflating: data/receipt_validation/train/non_receipt/22920.jpg  
  inflating: data/receipt_validat

In [2]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
from torchvision import models
import torch.optim as optim


In [3]:
def get_device():
    """Pick the compute device for the notebook.

    Returns:
        torch.device: the ``"cuda"`` device when ``torch.cuda.is_available()``
        reports True, otherwise the ``"cpu"`` device.
    """
    if torch.cuda.is_available():
        return torch.device("cuda")
    return torch.device("cpu")


In [4]:
# Seed PyTorch's global RNG before any dataset, loader or model is created so that
# a fresh "Restart & Run All" draws the same random numbers (loader shuffling and
# the initial weights of the new classification head both use this RNG).
# NOTE(review): GPU kernels may still introduce small run-to-run differences.
seed = 42
torch.manual_seed(seed)

# Side length (in pixels) that every image is resized to by the transforms.
img_size = 224
# Number of images per batch for both the training and validation loaders.
batch_size = 4

In [5]:
# Values shared by the training and validation pipelines: the square output size
# and the per-channel mean/std used for normalisation (the standard ImageNet
# statistics that torchvision's pretrained models are documented to expect).
target_size = (img_size, img_size)
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Training pipeline: resize, light augmentation (small rotation, brightness and
# contrast jitter), then convert to a tensor and normalise.
train_transform = transforms.Compose([
    transforms.Resize(target_size),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

# Validation pipeline: no random augmentation, only resize + tensor + normalise.
val_transform = transforms.Compose([
    transforms.Resize(target_size),
    transforms.ToTensor(),
    transforms.Normalize(mean=norm_mean, std=norm_std),
])

In [6]:
# Training images go through the augmenting pipeline; validation images only go
# through the deterministic resize + normalise pipeline.
train_dataset = datasets.ImageFolder(
    root="data/receipt_validation/train",
    transform=train_transform,
)
val_dataset = datasets.ImageFolder(
    root="data/receipt_validation/val",
    transform=val_transform,
)

# ImageFolder assigns label indices from each root's own sub-directory names, so a
# missing or extra class folder in one split would silently shift the labels and
# make validation metrics meaningless. Fail loudly instead.
assert train_dataset.class_to_idx == val_dataset.class_to_idx, (
    "train/val class folders differ: "
    f"{train_dataset.class_to_idx} vs {val_dataset.class_to_idx}"
)

# Shuffle only the training data; validation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [7]:
# Sanity check: pull the first batch from the training loader and show its tensor
# shape, its labels, and the folder-name -> label-index mapping.
batch_iterator = iter(train_loader)
images, labels = next(batch_iterator)

for caption, value in (
    ("Image batch shape:", images.shape),
    ("Labels:", labels),
    ("Class mapping:", train_dataset.class_to_idx),
):
    print(caption, value)

Image batch shape: torch.Size([4, 3, 224, 224])
Labels: tensor([1, 0, 1, 1])
Class mapping: {'non_receipt': 0, 'receipt': 1}


In [8]:
def build_model(num_classes=2, freeze_backbone=True):
    """Create an ImageNet-pretrained ResNet-18 with a fresh classification head.

    Args:
        num_classes: number of output units of the replacement ``fc`` layer.
        freeze_backbone: when True, ``requires_grad`` is set to False on every
            pretrained parameter, so only the new ``fc`` layer (created
            afterwards, and therefore trainable) receives gradients.

    Returns:
        The ``torchvision`` ResNet-18 module with ``model.fc`` replaced by
        ``nn.Linear(in_features, num_classes)``.
    """
    # torchvision >= 0.13 deprecates ``pretrained=True`` in favour of ``weights=``.
    # IMAGENET1K_V1 is the checkpoint that ``pretrained=True`` resolved to, so the
    # loaded weights are unchanged. Older torchvision lacks the enum: fall back.
    weights_enum = getattr(models, "ResNet18_Weights", None)
    if weights_enum is not None:
        model = models.resnet18(weights=weights_enum.IMAGENET1K_V1)
    else:
        model = models.resnet18(pretrained=True)

    if freeze_backbone:
        # Must run before the head is swapped so the new layer stays trainable.
        for param in model.parameters():
            param.requires_grad = False

    num_features = model.fc.in_features
    model.fc = nn.Linear(num_features, num_classes)

    return model


In [9]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Args:
        model: module to train; switched to training mode by this function.
        loader: iterable yielding ``(images, labels)`` tensor pairs. It does not
            need to support ``len()``.
        criterion: loss callable applied as ``criterion(outputs, labels)``.
            The reported loss assumes it returns the mean over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        optimizer: optimizer stepped once per batch.
        device: device that each batch is moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``, both averaged per sample over the epoch.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.train()
    loss_sum = 0.0
    correct = 0
    total = 0

    for images, labels in loader:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Weight each batch's mean loss by its size so that a shorter final batch
        # does not count as much as a full one in the epoch average.
        n_samples = labels.size(0)
        loss_sum += loss.item() * n_samples
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += n_samples

    if total == 0:
        raise ValueError("train_one_epoch received a loader with no samples")

    avg_loss = loss_sum / total
    accuracy = correct / total

    return avg_loss, accuracy


In [10]:
def validate_one_epoch(model, loader, criterion, device):
    """Evaluate ``model`` on ``loader`` without updating any weights.

    Args:
        model: module to evaluate; switched to evaluation mode by this function.
        loader: iterable yielding ``(images, labels)`` tensor pairs. It does not
            need to support ``len()``.
        criterion: loss callable applied as ``criterion(outputs, labels)``.
            The reported loss assumes it returns the mean over the batch
            (the default reduction of ``nn.CrossEntropyLoss``).
        device: device that each batch is moved to before the forward pass.

    Returns:
        ``(avg_loss, accuracy)``, both averaged per sample over the whole loader.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0

    # Gradients are not needed for evaluation; skipping them saves memory and time.
    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            # Weight each batch's mean loss by its size so that a shorter final
            # batch does not count as much as a full one in the average.
            n_samples = labels.size(0)
            loss_sum += loss.item() * n_samples
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += n_samples

    if total == 0:
        raise ValueError("validate_one_epoch received a loader with no samples")

    avg_loss = loss_sum / total
    accuracy = correct / total

    return avg_loss, accuracy


In [11]:
def train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=5):
    """Train for ``epochs`` epochs, printing train and validation metrics each time.

    Every epoch calls ``train_one_epoch`` on ``train_loader`` and then
    ``validate_one_epoch`` on ``val_loader``; each is expected to return a
    ``(loss, accuracy)`` pair, which is printed with four decimals.

    Args:
        model: module passed through to the per-epoch helpers.
        train_loader: batches used for the optimisation pass.
        val_loader: batches used for the evaluation pass.
        criterion: loss callable forwarded to both helpers.
        optimizer: optimizer forwarded to the training helper.
        device: device forwarded to both helpers.
        epochs: number of train + validate rounds to run.

    Returns:
        None. Progress is reported via ``print`` only.
    """
    divider = "-" * 30

    for epoch in range(epochs):
        print(f"\nEpoch {epoch+1}/{epochs}")
        print(divider)

        # Optimise on the training split first, then score the validation split.
        train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
        val_loss, val_acc = validate_one_epoch(model, val_loader, criterion, device)

        print(f"Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}")
        print(f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")



In [12]:
# Pick the hardware, then build the frozen-backbone classifier and move it there.
device = get_device()
model = build_model(num_classes=2, freeze_backbone=True).to(device)

# Cross-entropy over the two classes; the optimizer is handed only the
# parameters of the replacement ``fc`` head.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.fc.parameters(), lr=1e-3)

# Five rounds of train + validate, with metrics printed per epoch.
train_model(model, train_loader, val_loader, criterion, optimizer, device, epochs=5)




Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 181MB/s]



Epoch 1/5
------------------------------
Train Loss: 0.6189, Train Acc: 0.6400
Val Loss: 0.4133, Val Acc: 0.8667

Epoch 2/5
------------------------------
Train Loss: 0.4715, Train Acc: 0.7550
Val Loss: 0.2990, Val Acc: 0.9667

Epoch 3/5
------------------------------
Train Loss: 0.4060, Train Acc: 0.8500
Val Loss: 0.2991, Val Acc: 0.8833

Epoch 4/5
------------------------------
Train Loss: 0.3874, Train Acc: 0.8400
Val Loss: 0.2209, Val Acc: 0.9667

Epoch 5/5
------------------------------
Train Loss: 0.3195, Train Acc: 0.8700
Val Loss: 0.1737, Val Acc: 0.9833
