In [1]:
import time
import barcode
from barcode.writer import ImageWriter
import cv2
import numpy as np
import random
import os
import string

# Directory that receives the generated training images.
output_folder = 'dataset/train'
# exist_ok=True keeps the cell re-runnable and avoids the race between
# checking for the directory and creating it.
os.makedirs(output_folder, exist_ok=True)

# Pool of symbols the random barcode payloads are drawn from (a-z, A-Z, 0-9).
possible_characters = string.ascii_letters + string.digits

# Slant Effect (\\\ or ///)
def add_slant_effect(img, intensity = 0.3):
    """Apply a horizontal shear of random sign to a single-channel image.

    Args:
        img: 2-D image array; its shape is unpacked as (rows, cols).
        intensity: Magnitude of the shear factor. Whether it is applied as
            +intensity or -intensity is decided by a coin flip.

    Returns:
        The warped image. The canvas keeps the original height and is widened
        by ``abs(shear * rows)`` columns; uncovered pixels are filled with 255.
    """
    height, width = img.shape
    positive = random.choice([True, False])
    shear = intensity if positive else -intensity
    extra_width = abs(shear * height)

    # A negative shear moves rows towards x < 0, so translate the result back
    # into the visible canvas by the same amount.
    x_shift = extra_width if shear < 0 else 0
    matrix = np.float32([[1, shear, x_shift], [0, 1, 0]])

    canvas_size = (int(width + extra_width), height)
    return cv2.warpAffine(img, matrix, canvas_size, borderValue = 255)

# Bottle curvature
def add_cylinder_distortion(img, k_factor=0.00002):
    """Warp a single-channel image with a radial distortion around its centre.

    Every destination pixel (x, y) samples the source at
    ``centre + (offset from centre) * (1 + k_factor * r^2)``, where ``r^2`` is
    the squared distance of (x, y) from the image centre.

    The sampling maps are built with NumPy broadcasting instead of a Python
    loop over every pixel; the arithmetic is done in float64 and rounded to
    float32 at the end, exactly as the element-wise assignment did.

    Args:
        img: 2-D image array; its shape is unpacked as (h, w).
        k_factor: Strength of the distortion.

    Returns:
        The remapped image, same size as the input, with out-of-range samples
        filled with 255.
    """
    h, w = img.shape
    center_x, center_y = w // 2, h // 2

    # Offsets from the centre along each axis, shaped for broadcasting to (h, w).
    dx = (np.arange(w, dtype=np.float64) - center_x)[np.newaxis, :]
    dy = (np.arange(h, dtype=np.float64) - center_y)[:, np.newaxis]

    factor = 1 + k_factor * (dx * dx + dy * dy)

    map_x = (center_x + dx * factor).astype(np.float32)
    map_y = (center_y + dy * factor).astype(np.float32)
    return cv2.remap(img, map_x, map_y, cv2.INTER_LINEAR, borderValue=255)

# General Generator
def generate_synthetic_data(filename, value):
    """Render `value` as a Code 128 barcode, augment it and save it as a PNG.

    The barcode is rendered to a temporary PNG in the current working
    directory, read back as grayscale, randomly augmented and written to
    ``{output_folder}/{filename}.png``. The temporary file is always removed,
    including when reading fails or an exception is raised.

    Augmentations (each decided independently per call):
        * slant (shear) with probability 0.2,
        * rotation by a random angle in [-4, 4] degrees, plus 180 degrees with
          probability 0.5,
        * radial "bottle" distortion with probability 0.1.

    Args:
        filename: Base name (without extension) of the output image.
        value: Text to encode in the barcode.

    Returns:
        True if the augmented image was written, False if the rendered barcode
        could not be read back or the output could not be written.
    """
    barcode_class = barcode.get_barcode_class('code128')
    my_barcode = barcode_class(value, writer=ImageWriter())
    temp_name = f"temp_{filename}"

    options = {
        'write_text': False,
        'quiet_zone': 2.0,
        'dpi': 300,
        'module_height': 15.0
    }

    temp_path = temp_name + ".png"
    try:
        # save() appends the extension itself; prefer the path it reports and
        # fall back to the conventional "<name>.png" if it returns nothing.
        temp_path = my_barcode.save(temp_name, options=options) or temp_path
        img = cv2.imread(temp_path, cv2.IMREAD_GRAYSCALE)
    finally:
        # The rendered file is only an intermediate; never leave it behind.
        if os.path.exists(temp_path):
            os.remove(temp_path)

    if img is None:
        return False

    # Inclination
    if random.random() > 0.8:
        img = add_slant_effect(img, intensity=random.uniform(0.05, 0.5))

    # Rotation: small jitter, and half of the samples are turned upside down.
    angle = random.uniform(-4, 4)
    if random.random() > 0.5:
        angle += 180

    # Bottle
    if random.random() > 0.90:
        img = add_cylinder_distortion(img, k_factor=random.uniform(0.00001, 0.00010))

    h, w = img.shape
    M = cv2.getRotationMatrix2D((w//2, h//2), angle, 1)

    # Rotate on a canvas of the same size, padding with white (255).
    img = cv2.warpAffine(img, M, (w, h), borderMode=cv2.BORDER_CONSTANT, borderValue=255)

    return bool(cv2.imwrite(f"{output_folder}/{filename}.png", img))
 
# Dataset
# Rows of the label file, one "<image file>,<encoded value>" entry per image.
labels_list = []
num_images = 10 #10000

for i in range(num_images):
    # Random payload of 6 to 12 alphanumeric characters.
    value_length = random.randint(6, 12)
    val_str = ''.join(random.choice(possible_characters) for _ in range(value_length))
    fname = f"barcode_{i}"

    generate_synthetic_data(fname, val_str)

    # Only label images that are actually on disk, so the CSV never points at
    # a file the generator failed to produce.
    if os.path.exists(f"{output_folder}/{fname}.png"):
        labels_list.append(f"{fname}.png,{val_str}")

csv_path = "dataset/labels.csv"
with open(csv_path, "w") as f:
    f.write("filename,value\n")
    f.writelines(line + "\n" for line in labels_list)

In [2]:
import torch
import torch.nn as nn
import string

# Symbols the recogniser can emit. "-" is placed first so that it gets index 0,
# which the training cell passes to CTCLoss as the blank index.
alphabet = string.ascii_letters + string.digits
characters = "-" + alphabet

# Two-way lookup between symbols and their class indices.
int_to_char = dict(enumerate(characters))
char_to_int = {symbol: index for index, symbol in int_to_char.items()}
num_classes = len(characters)

In [3]:
class BarcodeModel(nn.Module):
    """CNN + bidirectional LSTM sequence recogniser for barcode images.

    The CNN halves the width once and the height three times (height / 8,
    width / 2) and ends with 256 channels. Each remaining column is flattened
    into one feature vector and the columns are fed to the LSTM as a sequence.

    Args:
        num_classes: Size of the output alphabet (including the CTC blank).
        img_height: Height in pixels of the images the model will receive.
            The LSTM input width is derived from it (256 * (img_height // 8))
            instead of being hard-coded, so it matches the 32-pixel-high
            images produced by the dataset's Resize((32, 320)). Pass 64 to get
            the former fixed width of 2048 (e.g. to load weights trained with
            that layout).

    Raises:
        ValueError: If `img_height` is smaller than 8 (nothing would be left
            after the three height poolings).
    """

    def __init__(self, num_classes, img_height=32):
        super().__init__()

        if img_height < 8:
            raise ValueError(f"img_height must be at least 8, got {img_height}")

        # CNN
        self.cnn = nn.Sequential(
            nn.Conv2d(1, 64, kernel_size = 3, padding = 1), nn.BatchNorm2d(64), nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(64, 128, kernel_size = 3, padding = 1), nn.BatchNorm2d(128), nn.ReLU(),
            # From here on only the height is pooled, keeping the sequence length.
            nn.MaxPool2d((2, 1), (2, 1)),
            nn.Conv2d(128, 256, kernel_size = 3, padding = 1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.Conv2d(256, 256, kernel_size = 3, padding = 1), nn.BatchNorm2d(256), nn.ReLU(),
            nn.MaxPool2d((2, 1), (2, 1)),
        )

        # Three floor-halvings of the height are equivalent to floor(height / 8).
        feature_height = img_height // 8

        # RNN (LSTM - Long Short-Term Memory)
        self.rnn = nn.LSTM(input_size = 256 * feature_height, hidden_size = 256, bidirectional = True, batch_first = False, num_layers = 2)

        # Result: forward and backward hidden states are concatenated.
        self.fc = nn.Linear(256 * 2, num_classes)

    def forward(self, x):
        """Return per-column class scores.

        Args:
            x: Tensor of shape [batch, 1, height, width]; `height` must agree
               with the `img_height` given at construction, otherwise the LSTM
               rejects the feature size.

        Returns:
            Unnormalised scores of shape [width / 2, batch, num_classes].
        """
        features = self.cnn(x)

        # The RNN needs the width to be the "time" of the sequence:
        # [b, c, h, w] -> [b, c*h, w] -> [w, b, c*h]
        b, c, h, w = features.size()
        features = features.reshape(b, c * h, w)
        features = features.permute(2, 0, 1)

        output, _ = self.rnn(features)
        logits = self.fc(output)
        return logits

# Build one instance sized for the alphabet and print its layer summary.
model = BarcodeModel(num_classes=num_classes)
print(model)

In [4]:
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as T

class BarcodeDataset(Dataset):
    """Dataset of barcode images and their encoded text.

    Args:
        csv_file: CSV whose first column is the image file name and whose
            second column is the text encoded in that image.
        img_dir: Directory that contains the image files.
        char_to_int: Mapping from each label character to its class index.
    """

    def __init__(self, csv_file, img_dir, char_to_int):
        # Read every cell as plain text. With pandas' default type inference a
        # column of digit-only labels would be parsed as integers (dropping
        # leading zeros) and tokens such as "NA" would become missing values.
        self.df = pd.read_csv(csv_file, dtype=str, keep_default_na=False)
        self.img_dir = img_dir
        self.char_to_int = char_to_int
        # Grayscale, fixed 32x320 size, then scale pixel values to [-1, 1].
        self.transform = T.Compose([
            T.Grayscale(),
            T.Resize((32, 320)),
            T.ToTensor(),
            T.Normalize((0.5,), (0.5,))
        ])

    def __len__(self):
        """Number of labelled images."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return (image tensor, int32 label tensor, label length) for row `idx`.

        Raises:
            KeyError: If the label contains a character missing from
                `char_to_int`.
        """
        img_name = f"{self.img_dir}/{self.df.iloc[idx, 0]}"
        # Close the file handle as soon as the pixels have been converted, so
        # long epochs do not accumulate open files.
        with Image.open(img_name) as raw_image:
            image = self.transform(raw_image)

        label_str = str(self.df.iloc[idx, 1])
        label = [self.char_to_int[c] for c in label_str]

        return image, torch.IntTensor(label), len(label)

In [5]:
def collate_fn(batch):
    """Merge (image, label, label_length) samples into one batch.

    Labels have different lengths, so they are not padded: they are
    concatenated into a single 1-D tensor and the per-sample lengths are
    returned alongside.

    Args:
        batch: Sequence of (image tensor, 1-D label tensor, label length).

    Returns:
        Tuple of (images stacked along a new first dimension, all labels
        concatenated, int32 tensor with each sample's label length).
    """
    images, targets, lengths = zip(*batch)
    return (
        torch.stack(images, 0),
        torch.cat(targets, 0),
        torch.IntTensor(lengths),
    )

In [6]:
import torch
import gc
# Free memory left over from earlier runs before training starts: first drop
# unreachable Python objects, then ask PyTorch to release its cached CUDA memory.
gc.collect()
torch.cuda.empty_cache()

In [None]:
from torch import optim

# Train on the GPU when one is available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Entrenant amb: {device}")

model = BarcodeModel(num_classes).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.001, amsgrad=True)
# Index 0 of `characters` ("-") is the CTC blank.
criterion = nn.CTCLoss(blank=0, reduction='mean').to(device)

dataset = BarcodeDataset("dataset/labels.csv", "dataset/train", char_to_int)
train_loader = DataLoader(dataset, batch_size=128, shuffle=True, collate_fn=collate_fn)

print(f"Caràcters: {characters}")
print(f"Índex del blanc (blank): {char_to_int[characters[0]]}")

# Single source of truth for the epoch count, used by both the loop and the log line.
num_epochs = 500

for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    counted_batches = 0  # batches that actually contributed to total_loss
    for i, (imgs, labels, label_lengths) in enumerate(train_loader):
        imgs = imgs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        # logits: [sequence length, batch, num_classes]
        logits = model(imgs)
        logits_log_softmax = logits.log_softmax(2)

        # Every image yields the same number of time steps.
        input_lengths = torch.full(size=(imgs.size(0),),
                                   fill_value=logits.size(0),
                                   dtype=torch.long).to(device)

        loss = criterion(logits_log_softmax, labels, input_lengths, label_lengths)

        # Skip the update rather than propagate a non-finite loss.
        if torch.isnan(loss) or torch.isinf(loss):
            print("Loss invàlida!")
            continue

        loss.backward()

        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5)

        optimizer.step()
        total_loss += loss.item()
        counted_batches += 1

        if i % 10 == 0:
            print(f"Batch {i} | Loss: {loss.item():.4f}")

    # Average only over batches that were used; skipped batches added nothing
    # to total_loss. max(..., 1) keeps an empty or fully skipped epoch from
    # dividing by zero.
    mean_loss = total_loss / max(counted_batches, 1)
    print(f"--- Epoch {epoch+1}/{num_epochs} Finalizada | Loss Mitjana: {mean_loss:.4f} ---")
    # Overwrite the checkpoint after every epoch.
    torch.save(model.state_dict(), 'model_barcodes_ultim.pth')
    print("S'ha guardat el model de l'Epoch", epoch+1)
    model.eval()