# Install

In [1]:
%pip install qrcode[pil] Pillow tqdm

# Import

In [2]:
import os
import csv
import random
import uuid
import math
import numpy as np
import qrcode
from PIL import Image
import pandas as pd
from tqdm import tqdm

# Create
## A set of QR codes becomes the control group

In [3]:
# Digits 0-9: each one is encoded as its own QR code
character_set = [str(digit) for digit in range(10)]

# Output folder for the generated QR code PNGs (created when missing)
png_dir = "data/png"
os.makedirs(png_dir, exist_ok=True)

def generate_qr_png(data, file_path, version=1, error_correction=qrcode.constants.ERROR_CORRECT_L, box_size=10, border=4):
    """Encode ``data`` as a QR code and save the rendered image to ``file_path``.

    ``version``, ``error_correction``, ``box_size`` and ``border`` are handed
    to ``qrcode.QRCode`` unchanged. The code is built with ``make(fit=True)``
    and rendered with black modules on a white background.
    """
    qr_settings = {
        "version": version,
        "error_correction": error_correction,
        "box_size": box_size,
        "border": border,
    }
    code = qrcode.QRCode(**qr_settings)
    code.add_data(data)
    code.make(fit=True)
    rendered = code.make_image(fill_color="black", back_color="white")
    rendered.save(file_path)

# Pair every character with the PNG path its QR code is written to
qr_targets = [(char, os.path.join(png_dir, f"{char}.png")) for char in character_set]

# Render one QR code per character, with a progress bar
print("Generating QR codes...")
for char, png_path in tqdm(qr_targets, desc="QR Codes"):
    generate_qr_png(char, png_path)

# Report the on-disk size of each generated PNG
print("\nFile sizes for PNG QR codes:")
for char, png_path in qr_targets:
    print(f"{char}.png: {os.path.getsize(png_path)} bytes")

# Define metadata path
metadata = "data/metadata.csv"

# Make sure the folder that receives the CSV exists even when this cell is
# run without the PNG folder having been created under the same parent.
os.makedirs(os.path.dirname(metadata), exist_ok=True)

# Create metadata CSV: one row per PNG with its path, label and size.
# os.listdir() returns entries in arbitrary order, so the names are sorted to
# keep the row order (and everything derived from it) stable between runs.
files = sorted(f for f in os.listdir(png_dir) if f.endswith('.png'))
with open(metadata, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["filename", "label", "filesize (Bytes)"])
    for f in tqdm(files, desc="Creating metadata"):
        # The label is the file name without its extension (e.g. "7" for "7.png")
        label = os.path.splitext(f)[0]
        file_path = os.path.join(png_dir, f)
        filesize = os.path.getsize(file_path)
        writer.writerow([file_path, label, filesize])
print(f"\nMetadata file created at {metadata}")

Generating QR codes...


can't start new thread
  for char in tqdm(character_set, desc="QR Codes"):
QR Codes: 100%|██████████| 10/10 [00:02<00:00,  3.98it/s]



File sizes for PNG QR codes:
0.png: 439 bytes
1.png: 423 bytes
2.png: 441 bytes
3.png: 451 bytes
4.png: 442 bytes
5.png: 445 bytes
6.png: 439 bytes
7.png: 444 bytes
8.png: 432 bytes
9.png: 444 bytes


Creating metadata: 100%|██████████| 10/10 [00:00<00:00, 59.88it/s]



Metadata file created at data/metadata.csv


# Create
## Two distortions per image form the test group

In [4]:
# (low, high) sampling bounds for the random affine parameters
ROTATION_RANGE = SHEAR_RANGE = (-10, 10)  # angles in degrees (converted with math.radians)
TRANSLATION_RANGE = (-5, 5)               # offset added to the x / y translation
SCALING_RANGE = (0.9, 1.1)                # uniform scale factor

# Folder that receives the distorted copies (created when missing)
augmented_dir = "data/augmented"
os.makedirs(augmented_dir, exist_ok=True)

def get_affine_matrix(rotation, tx, ty, scaling, shear, center, inverse=True):
    """Return the six affine coefficients ``(a, b, c, d, e, f)`` for Pillow.

    The forward transform moves a source point ``p`` to
    ``A @ (p - center) + center + (tx, ty)`` where ``A = R @ S @ Sh`` combines
    a rotation, a uniform scaling and a horizontal shear.

    ``Image.transform(size, Image.AFFINE, data)`` does not take that forward
    mapping: for every *output* pixel ``(x, y)`` it samples the *input* image
    at ``(a*x + b*y + c, d*x + e*y + f)``. The coefficients handed to Pillow
    must therefore describe the inverse mapping, otherwise the image is
    rotated/scaled/shifted the opposite way to the requested parameters.

    Args:
        rotation: Rotation angle in degrees.
        tx: Translation along x.
        ty: Translation along y.
        scaling: Uniform scale factor.
        shear: Shear angle in degrees (its tangent is the x-shear factor).
        center: ``(cx, cy)`` point that stays fixed under rotate/scale/shear.
        inverse: When True (default) return the output->input coefficients
            expected by ``Image.transform``. When False return the forward
            source->destination coefficients.

    Returns:
        Tuple ``(a, b, c, d, e, f)`` of floats.

    Raises:
        ValueError: If ``inverse`` is True and ``scaling`` is 0, because the
            transform then has no inverse.
    """
    theta = math.radians(rotation)
    shear_rad = math.radians(shear)
    R = np.array([
        [math.cos(theta), -math.sin(theta)],
        [math.sin(theta),  math.cos(theta)]
    ])
    S = np.array([
        [scaling, 0],
        [0, scaling]
    ])
    Sh = np.array([
        [1, math.tan(shear_rad)],
        [0, 1]
    ])
    A = R.dot(S).dot(Sh)
    cx, cy = center
    # Translation that keeps `center` fixed under A, plus the requested shift
    T = np.array([cx, cy]) - A.dot(np.array([cx, cy])) + np.array([tx, ty])

    if inverse:
        # det(A) == scaling ** 2, so A is singular exactly when scaling is 0
        if scaling == 0:
            raise ValueError("scaling must be non-zero to invert the affine transform")
        # p' = A p + T   =>   p = A^-1 p' - A^-1 T
        A_inv = np.linalg.inv(A)
        A, T = A_inv, -A_inv.dot(T)

    a, b = A[0, 0], A[0, 1]
    d, e = A[1, 0], A[1, 1]
    c, f = T[0], T[1]
    return (float(a), float(b), float(c), float(d), float(e), float(f))
    
# CSV file that records the parameters of every distorted image
aug_log_file = "data/augmented_log.csv"
# Number of distorted copies produced per original image
num_augmentations = 2

# Create
## The test group images and their metadata log are created

In [5]:
# Read metadata written for the original QR codes
df = pd.read_csv(metadata)
orig_image_paths = df['filename'].tolist()
orig_labels = df['label'].tolist()

# Seed the RNG right before sampling so that re-running this cell reproduces
# the same distortions and the same log.
AUGMENTATION_SEED = 42
random.seed(AUGMENTATION_SEED)

with open(aug_log_file, 'w', newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["augmented_filename", "orig_filename", "orig_label", "rotation", "tx", "ty", "scaling", "shear"])

    for orig_path, label in tqdm(zip(orig_image_paths, orig_labels), total=len(orig_image_paths), desc="Augmenting"):
        orig_basename = os.path.basename(orig_path)
        # Release the source file as soon as the grayscale copy exists
        with Image.open(orig_path) as source_img:
            img = source_img.convert("L")
        # Rotation / scaling / shear are applied about the image centre
        center = (img.size[0] / 2, img.size[1] / 2)

        for i in tqdm(range(num_augmentations), desc=f"{label}.png", leave=False):
            # Draw one random parameter set per distorted copy
            rotation = random.uniform(*ROTATION_RANGE)
            tx = random.uniform(*TRANSLATION_RANGE)
            ty = random.uniform(*TRANSLATION_RANGE)
            scaling = random.uniform(*SCALING_RANGE)
            shear = random.uniform(*SHEAR_RANGE)

            matrix = get_affine_matrix(rotation, tx, ty, scaling, shear, center)
            # NOTE(review): pixels that fall outside the source image get Pillow's
            # default fill rather than the white QR background - confirm this is intended.
            augmented_img = img.transform(img.size, Image.AFFINE, matrix, resample=Image.BICUBIC)
            aug_filename = f"{os.path.splitext(orig_basename)[0]}_aug_{i}.png"
            aug_filepath = os.path.join(augmented_dir, aug_filename)
            augmented_img.save(aug_filepath)
            # Log the sampled parameters next to the file they produced
            writer.writerow([aug_filename, orig_basename, label, rotation, tx, ty, scaling, shear])

print("\nAugmented images saved and CSV log written.")

Augmenting:   0%|          | 0/10 [00:00<?, ?it/s]
[Ang:   0%|          | 0/2 [00:00<?, ?it/s]
[Ang:  50%|█████     | 1/2 [00:00<00:00,  1.31it/s]
[Ang: 100%|██████████| 2/2 [00:01<00:00,  2.18it/s]
Augmenting:  10%|█         | 1/10 [00:01<00:10,  1.15s/it]
[Ang:   0%|          | 0/2 [00:00<?, ?it/s]
[Ang:  50%|█████     | 1/2 [00:00<00:00,  4.72it/s]
[Ang: 100%|██████████| 2/2 [00:00<00:00,  4.87it/s]
Augmenting:  20%|██        | 2/10 [00:01<00:06,  1.20it/s]
[Ang:   0%|          | 0/2 [00:00<?, ?it/s]
[Ang:  50%|█████     | 1/2 [00:00<00:00,  5.59it/s]
[Ang: 100%|██████████| 2/2 [00:00<00:00,  4.72it/s]
Augmenting:  30%|███       | 3/10 [00:02<00:05,  1.39it/s]
[Ang:   0%|          | 0/2 [00:00<?, ?it/s]
[Ang:  50%|█████     | 1/2 [00:00<00:00,  4.57it/s]
[Ang: 100%|██████████| 2/2 [00:00<00:00,  4.36it/s]
Augmenting:  40%|████      | 4/10 [00:02<00:04,  1.47it/s]
[Ang:   0%|          | 0/2 [00:00<?, ?it/s]
[Ang:  50%|█████     | 1/2 [00:00<00:00,  4.69it/s]
[Ang: 100%


Augmented images saved and CSV log written.


# Calculate
## Distorted files are counted and their average size is reported for reference

In [6]:
# Report how many augmented PNGs exist and their mean on-disk size
print("\nCalculating average file size for Augmented images...")
aug_files = [name for name in os.listdir(augmented_dir) if name.endswith('.png')]
total_size = sum(
    os.path.getsize(os.path.join(augmented_dir, name))
    for name in tqdm(aug_files, desc="File Size Calc")
)

if not aug_files:
    print("No Augmented PNG files found in the directory.")
else:
    avg_size = total_size / len(aug_files)
    print(f"Total distortions files: {len(aug_files)}")
    print(f"Average file size: {avg_size:.2f} bytes")


Calculating average file size for Augmented images...


File Size Calc: 100%|██████████| 20/20 [00:00<00:00, 60.61it/s]


Total distortions files: 20
Average file size: 8125.15 bytes
