# Imports

In [3]:
%load_ext autoreload
%autoreload 2

import pathlib
import PIL
from torchvision.transforms import ToTensor, ToPILImage

from imageaugment import augment
import PIL.Image
import pathlib
import json
import numpy as np
import skimage
import os

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Create Noisy Templates

To train a VAE for the denoising problem, noise must be added to the clean templates. The `imageaugment` package is used for this purpose, with the transform parameters set as below.

## Set Transform Parameters

In [4]:
# Random "faxify" transform used to degrade the clean templates.
# The angle, shift and scale ranges are each collapsed to a single value;
# earlier settings were angle_final=(0, 3), angle_transient=(0, 3) and
# shift=(.005, .01).
image_transform = augment.get_random_faxify(
    # Geometry ranges (all fixed to one value).
    angle_final=(0, 0),
    angle_transient=(0, 0),
    shift=(0, 0),
    scale=(1.0, 1.0),
    # Tone ranges.
    gamma=(0.8, 1.0),
    brightness=(1.0, 1.3),
    threshold=(0.65, 0.80),
    # Dither and flip probabilities are set to zero.
    ditherprob=0.0,
    flipprob=0.0,
    # Vertical-line settings.
    vlineprob=0.5,
    maxvlines=2,
    linewidth=(0.001, 0.002),
    # Particle settings.
    particledensity=(0.001, 0.01),
    particlesize=(0.0001, 0.001),
)

In [7]:
def save_faxified_templates(image_transform, image_folder_path, save_directory_path, n_samples, add_gaussian=False):
    """Apply ``image_transform`` to PNG templates and save the results.

    All ``*.png`` files found recursively under ``image_folder_path`` are
    sorted by path; the first ``n_samples`` of them are opened, converted to
    8-bit grayscale (mode ``"L"``), transformed and written to
    ``save_directory_path`` under their original file name.

    Args:
        image_transform: Callable taking a PIL image and returning the
            transformed image (the result must provide ``.save``).
        image_folder_path: Folder searched recursively for PNG files
            (``pathlib.Path`` or ``str``).
        save_directory_path: Output folder (``pathlib.Path`` or ``str``).
            It is created, including parents, when it does not exist yet.
        n_samples: Maximum number of images to process.
        add_gaussian: When true, Gaussian noise with variance 0.05 is added
            on top of the transformed image via ``skimage.util.random_noise``.

    Returns:
        None. The transformed images are written to disk.
    """
    source_directory = pathlib.Path(image_folder_path)
    target_directory = pathlib.Path(save_directory_path)
    # Saving into a folder that does not exist would raise on the first file.
    target_directory.mkdir(parents=True, exist_ok=True)

    image_path_list = sorted(source_directory.rglob("*.png"))[:n_samples]
    for image_path in image_path_list:
        # convert() returns a new, fully loaded image, so the source file can
        # be released as soon as the conversion is done.
        with PIL.Image.open(image_path) as source_image:
            image = source_image.convert("L")
        faxified = image_transform(image)
        if add_gaussian:
            # random_noise returns a float array, which is scaled back to the
            # 0-255 range before conversion to 8-bit grayscale. The random
            # generator argument is left at its default (unseeded).
            noisy = skimage.util.random_noise(np.array(faxified), mode='gaussian', clip=True, var=.05)
            faxified = PIL.Image.fromarray(noisy * 255).convert("L")
        # NOTE: only the file name is kept, so equally named files from
        # different sub-folders overwrite each other.
        faxified.save(target_directory / image_path.name)

## Create Training Data

In [8]:
# Input: clean simulated training templates. Output: their faxified versions.
training_images_folder_path = pathlib.Path(
    "/home/fahad/master_thesis/data/1024x1536/simulated_clean_templates/train/"
)
training_images_save_path = pathlib.Path(
    "/home/fahad/master_thesis/data/1024x1536/simulated_noisy_templates_without_gaussian_noise/train/"
)

# Faxify up to 1000 training templates; add_gaussian keeps its default (False).
save_faxified_templates(
    image_transform,
    training_images_folder_path,
    training_images_save_path,
    1000,
)

## Create Validation Data

In [9]:
# Input: clean simulated validation templates. Output: their faxified versions.
val_images_folder_path = pathlib.Path("/home/fahad/master_thesis/data/1024x1536/simulated_clean_templates/val/")
val_images_save_path = pathlib.Path("/home/fahad/master_thesis/data/1024x1536/simulated_noisy_templates_without_gaussian_noise/val/")

# Pass the validation paths defined above. The call previously referenced
# test_images_folder_path / test_images_save_path, which this notebook never
# defines.
save_faxified_templates(
    image_transform=image_transform,
    image_folder_path=val_images_folder_path,
    save_directory_path=val_images_save_path,
    n_samples=100
)

# Create Crops

For a dataset in which only handwritten crops are used for the denoising application, we extract the crops from the already available noisy templates and train on them against their clean counterparts. Crops are extracted from both the noisy and the clean template samples.

In [None]:
def create_crops(image_folder_path, graph_annotation_folder_path, save_directory, clean_crop):
    """Cut the annotated "numeric" boxes out of document images and save them.

    PNG images under ``image_folder_path`` and JSON annotation files under
    ``graph_annotation_folder_path`` are each sorted by path and paired by
    position. For every entry of the annotation's ``"NODES"`` list whose
    ``"category"`` equals ``"numeric"``, the box given by ``origin_x``,
    ``origin_y``, ``width`` and ``height`` is cropped, resized to 150x100 and
    written as ``<index>.png``, where ``index`` counts all saved crops from 0.

    Args:
        image_folder_path: ``pathlib.Path`` searched recursively for ``*.png``.
        graph_annotation_folder_path: ``pathlib.Path`` searched recursively
            for ``*.json``.
        save_directory: Output folder (``str`` or ``pathlib.Path``). It is
            created, including parents, when it does not exist yet.
        clean_crop: When true, each crop is converted to 1-bit mode (``"1"``)
            before saving.

    Returns:
        None. The crops are written to disk.
    """
    # Build paths with pathlib so that both str and Path arguments work, with
    # or without a trailing slash.
    output_directory = pathlib.Path(save_directory)
    output_directory.mkdir(parents=True, exist_ok=True)

    images = sorted(image_folder_path.rglob("*.png"))
    graph_annotations = sorted(graph_annotation_folder_path.rglob("*.json"))
    index = 0
    # NOTE: pairing is purely positional; zip() stops at the shorter list, so
    # both folders are expected to hold matching files in the same order.
    for image_path, annotation_path in zip(images, graph_annotations):
        with open(annotation_path, encoding="utf-8") as f:
            annotations = json.load(f)
        # Keep the image open only while its crops are being extracted.
        with PIL.Image.open(image_path) as image:
            for annotation in annotations["NODES"]:
                if annotation["category"] != "numeric":
                    continue
                x_top_left = annotation["origin_x"]
                y_top_left = annotation["origin_y"]
                x_bottom_right = x_top_left + annotation["width"]
                y_bottom_right = y_top_left + annotation["height"]
                crop = image.crop((x_top_left, y_top_left, x_bottom_right, y_bottom_right))
                crop = crop.resize((150, 100))
                if clean_crop:
                    crop = crop.convert("1")
                crop.save(output_directory / f"{index}.png")
                index += 1

## Create Clean Crops

### Training Crops

In [None]:
# Clean training crops: taken from the training documents, saved in 1-bit mode.
image_folder_path = pathlib.Path(
    "/home/fahad/training_data_with_bbox/train/documents"
)
graph_annotation_folder_path = pathlib.Path(
    "/home/fahad/training_data_with_bbox/train/graph_annotations/"
)
save_directory = "/home/fahad/master_thesis/data/crops/clean_crops/train/"

create_crops(
    image_folder_path,
    graph_annotation_folder_path,
    save_directory,
    clean_crop=True,
)

### Validation Crops

In [None]:
# Clean validation crops: taken from the validation documents, saved in 1-bit mode.
image_folder_path = pathlib.Path(
    "/home/fahad/training_data_with_bbox/val/documents"
)
graph_annotation_folder_path = pathlib.Path(
    "/home/fahad/training_data_with_bbox/val/graph_annotations/"
)
save_directory = "/home/fahad/master_thesis/data/crops/clean_crops/val/"

create_crops(
    image_folder_path,
    graph_annotation_folder_path,
    save_directory,
    clean_crop=True,
)

## Create Noisy Crops

### Training Crops

In [None]:
# Noisy training crops: taken from the simulated noisy training templates.
# NOTE(review): this folder differs from the save path used in the
# "Create Training Data" cell (.../1024x1536/simulated_noisy_templates_without_gaussian_noise/train/)
# - confirm which noisy template set is intended.
image_folder = pathlib.Path("/home/fahad/master_thesis/data/simulated_noisy_templates/train/")
graph_annotation_folder = pathlib.Path("/home/fahad/training_data_with_bbox/train/graph_annotations/")
save_directory = "/home/fahad/master_thesis/data/crops/noisy_crops/train/"
# Pass the folders defined in this cell. The call previously used
# image_folder_path / graph_annotation_folder_path, which still hold the
# values assigned by the preceding clean-crop cell.
create_crops(
    image_folder_path=image_folder,
    graph_annotation_folder_path=graph_annotation_folder,
    save_directory=save_directory,
    clean_crop=False,
)

### Validation Crops

In [None]:
# Noisy validation crops: taken from the simulated noisy validation templates.
# NOTE(review): this folder differs from the save path used in the
# "Create Validation Data" cell (.../1024x1536/simulated_noisy_templates_without_gaussian_noise/val/)
# - confirm which noisy template set is intended.
image_folder = pathlib.Path("/home/fahad/master_thesis/data/simulated_noisy_templates/val/")
graph_annotation_folder = pathlib.Path("/home/fahad/training_data_with_bbox/val/graph_annotations/")
save_directory = "/home/fahad/master_thesis/data/crops/noisy_crops/val/"
# Pass the folders defined in this cell. The call previously used
# image_folder_path / graph_annotation_folder_path, which still hold the
# values assigned by an earlier clean-crop cell.
create_crops(
    image_folder_path=image_folder,
    graph_annotation_folder_path=graph_annotation_folder,
    save_directory=save_directory,
    clean_crop=False,
)