##### Imports

In [None]:
import random
import cv2
from matplotlib import pyplot as plt
import albumentations as A
import os
from os import listdir
import imghdr
from PIL import Image
from PIL.ExifTags import TAGS
import numpy as np
import pandas as pd
from tqdm import tqdm
import uuid
from copy import deepcopy

##### Util methods

In [None]:
def get_image_info(image):
    """Collect descriptive metadata from an opened image.

    Args:
        image: Image object exposing ``filename``, ``height``, ``width``,
            ``format``, ``size`` and an ``info`` mapping. ``model_name``,
            ``sampler`` and ``prompt`` are read when present.

    Returns:
        dict: Metadata keyed by column name. Insertion order is kept stable
        because it determines the column order of the process log.
    """
    metadata = {}
    metadata["Original_Image"] = image.filename
    metadata["Height"] = image.height
    metadata["Width"] = image.width
    metadata["Format"] = image.format
    metadata["Size"] = image.size
    # Optional attributes: placeholders are used when the image lacks them.
    metadata["Model"] = getattr(image, "model_name", 'none')
    metadata["Sampler"] = getattr(image, "sampler", '')
    metadata["Prompt"] = getattr(image, "prompt", '')
    # TODO: for now use this field instead of: Model, Sampler & Prompt
    metadata["Parameters"] = image.info.get('parameters', '')
    return metadata

def get_image_files(folder):
    """Return the names of the image files located directly inside ``folder``.

    Entries are recognised by content via ``imghdr.what``, not by extension.
    Sub-directories and other non-regular entries are skipped, because
    ``imghdr.what`` opens the path for reading and would raise on them.

    Args:
        folder: Path of the directory to scan (not recursive).

    Returns:
        list[str]: File names (not full paths), sorted so that repeated runs
        process the images in the same order.
    """
    image_names = []
    for name in sorted(os.listdir(folder)):
        path = os.path.join(folder, name)
        # Check isfile first: imghdr.what() would fail on a directory.
        if os.path.isfile(path) and imghdr.what(path):
            image_names.append(name)
    return image_names

def remove_exif(image):
    """Return a copy of ``image`` that carries pixel data only.

    A fresh image of the same mode and size is created and filled with the
    source pixels, so nothing from the source's metadata is carried over.

    Args:
        image: Source PIL image.

    Returns:
        A new PIL image with the same mode, size and pixels as ``image``.
    """
    image_without_exif = Image.new(image.mode, image.size)
    image_without_exif.putdata(list(image.getdata()))
    # Palette images store colour indices; without the palette those indices
    # would map to the wrong colours once the copy is converted to RGB.
    if image.mode == 'P':
        palette = image.getpalette()
        if palette is not None:
            image_without_exif.putpalette(palette)
    return image_without_exif

def to_jpeg(image):
    """Return ``image`` as an RGB JPEG image.

    The source format is checked before any conversion, because
    ``Image.convert`` returns an image whose ``format`` is ``None``.
    Encoding is done in memory, so no temporary file is written to the
    working directory.

    Args:
        image: Source PIL image.

    Returns:
        ``image`` itself when it is already an RGB JPEG; otherwise a new
        image decoded from an in-memory JPEG encoding of the RGB pixels.
    """
    import io

    if image.format == 'JPEG' and image.mode == 'RGB':
        # Already in the target representation; re-encoding would only lose quality.
        return image

    buffer = io.BytesIO()
    image.convert('RGB').save(buffer, format='JPEG')
    buffer.seek(0)
    jpeg_image = Image.open(buffer)
    # Decode eagerly so the result does not depend on later reads of the buffer.
    jpeg_image.load()
    return jpeg_image

def get_clean_image(source_folder, file):
    """Load an image, record its metadata and return its pixels without EXIF.

    Args:
        source_folder: Directory containing the image.
        file: File name of the image inside ``source_folder``.

    Returns:
        tuple: ``(pixels, metadata)`` where ``pixels`` is an RGB numpy array
        ready for the albumentations transforms and ``metadata`` is the dict
        produced by ``get_image_info`` for the original file.
    """
    image_path = os.path.join(source_folder, file)
    # The context manager closes the underlying file handle; without it every
    # processed image would leave a handle open.
    with Image.open(image_path) as image:
        # Capture the metadata before it is stripped
        original_exif_data = get_image_info(image)
        # Apply constant transformations (the result is an independent copy)
        processed_image = remove_exif(image)

    # Convert image to np array for albumentations transformations
    processed_image = np.array(processed_image.convert('RGB'))

    return processed_image, original_exif_data

def save_new_image(processed_image, target_folder, file):
    """Persist a processed image under a new unique file name.

    Args:
        processed_image: Image pixels as a numpy array.
        target_folder: Directory the image is written to.
        file: Original file name; only its extension is reused.

    Returns:
        str: Path of the saved image.
    """
    # splitext handles names without an extension (which content-based image
    # detection allows); fall back to PNG so the image library can still
    # infer an output format from the file name.
    extension = os.path.splitext(file)[1] or '.png'
    # create new guid for the processed image
    new_file = uuid.uuid4().hex + extension
    target_path = os.path.join(target_folder, new_file)
    # Convert array back to image and save it
    Image.fromarray(processed_image).save(target_path)

    return target_path

##### Variables

In [None]:
# Registry of the available augmentations: name -> callable taking an image
# array and returning the transformed array. Each callable builds its
# albumentations transform at call time with p=1.0.
# NOTE: the keys are logged verbatim in the 'Transformations' column of the
# process log, so the existing spellings ('guass_noise',
# 'multiplacative_nose') are kept as-is.
image_transformers = {
    # deprecated # Decrease Jpeg compression of an image.
    # 'jpeg_compression': lambda img: A.JpegCompression(always_apply=False, p=1.0, quality_lower=80, quality_upper=100)(image=img)['image'],
    # Blur the input image using a random-sized kernel.
    'blur': lambda img: A.Blur(always_apply=False, p=1.0, blur_limit=(3, 7))(image=img)['image'],
    # Crop a random part of the input and rescale it to some size.
    # TODO: don't uncomment for now, needs size adjusting per processed image
    # 'random_size_crop': lambda img: A.RandomSizedCrop(always_apply=False, p=1.0, min_max_height=(405, 810), height=810, width=1080, w2h_ratio=1.0, interpolation=0)(image=img)['image'],
    # Decreases image quality by downscaling and upscaling back.
    'downscale': lambda img: A.Downscale(always_apply=False, p=1.0, scale_min=0.25, scale_max=0.25, interpolation=0)(image=img)['image'],
    # Flip the input either horizontally, vertically or both horizontally and vertically.
    'flip': lambda img: A.Flip(always_apply=False, p=1.0)(image=img)['image'],
    # Apply gaussian noise to the input image.
    'guass_noise': lambda img: A.GaussNoise(always_apply=False, p=1.0, var_limit=(10.0, 50.0))(image=img)['image'],
    # Apply camera sensor noise.
    'iso_noise': lambda img: A.ISONoise(always_apply=False, p=1.0, intensity=(0.1, 0.5), color_shift=(0.01, 0.05))(image=img)['image'],
    # Re-encode the image with lossy compression at quality 80-100.
    # NOTE(review): compression_type=0 presumably selects JPEG - confirm against the installed albumentations version.
    'image_compression': lambda img: A.ImageCompression(always_apply=False, p=1.0, quality_lower=80, quality_upper=100, compression_type=0)(image=img)['image'],
    # Randomly rotate the input by 90 degrees zero or more times
    'random_rotate_90': lambda img: A.RandomRotate90(always_apply=False, p=1.0)(image=img)['image'],
    # Convert the input RGB image to grayscale. If the mean pixel value for the resulting image is greater than 127, invert the resulting grayscale image.
    'to_gray': lambda img: A.ToGray(always_apply=False, p=1.0)(image=img)['image'],

    # Elastic deformation of images as described in [Simard2003]_ (with modifications). Based on https://gist.github.com/erniejunior/601cdf56d2b424757de5
    'elastic_transform': lambda img: A.ElasticTransform(always_apply=False, p=1.0, alpha=1.0, sigma=50.0, alpha_affine=50.0, interpolation=0, border_mode=0, value=(0, 0, 0), mask_value=None, approximate=False)(image=img)['image'],
    # Apply Contrast Limited Adaptive Histogram Equalization to the input image.
    'clahe': lambda img: A.CLAHE(always_apply=False, p=1.0, clip_limit=(1, 4), tile_grid_size=(8, 8))(image=img)['image'],
    # Equalize the image histogram.
    'equalize': lambda img: A.Equalize(always_apply=False, p=1.0, mode='cv', by_channels=True)(image=img)['image'],
    # Apply grid distortion to the image (5 steps, distortion limited to +/-0.3).
    'grid_distortion': lambda img: A.GridDistortion(always_apply=False, p=1.0, num_steps=5, distort_limit=(-0.3, 0.3), interpolation=0, border_mode=0, value=(0, 0, 0), mask_value=None)(image=img)['image'],
    # Multiply image to random number or array of numbers.
    'multiplacative_nose': lambda img: A.MultiplicativeNoise(always_apply=False, p=1.0, multiplier=(0.9, 1.1), per_channel=True, elementwise=True)(image=img)['image'],
    # Apply optical distortion to the image (distortion limited to +/-0.3, shift to +/-0.05).
    'optical_distortion': lambda img: A.OpticalDistortion(always_apply=False, p=1.0, distort_limit=(-0.3, 0.3), shift_limit=(-0.05, 0.05), interpolation=0, border_mode=0, value=(0, 0, 0), mask_value=None)(image=img)['image'],
    # Randomly change brightness and contrast of the input image.
    'random_brightness': lambda img: A.RandomBrightnessContrast(always_apply=False, p=1.0, brightness_limit=(-0.2, 0.2), contrast_limit=(-0.2, 0.2), brightness_by_max=True)(image=img)['image'],
    # Simulates fog for the image.
    'random_fog': lambda img: A.RandomFog(always_apply=False, p=1.0, fog_coef_lower=0.1, fog_coef_upper=0.2, alpha_coef=0.08)(image=img)['image'],
    # Randomly change contrast of an image by raising its values to the power gamma.
    'random_gamma': lambda img: A.RandomGamma(always_apply=False, p=1.0, gamma_limit=(80, 120), eps=1e-07)(image=img)['image'],
}

# Names of all registered transformations; random subsets are sampled from this list.
all_transformations = list(image_transformers.keys())

##### Process images

In [None]:
def apply_transformations(image, transformations):
    """Apply a sequence of named transformations to an image, in order.

    Names that have no usable entry in ``image_transformers`` are skipped
    silently.

    Args:
        image: Image array to transform.
        transformations: Iterable of transformation names.

    Returns:
        tuple: ``(image, applied)`` where ``image`` is the result of chaining
        the transformations and ``applied`` lists the names actually run.
    """
    applied = []
    for name in transformations:
        transform = image_transformers.get(name)
        if not transform:
            continue
        image = transform(image)
        applied.append(name)
    return image, applied

def apply_random_transformations(processed_image, max_transformations=6):
    """Apply a random subset of the registered transformations to an image.

    Args:
        processed_image: Image array to transform.
        max_transformations: Upper bound (inclusive) on how many distinct
            transformations are drawn. It is clamped to the number of
            registered transformations, because ``random.sample`` raises
            ``ValueError`` when asked for more items than are available.

    Returns:
        tuple: ``(image, log)`` where ``image`` is the transformed array and
        ``log`` is a comma-separated string of the applied steps, always
        starting with ``'remove_exif'``.
    """
    # Randomly select transformations to be applied to image
    upper_bound = max(0, min(max_transformations, len(all_transformations)))
    transformation_num = random.randint(0, upper_bound)
    transformations_to_apply = random.sample(all_transformations, transformation_num)

    # Apply transformations to image
    processed_image, applied_transformations = apply_transformations(processed_image, transformations_to_apply)
    # The log always starts with 'remove_exif'
    applied_transformations.insert(0, 'remove_exif')
    applied_transformations = ', '.join(applied_transformations)

    return processed_image, applied_transformations
    

def transform_image(image, target_folder, file, original_exif):
    """Create one randomly augmented variant of an image and save it.

    Args:
        image: Image array to augment.
        target_folder: Directory the variant is written to.
        file: Original file name, passed on to ``save_new_image``.
        original_exif: Metadata dict of the original image; it is copied,
            not modified.

    Returns:
        dict: Copy of ``original_exif`` extended with ``'Transformations'``
        (the applied steps) and ``'Processed_image'`` (the saved path).
    """
    # Apply randomly selected transformations, then persist the result
    augmented, transformation_log = apply_random_transformations(image)
    saved_path = save_new_image(augmented, target_folder, file)

    record = deepcopy(original_exif)
    record.update({
        'Transformations': transformation_log,
        'Processed_image': saved_path,
    })
    return record

def process_images(image_files, source_folder, target_folder, variants_per_image=10):
    """Generate randomly augmented variants for every image and log them.

    For each file, the image is loaded once, then ``variants_per_image``
    random transformation combinations are applied and each result is saved.
    One log row per saved variant is written to ``process_data.csv`` inside
    ``target_folder``.

    Args:
        image_files: File names of the images inside ``source_folder``.
        source_folder: Directory containing the original images.
        target_folder: Directory receiving the variants and the CSV log.
        variants_per_image: Number of variants generated per image
            (defaults to 10).

    Returns:
        pandas.DataFrame: The process log, one row per saved variant.
    """
    records = []
    print(f'Number of available transformations, aside from removing exif data, is: {len(all_transformations)}')

    # Loop through the images in the source folder
    for file in tqdm(image_files, desc="Processing Images"):

        # Read the image and clean exif data
        processed_image, original_exif_data = get_clean_image(source_folder, file)

        # Create the requested number of transformation permutations from the image
        for _ in range(variants_per_image):
            records.append(transform_image(processed_image, target_folder, file, original_exif_data))

    # Log the process and changes done
    process_data = pd.DataFrame(records)
    process_data.to_csv(os.path.join(target_folder, 'process_data.csv'), index=False)

    return process_data

def create_target_folder(source_folder):
    """Create (if needed) and return the output folder for processed images.

    The output folder is a sibling of ``source_folder`` named
    ``<source_folder>_processed``.

    Args:
        source_folder: Path of the folder holding the original images.

    Returns:
        str: Path of the output folder, which exists when this returns.
    """
    # Drop trailing separators so 'mid/' maps to 'mid_processed' rather than
    # to a '_processed' folder inside the source folder.
    separators = os.sep + (os.altsep or '')
    base = source_folder.rstrip(separators) or source_folder
    target_folder = base + '_processed'
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(target_folder, exist_ok=True)

    return target_folder

##### Run process

In [None]:
# Folder holding the original images ('mid' is a relative path, so it is
# resolved against the current working directory).
source_folder = 'mid'
# Output folder '<source_folder>_processed'; created if it does not exist.
target_folder = create_target_folder(source_folder)

# Names of the files in source_folder that are recognised as images.
image_files = get_image_files(source_folder)
# Writes the augmented images and process_data.csv into target_folder and
# keeps the resulting log as a DataFrame.
process_data = process_images(image_files, source_folder, target_folder)