In [1]:
from PIL import Image
import os
import json

In [2]:
# Number of equal-height horizontal bands a shred edge is divided into when two
# shreds are compared; shred_similarity scores one band at a time.
VERTICAL_LETTERS = 32

# Loading Shreds

In [3]:
def load_shreds(directory="./shreds", start_idx=0, end_idx=199):
    """Load a contiguous range of shred bitmaps from disk.

    Files are read from ``<directory>/shred_<idx>.bmp`` for every ``idx`` from
    ``start_idx`` to ``end_idx`` inclusive.

    Args:
        directory: folder containing the shred files.
        start_idx: first shred index to load.
        end_idx: last shred index to load (inclusive).

    Returns:
        list of ``(idx, image)`` tuples in ascending index order. Each image is
        a copy taken while the file is still open, so it stays usable after
        the file handle has been closed.
    """
    def _read_shred(idx):
        shred_path = os.path.join(directory, f"shred_{idx}.bmp")
        with Image.open(shred_path) as opened:
            return idx, opened.copy()

    return [_read_shred(idx) for idx in range(start_idx, end_idx + 1)]

In [4]:
# Load shreds 0..199 from the default ./shreds directory as (index, image) tuples.
shreds = load_shreds()

## Classifying shreds

In [5]:
def grayscale_distance(pixel1, pixel2):
    """Return the absolute difference between the gray levels of two pixels.

    A pixel's gray level is the sum of its first three channels divided by 3;
    any further channel (for example alpha) is ignored.
    """
    gray_levels = [sum(pixel[:3]) / 3 for pixel in (pixel1, pixel2)]
    return abs(gray_levels[0] - gray_levels[1])

def is_white(pixel, threshold=224):
    """Report whether ``pixel`` is more than ``threshold`` gray levels away from pure white.

    NOTE(review): despite the name, the test is "distance from (255, 255, 255)
    is greater than threshold". For 8-bit channels and the default threshold
    that is true only for near-black pixels (gray level below 31). Other code
    in this file calls this function and depends on the current behavior, so
    the comparison must not be flipped in isolation.
    """
    pure_white = (255, 255, 255)
    distance_from_white = grayscale_distance(pixel, pure_white)
    return distance_from_white > threshold


def is_black(pixel, threshold=224):
    """Return the logical negation of ``is_white(pixel, threshold=threshold)``."""
    if is_white(pixel, threshold=threshold):
        return False
    return True

In [6]:
def classify_shred(shred):
    """Label a shred by which of its outer pixel columns is blank.

    The leftmost column is scanned first, then the rightmost one. A column
    counts as blank when ``is_white`` is False for every pixel in it. Note that
    ``is_white`` as defined in this file is True for pixels that are far from
    pure white, so "blank" here means the column contains no dark pixel.

    Returns:
        'left' if the leftmost column is blank, otherwise 'right' if the
        rightmost column is blank, otherwise 'unknown'.
    """
    flat_pixels = list(shred.getdata())
    width, height = shred.size
    rows = range(height)

    # Leftmost column: index of the first pixel in each row.
    if not any(is_white(flat_pixels[row * width]) for row in rows):
        return 'left'

    # Rightmost column: index of the last pixel in each row.
    last_col = width - 1
    if not any(is_white(flat_pixels[row * width + last_col]) for row in rows):
        return 'right'

    # Neither outer column is blank.
    return 'unknown'

In [7]:
# assert classify_shred(shreds[1][1]) == "left"

In [8]:
# assert classify_shred(shreds[0][1]) == "right"

In [9]:
# Classify every shred exactly once and reuse the label for all three buckets;
# classify_shred converts the whole image to a list, so repeating it per bucket
# triples the work.
shred_labels = [(idx, shred, classify_shred(shred)) for idx, shred in shreds]
left_shreds = [(idx, shred) for idx, shred, label in shred_labels if label == 'left']
right_shreds = [(idx, shred) for idx, shred, label in shred_labels if label == 'right']
unknown_shreds = [(idx, shred) for idx, shred, label in shred_labels if label == 'unknown']

In [10]:
# Sanity check: the 200 loaded shreds are expected to split evenly into 100
# 'left' and 100 'right' pieces, with nothing left unclassified. The messages
# report the actual counts so a failure is diagnosable without re-running.
assert len(left_shreds) == 100, f"expected 100 left shreds, got {len(left_shreds)}"
assert len(right_shreds) == 100, f"expected 100 right shreds, got {len(right_shreds)}"
assert len(unknown_shreds) == 0, f"{len(unknown_shreds)} shreds could not be classified"

## Reconstructing columns 
**column**: a merged pair of shreds that together form one column of letters

In [11]:
def get_left_pixel(pixels, row, width):
    """Return the first pixel of ``row`` from a flat, row-major pixel sequence ``width`` pixels wide."""
    row_start = row * width
    return pixels[row_start]


def get_right_pixel(pixels, row, width):
    """Return the last pixel of ``row`` from a flat, row-major pixel sequence ``width`` pixels wide."""
    row_start = row * width
    last_offset = width - 1
    return pixels[row_start + last_offset]


def hamming_distance(str1, str2):
    """Count the positions at which two equal-length strings differ.

    Raises:
        ValueError: if the two inputs do not have the same length.
    """
    if len(str1) != len(str2):
        raise ValueError("Strings must be of equal length")

    mismatches = 0
    for item_a, item_b in zip(str1, str2):
        if item_a != item_b:
            mismatches += 1
    return mismatches


def shred_similarity(shred_a, shred_b, threshold=2):
    """Score how well the right edge of ``shred_a`` lines up with the left edge of ``shred_b``.

    Each edge column is reduced to one character per row ("0" where ``is_white``
    is True for the pixel, "1" otherwise). The rows are then cut into
    ``VERTICAL_LETTERS`` consecutive bands of ``int(height / VERTICAL_LETTERS)``
    rows each (leftover rows at the bottom are ignored) and the two edges are
    compared band by band with ``hamming_distance``.

    Args:
        shred_a: image placed on the left; its rightmost pixel column is sampled.
            Its height determines how many rows are read from both images.
        shred_b: image placed on the right; its leftmost pixel column is sampled.
        threshold: a band counts as matching when its Hamming distance is
            strictly below this value.

    Returns:
        int: the number of matching bands, between 0 and ``VERTICAL_LETTERS``.
    """
    width_a, height = shred_a.size
    width_b, _ = shred_b.size
    data_a = list(shred_a.getdata())
    data_b = list(shred_b.getdata())

    # Build one bit-character per row for each of the two touching edges.
    edge_a_bits = []
    edge_b_bits = []
    for row in range(height):
        pixel_a = get_right_pixel(data_a, row, width=width_a)
        pixel_b = get_left_pixel(data_b, row, width=width_b)
        edge_a_bits.append("0" if is_white(pixel_a) else "1")
        edge_b_bits.append("0" if is_white(pixel_b) else "1")
    edge_a = "".join(edge_a_bits)
    edge_b = "".join(edge_b_bits)

    # Compare the edges one band at a time and count the bands that agree.
    band_height = int(height / VERTICAL_LETTERS)
    matching_bands = 0
    for band in range(VERTICAL_LETTERS):
        lo = band * band_height
        hi = (band + 1) * band_height
        if hamming_distance(edge_b[lo:hi], edge_a[lo:hi]) < threshold:
            matching_bands += 1
    return matching_bands

In [12]:
def match_shred_pairs(l_shreds, r_shreds):
    """Greedily pair every right shred with its most similar unused left shred.

    Right shreds are processed in the given order. For each one, every left
    shred that is still unassigned is scored with
    ``shred_similarity(left_image, right_image)``; the highest scorer (the
    earliest one on ties) is taken and removed from the pool. The assignment is
    greedy: an earlier pick is never revisited.

    Args:
        l_shreds: ``(idx, image)`` tuples used as the left half of each pair.
        r_shreds: ``(idx, image)`` tuples used as the right half of each pair.

    Returns:
        list of ``((left_idx, left_image), (right_idx, right_image))`` tuples,
        one per entry of ``r_shreds``. Neither input is modified.

    Raises:
        ValueError: if the left shreds run out before every right shred has
            been paired.
    """
    # Work on a copy so the caller's list is left untouched.
    unassigned_left = list(l_shreds)
    matches = []

    for rshred in r_shreds:
        if not unassigned_left:
            raise ValueError(
                "Not enough left shreds to pair with every right shred "
                f"(ran out after {len(matches)} matches)"
            )
        # max() keeps the first candidate among equal scores.
        best_left = max(
            unassigned_left,
            key=lambda lshred: shred_similarity(lshred[1], rshred[1]),
        )
        unassigned_left.remove(best_left)
        matches.append((best_left, rshred))

    return matches

In [13]:
def merge_shred_pair(shred_left, shred_right):
    """Join two equally tall images side by side into a new RGB image.

    Args:
        shred_left: image pasted at the left, starting at x = 0.
        shred_right: image pasted immediately to the right of ``shred_left``.

    Returns:
        A new "RGB" image whose width is the sum of both widths and whose
        height is the shared height.

    Raises:
        ValueError: if the two images differ in height.
    """
    if shred_left.height != shred_right.height:
        raise ValueError("The images must have the same height to merge them.")

    canvas_size = (shred_left.width + shred_right.width, shred_left.height)
    canvas = Image.new("RGB", canvas_size)

    # Lay the pieces out left to right; each starts where the previous one ends.
    for piece, x_offset in ((shred_left, 0), (shred_right, shred_left.width)):
        canvas.paste(piece, (x_offset, 0))

    return canvas

In [14]:
def export_shred_pairs(
        pairs,
        shred_pairs_dir,
        pair_indices_json_path,
        to_basename=lambda lidx_ridx: f"merged_{lidx_ridx[0]}_{lidx_ridx[1]}.bmp"
):
    """Write each matched pair as a merged BMP and record the index pairs as JSON.

    Args:
        pairs: iterable of ``((left_idx, left_image), (right_idx, right_image))``
            tuples. It is consumed in a single pass, so a generator is fine.
        shred_pairs_dir: directory that receives one merged BMP per pair;
            created if it does not exist.
        pair_indices_json_path: file that receives the list of
            ``[left_idx, right_idx]`` pairs, in the same order as ``pairs``.
            Its parent directory is created if it does not exist.
        to_basename: maps a ``(left_idx, right_idx)`` tuple to the file name of
            the merged image inside ``shred_pairs_dir``.
    """
    # Create the output locations once, up front, so they exist even when
    # `pairs` is empty and the JSON file can always be written.
    os.makedirs(shred_pairs_dir, exist_ok=True)
    json_dir = os.path.dirname(pair_indices_json_path)
    if json_dir:
        os.makedirs(json_dir, exist_ok=True)

    # Collect the indices during the same pass that writes the images.
    pairs_indices = []
    for (lidx, left_shred), (ridx, right_shred) in pairs:
        merged_pair = merge_shred_pair(left_shred, right_shred)
        merged_pair.save(os.path.join(shred_pairs_dir, to_basename((lidx, ridx))), format="BMP")
        pairs_indices.append((lidx, ridx))

    with open(pair_indices_json_path, 'w') as jf:
        json.dump(pairs_indices, jf, indent=4)

In [15]:
# Pair each right shred with its best-matching left shred; every entry is
# ((left_idx, left_image), (right_idx, right_image)).
shred_pairs = match_shred_pairs(left_shreds, right_shreds)

In [None]:
#shred_pairs[0]

In [16]:
# Write the merged pair images to ./out/shred_pairs/ and the matching
# (left_idx, right_idx) list to ./out/shred_pairs.json.
export_shred_pairs(shred_pairs, shred_pairs_dir="./out/shred_pairs/", pair_indices_json_path="./out/shred_pairs.json")