In [1]:
import csv
import os
import random

from tqdm import tqdm
from PIL import Image
from typing import Tuple

In [2]:
def is_square(image: Image) -> bool:
    width, height = image.size

    return height == width


def is_dataset_square(dataset_path: str) -> bool:
    files = os.listdir(dataset_path)
    incidences = list()

    for file in files:
        if file == ".DS_Store":
            continue
        with Image.open(os.path.join(dataset_path, file)) as img:
            if not is_square(img):
                incidences.append((file, img.size))
    
    if incidences:
        print(incidences)
        return False
    
    return True

In [3]:
INPUT_DATASET_PATH = "/Users/dabetm/Pictures/Astrofotografía/Luna/dataset-lunas-500"

print("Checking dims...")
assert is_dataset_square(INPUT_DATASET_PATH)

Checking dims...


In [4]:
METADATA_FILE = "lunar.csv"


def get_average_per_channel(img: Image, n: int = None) -> Tuple[float, float, float]:
    r, g, b = 0, 0, 0

    if n:
        sample_size = min(n, img.width * img.height)
        margin = 100 if img.width > 1000 else 50
        
        for _ in range(sample_size):
            x = random.randint(0, img.width-margin)
            y = random.randint(0, img.height-margin)
            pixel = img.getpixel((x, y))

            r += pixel[0]
            g += pixel[1]
            b += pixel[2]
        
        return (r / n, g / n, b / n)

    area = img.width * img.height

    for y in range(img.height):
        for x in range(img.width):
            pixel = img.getpixel((x, y))

            r += pixel[0]
            g += pixel[1]
            b += pixel[2]
    
    if not area:
        area = 1

    return (r / area, g / area, b / area)


def generate_metadata():    
    headers = ["file", "r_avg", "g_avg", "b_avg"]
    metadata = []
    files = os.listdir(INPUT_DATASET_PATH)

    for file in tqdm(files):
        if file == ".DS_Store":
            continue
        with Image.open(os.path.join(INPUT_DATASET_PATH, file)) as img:
            r_avg, g_avg, b_avg = get_average_per_channel(img, n=500)
        filename = file.split(".")[0] + ".png"
        metadata.append(
            {
                "file": filename,
                "r_avg": r_avg,
                "g_avg": g_avg,
                "b_avg": b_avg
            }
        )
    
    with open(METADATA_FILE, "w") as output_file:
        writer = csv.DictWriter(output_file, fieldnames=headers)
        writer.writeheader()
        writer.writerows(metadata)

In [None]:
print("Generating metadata...")
generate_metadata()

Generating metadata...


In [6]:
import pandas as pd


df = pd.read_csv(METADATA_FILE)
df

Unnamed: 0,file,r_avg,g_avg,b_avg
0,0071.png,151.098,118.026,98.258
1,0065.png,109.566,98.164,85.068
2,0059.png,67.752,87.416,110.338
3,0515.png,156.416,99.500,62.310
4,0273.png,94.652,88.444,90.542
...,...,...,...,...
524,0083.png,107.618,101.336,99.714
525,0097.png,17.762,92.006,163.654
526,0040.png,15.350,14.060,13.498
527,0054.png,74.660,74.350,82.124


In [9]:
df.sort_values(by="r_avg")


Unnamed: 0,file,r_avg,g_avg,b_avg
326,0356.png,0.110,105.142,187.566
356,0427.png,1.320,66.592,189.030
273,0420.png,2.200,37.622,123.008
150,0403.png,4.790,57.848,133.530
295,0227.png,4.862,4.588,4.488
...,...,...,...,...
308,0425.png,195.888,123.190,70.238
215,0407.png,196.332,132.096,79.560
148,0371.png,197.264,152.670,115.312
288,0384.png,204.414,137.030,84.524


In [7]:
def format_img_filename(filename: str):
    name = filename.split(".")[0]
    return f"{name}.png"


def redim_image_dataset(dim: int, input_path, output_path):
    files = os.listdir(input_path)

    for file in tqdm(files):
        if file == ".DS_Store":
            continue
        with Image.open(os.path.join(input_path, file)) as img:
            img.thumbnail((dim, dim), Image.Resampling.LANCZOS)
            img.save(os.path.join(output_path, format_img_filename(file)), format="PNG")

In [8]:
NEW_DATASET_PATH = "/Users/dabetm/Pictures/Astrofotografía/Luna/lunas-500-square-64x64"
DIM_WINDOW = 64

print("Resizing images of dataset...")
redim_image_dataset(
    DIM_WINDOW,
    input_path=INPUT_DATASET_PATH,
    output_path=NEW_DATASET_PATH
)

Resizing images of dataset...


100%|██████████| 530/530 [00:04<00:00, 127.93it/s]


In [12]:
get_avg_ = lambda row : (row["r_avg"] + row["g_avg"] + row["b_avg"]) / 3


df.loc[:, "avg"] = df.apply(get_avg_, axis=1)
df

Unnamed: 0,file,r_avg,g_avg,b_avg,avg
0,0071.png,151.098,118.026,98.258,122.460667
1,0065.png,109.566,98.164,85.068,97.599333
2,0059.png,67.752,87.416,110.338,88.502000
3,0515.png,156.416,99.500,62.310,106.075333
4,0273.png,94.652,88.444,90.542,91.212667
...,...,...,...,...,...
524,0083.png,107.618,101.336,99.714,102.889333
525,0097.png,17.762,92.006,163.654,91.140667
526,0040.png,15.350,14.060,13.498,14.302667
527,0054.png,74.660,74.350,82.124,77.044667


In [9]:
ASSETS_PATH = "assets2"

## Create mosaic

In [11]:
import numpy as np

from math import sqrt
from typing import List, Union


images_in_memory = dict()

def open_dataset():
    files = os.listdir(path=NEW_DATASET_PATH)

    for file in files:
        if file == ".DS_Store":
            continue
        images_in_memory[file] = Image.open(os.path.join(NEW_DATASET_PATH, file))


def compute_euclidian_distance(a: List[Union[int, float]], b: List[Union[int, float]]):
    acc = 0.0

    for a_i, b_i in zip(a, b):
        acc += (a_i - b_i)**2
    
    return sqrt(acc)


def compute_nearest_images(metadata: pd.DataFrame, avg_per_channel: tuple, k: int = 3):
    distances = list() # list of tuples(distance, index)
    assert len(avg_per_channel) == 3

    distances_serie = metadata.apply(
        lambda row: (
            compute_euclidian_distance(
                a=list(avg_per_channel),
                b=[row["r_avg"], row["g_avg"], row["b_avg"]]
            ), 
            row.name
        ),
        axis=1
    )
    
    distances = list(distances_serie)
    distances.sort()

    nearest_images = list()
    for i in range(k):
        index = distances[i][1]
        nearest_images.append(metadata.iloc[index]["file"])

    return nearest_images

def resize_image(img: Image.Image, max_size: int = 1500) -> Image.Image:
    """Resize a PIL Image so its longest side is exactly max_size, preserving aspect ratio.
    This will upscale images smaller than max_size and downscale images larger than max_size.
    """
    width, height = img.size
    # Compute scaling factor so that the longest side becomes max_size
    scale = max_size / float(max(width, height))
    new_size = (int(round(width * scale)), int(round(height * scale)))

    # Use high-quality resampling filter
    return img.resize(new_size, Image.LANCZOS)


def get_best_fit(img: Image.Image, nearest_image_names: list) -> str:
    total = (img.height * img.width) // 2
    min_distance_ = np.inf
    best_fit_img_name = None

    sample_points = dict()

    #print(img.height, img.width, f"{total=}")

    for _ in range(total):
        x = random.randint(0, img.width-1)
        y = random.randint(0, img.height-1)
        #print(f"{x=}", f"{y=}")
        pixel = img.getpixel((x, y))

        sample_points[(x, y)] = pixel

    for img_name in nearest_image_names:
        curr_distance = 0
        for (x, y), pixel_val in sample_points.items():
            ref_pix = images_in_memory[img_name].getpixel((x, y))
            curr_distance += compute_euclidian_distance(pixel_val, ref_pix)
        
        if curr_distance < min_distance_:
            min_distance_ = curr_distance
            best_fit_img_name = img_name

    return best_fit_img_name

In [13]:
open_dataset()

k = 3

testing_images = [
    # "01_sunset_and_venus.jpg",
    # #"02_sunset_unidad_01.JPG",
    # #"03_sunset_unidad_02.JPG",
    # #"04_sunset_unidad_03.JPG",
    # # "05_sunset_palmeras.jpg",
    # # "06_sunset_gasolinera.jpg",
    # # "07_sunset_unidad_04.jpg",
    # # "08_sunset_unidad_05.jpg",
    # # "09_sunset_playa.png",
    # # "10_sunset_playa_2.png",
    # "11_mona_lisa.jpg",
    "14_noche_estrellada_4k.jpg",
    # "luna_junio.jpg",
    # "luna_sept.jpg",
    # "luna_oct.jpg",
    # "luna_nov.jpg",
    # "luna_dic.png",
    #"moon-3.webp",
    # "moon-4.jpg",
    # "moon-5.jpg",
    # "moon-6.jpg",
    #"moon-8.jpg"
]


for img_path in testing_images:
    print(img_path)
    with Image.open(os.path.join(ASSETS_PATH, img_path)) as img:
        #img = resize_image(img, 4200)
        n, m = img.height, img.width
        for y in tqdm(range(0, n, DIM_WINDOW)):
            for x in range(0, m, DIM_WINDOW):
                left = x
                upper = y
                right = min(m-1, x+DIM_WINDOW)
                lower = min(n-1, y+DIM_WINDOW)
                window = img.crop((left, upper, right, lower))

                avg = get_average_per_channel(window)
                nearest_image_names = compute_nearest_images(df, avg, k=k)

                tmp = random.randint(0, 10)
                if tmp <= 8:
                    nearest_image_name = get_best_fit(window, nearest_image_names)
                else:
                    nearest_image_name = random.choice(nearest_image_names)

                for i in enumerate(range(upper, lower)):
                    for j in enumerate(range(left, right)):
                        new_pixel = images_in_memory[nearest_image_name].getpixel(
                            (j[0], i[0])
                        )
                        img.putpixel((j[1], i[1]), value=new_pixel)

        img.save(os.path.join(ASSETS_PATH, f"out_k{k}_v5_{img_path}"))



14_noche_estrellada_4k.jpg


100%|██████████| 68/68 [02:16<00:00,  2.01s/it]


## Create collage

In [14]:
def get_base_img(width: int = 600, height: int = 600) -> Image:
    return Image.new("RGB", (width, height), color="black")

In [48]:
def create_img(df_data: pd.DataFrame) -> Image:
    img = get_base_img(width=1472, height=1472)
    #df_sorted = df.sort_values(by="r_avg")
    #df_sorted = df.sort_values(by="avg")
    idx = 0

    #df_shuffled = df.sample(frac=1)

    n, m = img.height, img.width

    for y in tqdm(range(0, n, DIM_WINDOW)):
        for x in range(0, m, DIM_WINDOW):
            left = x
            upper = y
            right = min(m-1, x+DIM_WINDOW)
            lower = min(n-1, y+DIM_WINDOW)
            #window = img.crop((left, upper, right, lower))

            #print(f"{idx=}")
            ref_filename = df_data.iloc[idx].to_dict()["file"]
            idx += 1

            ref_img = images_in_memory[ref_filename]

            # ref_img_path = f"{NEW_DATASET_PATH}/{ref_filename}"
            # with Image.open(ref_img_path) as ref_img:
            for i in enumerate(range(upper, lower)):
                for j in enumerate(range(left, right)):
                    #print(j[0], i[0], ref_img.height, ref_img.width)
                    new_pixel = ref_img.getpixel(
                        (j[0], i[0])
                    )
                    img.putpixel((j[1], i[1]), value=new_pixel)
    #print("hola")
    return img


In [18]:
df[["file", "avg"]]

Unnamed: 0,file,avg
0,0071.png,122.460667
1,0065.png,97.599333
2,0059.png,88.502000
3,0515.png,106.075333
4,0273.png,91.212667
...,...,...
524,0083.png,102.889333
525,0097.png,91.140667
526,0040.png,14.302667
527,0054.png,77.044667


In [50]:
frames_base_path = "frames_bubble_sort"
img_id = 1
open_dataset()

base_df = df.sample(frac=1)

img = create_img(base_df)

img.save(f"{frames_base_path}/{img_id}.jpg")

img_id += 1

100%|██████████| 23/23 [00:03<00:00,  6.08it/s]


In [39]:
from typing import List


def sort_bubble(values: list) -> List[int]:
    all_positions = list()

    n = len(values)

    for idx in range(n):
        for jdx in range(n-idx-1):
            if values[jdx][1] > values[jdx+1][1]:
                values[jdx+1], values[jdx] = values[jdx], values[jdx+1]
        all_positions.append([index for index, avg in values])

    return all_positions

In [51]:
all_positions = sort_bubble(list(base_df["avg"].items()))


In [52]:
len(all_positions)

529

In [54]:
img_id = 2

for positions in all_positions:
    df_data = base_df.loc[positions]
    img = create_img(df_data)
    img.save(f"{frames_base_path}/{img_id}.jpg")

    img_id += 1

100%|██████████| 23/23 [00:03<00:00,  6.05it/s]
100%|██████████| 23/23 [00:03<00:00,  6.08it/s]
100%|██████████| 23/23 [00:03<00:00,  6.04it/s]
100%|██████████| 23/23 [00:03<00:00,  5.99it/s]
100%|██████████| 23/23 [00:03<00:00,  5.99it/s]
100%|██████████| 23/23 [00:03<00:00,  6.04it/s]
100%|██████████| 23/23 [00:03<00:00,  6.01it/s]
100%|██████████| 23/23 [00:03<00:00,  6.01it/s]
100%|██████████| 23/23 [00:03<00:00,  6.01it/s]
100%|██████████| 23/23 [00:03<00:00,  5.96it/s]
100%|██████████| 23/23 [00:04<00:00,  5.73it/s]
100%|██████████| 23/23 [00:04<00:00,  5.56it/s]
100%|██████████| 23/23 [00:04<00:00,  5.57it/s]
100%|██████████| 23/23 [00:03<00:00,  5.80it/s]
100%|██████████| 23/23 [00:03<00:00,  5.84it/s]
100%|██████████| 23/23 [00:03<00:00,  5.92it/s]
100%|██████████| 23/23 [00:04<00:00,  5.66it/s]
100%|██████████| 23/23 [00:03<00:00,  5.92it/s]
100%|██████████| 23/23 [00:03<00:00,  5.85it/s]
100%|██████████| 23/23 [00:04<00:00,  5.60it/s]
100%|██████████| 23/23 [00:03<00:00,  5.

In [16]:
img.show()

In [63]:
img.save(f"{ASSETS_PATH}/lunar_collage_527_moons.png")