In [3]:
# to avoid to restart kernel when external modules are modified
%load_ext autoreload
%autoreload 2

In [4]:
import os

import pandas as pd
from PIL import Image
from tqdm import tqdm

from constants import DIM_WINDOW, INPUT_DATASET_PATH, OUTPUT_DATASET_PATH, SAMPLE_NUM_PIXELS
from images import redim_image_dataset, get_average_per_channel

### Generate dataset of mini images

[Download dataset from Google Drive](https://drive.google.com/file/d/1utFE04RWIAcxFK0uV5_2RB4bUrnF76lO/view?usp=sharing)

In [7]:
print("Resizing images of dataset...")

redim_image_dataset(
    DIM_WINDOW,
    input_dataset_path=INPUT_DATASET_PATH,
    output_dataset_path=OUTPUT_DATASET_PATH,
)

Resizing images of dataset...


100%|██████████| 260/260 [00:00<00:00, 586.24it/s]


### Generate metadata file

In [5]:
"""
file,    r_avg, g_avg, b_avg
001.png, 105.5, 34.42, 23.12
...
"""


def generate_metadata(dataset_path: str) -> pd.DataFrame:
    metadata = []
    files = os.listdir(dataset_path)

    for file in tqdm(files):
        if file == ".DS_Store": # optional, only macos
            continue

        with Image.open(os.path.join(dataset_path, file)) as img:
            r_avg, g_avg, b_avg = get_average_per_channel(img, n=SAMPLE_NUM_PIXELS)

        filename = file.split(".")[0] + ".png"
        metadata.append(
            {
                "file": filename,
                "r_avg": r_avg,
                "g_avg": g_avg,
                "b_avg": b_avg
            }
        )

    return pd.DataFrame(metadata)

In [6]:
metadata_df = generate_metadata(INPUT_DATASET_PATH)

metadata_df

100%|██████████| 260/260 [00:00<00:00, 668.13it/s]


Unnamed: 0,file,r_avg,g_avg,b_avg
0,00126.png,118.698,100.774,99.150
1,0065.png,95.676,62.844,101.232
2,0071.png,106.256,102.552,97.600
3,00132.png,132.876,96.642,86.988
4,0059.png,105.326,74.384,100.042
...,...,...,...,...
255,00103.png,109.800,94.938,110.604
256,0040.png,137.962,119.184,109.276
257,0068.png,173.590,112.658,86.904
258,0097.png,137.990,106.416,106.818


In [7]:
metadata_df.to_csv("sunset_metadata.csv", index=False)