In [46]:
# Uncomment and run the commands below (Unix-based systems only) to download and extract the Flickr2K dataset.
# !wget -O Flickr2K.tar "https://cv.snu.ac.kr/research/EDSR/Flickr2K.tar"
# !tar -xvf Flickr2K.tar

# DIV2K Dataset link: https://drive.google.com/drive/folders/1B-uaxvV9qeuQ-t7MFiN1oEdA6dKnj2vW
# !unzip "DIV2K_train_HR.zip"
# !unzip "DIV2K_valid_HR.zip"

In [47]:
import numpy as np
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
from tqdm import tqdm
import os

In [48]:
def _resample_name_for_index(index, total):
    """Return the name of the Pillow resampling filter assigned to one image.

    The ``total`` images are divided into six consecutive slices bounded by
    ``k * total // 6`` (k = 0..6); each slice is downsampled with a different
    filter so the resulting dataset mixes all six methods.

    Args:
        index: Zero-based position of the image in the processing order.
        total: Number of images being processed.

    Returns:
        The filter name; it is also the attribute name on ``PIL.Image``.
    """
    # Slot k covers indices [k*total//6, (k+1)*total//6); the order of this
    # tuple therefore decides which slice gets which filter.
    names = ("NEAREST", "LANCZOS", "BICUBIC", "HAMMING", "BILINEAR", "BOX")
    for slot, name in enumerate(names):
        if slot * total // 6 <= index < (slot + 1) * total // 6:
            return name
    return "NEAREST"


def create_dataset_from_HR_images(src_path="./DIV2K/DIV2K_train_HR/", 
                                  dest_path="./DIV2K/DIV2K_train_LR/", out_filename="DIV2K_train.csv",
                                  lr_prefix="train_"):
    """Write half-size copies of every PNG in ``src_path`` and a CSV index of the pairs.

    Each ``.png`` file in ``src_path`` is resized to ``(width // 2, height // 2)``
    and saved in ``dest_path`` as ``lr_prefix + <original file name>``. The
    resampling filter depends on the image's position in the sorted file list
    (see ``_resample_name_for_index``). One CSV row is recorded per image with
    the low-/high-resolution file names, paths, dimensions (``"WxH"``) and the
    filter name.

    Args:
        src_path: Directory containing the high-resolution PNG images.
        dest_path: Directory that receives the downsampled images; it is
            created if it does not exist.
        out_filename: Path of the CSV file to write.
        lr_prefix: Prefix prepended to each low-resolution file name.

    Returns:
        None. The results are the image files and the CSV written to disk.
    """
    # Saving would fail if the output directory is missing; an empty
    # dest_path means "current directory" and needs no creation.
    if dest_path:
        os.makedirs(dest_path, exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting makes the
    # image -> resampling-filter assignment reproducible across runs.
    image_names = sorted(name for name in os.listdir(src_path) if name.endswith('.png'))
    col_names = ["LR_filename", "LR_filepath", "LR_dimensions",
                 "HR_filename", "HR_filepath", "HR_dimensions", 
                 "downsample_method"]
    data = {col: [] for col in col_names}
    total = len(image_names)

    for i, image_name in enumerate(tqdm(image_names)):
        hr_path = os.path.join(src_path, image_name)
        lr_name = lr_prefix + image_name
        lr_path = os.path.join(dest_path, lr_name)

        # https://pillow.readthedocs.io/en/stable/handbook/concepts.html#concept-filters
        resize_method_str = _resample_name_for_index(i, total)
        resize_method = getattr(Image, resize_method_str)

        # The context manager closes the source file once the copy is saved,
        # so file handles do not accumulate over a large directory.
        with Image.open(hr_path) as image:
            width, height = image.size
            resized_image = image.resize((width//2, height//2), resample=resize_method)
            resized_image.save(lr_path)

        data["LR_filename"].append(lr_name)
        data["LR_filepath"].append(lr_path)
        data["LR_dimensions"].append(f"{width//2}x{height//2}")
        data["HR_filename"].append(image_name)
        data["HR_filepath"].append(hr_path)
        data["HR_dimensions"].append(f"{width}x{height}")
        data["downsample_method"].append(resize_method_str)

    pd.DataFrame(data).to_csv(out_filename)
    

In [42]:
# Generate the half-resolution copies and the CSV index for the DIV2K training split.
create_dataset_from_HR_images(
    src_path="./DIV2K/DIV2K_train_HR/",
    dest_path="./DIV2K/DIV2K_train_LR/",
    out_filename="DIV2K_train.csv",
)

100%|██████████| 800/800 [08:03<00:00,  1.66it/s]


In [45]:
# Generate the half-resolution copies and the CSV index for the DIV2K validation split.
# NOTE: the low-resolution files are still saved with the "train_" filename prefix here.
create_dataset_from_HR_images(
    src_path="./DIV2K/DIV2K_valid_HR/",
    dest_path="./DIV2K/DIV2K_valid_LR/",
    out_filename="DIV2K_valid.csv",
)

100%|██████████| 100/100 [00:58<00:00,  1.70it/s]
