# 01 - Danish Fungi - Resize Images

In [1]:
import os

# Move the working directory up one level; the relative DATA_DIR path used
# by the later cells is resolved against it.
os.chdir(os.pardir)

In [2]:
import math
import os

import numpy as np
import pandas as pd

import PIL
from PIL import ImageFile

ImageFile.LOAD_TRUNCATED_IMAGES = True


def get_smaller_size(h, w, min_size=500):
    """Return the (height, width) an image should be shrunk to.

    The aspect ratio is preserved (up to rounding down) and images are
    never enlarged:

    * both sides below ``min_size``: the size is returned unchanged;
    * both sides above ``min_size``: scaled so the shorter side becomes
      ``min_size``;
    * otherwise (shorter side at or below ``min_size``, longer side at or
      above it): scaled so the longer side becomes ``min_size``.

    Args:
        h: Original height in pixels.
        w: Original width in pixels.
        min_size: Target side length in pixels.

    Returns:
        Tuple ``(new_h, new_w)``. A positive input side never shrinks
        below 1 pixel, so very elongated images still get a valid size.
    """
    if h < min_size and w < min_size:
        return h, w

    shorter, longer = min(h, w), max(h, w)
    reference = shorter if shorter > min_size else longer
    mag = min_size / reference

    def _scale(side):
        # The epsilon compensates for float error, e.g. 899 * (500 / 899)
        # evaluating to 499.99999999999994 instead of 500.
        scaled = math.floor(side * mag + 1e-8)
        # Extreme aspect ratios would otherwise round a side down to 0,
        # which is not a valid image dimension.
        return max(scaled, 1) if side > 0 else scaled

    return _scale(h), _scale(w)


# Dataset root, relative to the working directory; the metadata CSV and the
# image sub-directories below are looked up inside it.
DATA_DIR = 'data/danish_fungi_dataset/'
# Sub-directory of DATA_DIR holding the original training images.
TRAIN_SET_DIR = 'train'
# Sub-directory of DATA_DIR that receives the resized training images.
TRAIN_SET_RESIZED_DIR = 'train_resized'

In [3]:
# read the DF20 training-set metadata table
metadata_path = os.path.join(DATA_DIR, 'DF20-train_metadata_PROD.csv')
train_df = pd.read_csv(metadata_path)
# train_mini_df = pd.read_csv(DATA_DIR + 'DF20M-train_metadata_PROD.csv')

## Example of Resize Method

In [4]:
# show example of image sizes for a random sample of 20 training images
fungi_img_sizes = []
for rel_path in train_df['image_path'].sample(20):
    path = os.path.join(DATA_DIR, TRAIN_SET_DIR, rel_path)
    # Image.open is lazy: the dimensions are available without decoding the
    # pixel data, and the context manager closes the file afterwards.
    with PIL.Image.open(path) as img:
        w, h = img.size
    # stored as (height, width), the argument order of get_smaller_size
    fungi_img_sizes.append((h, w))
fungi_img_sizes = np.array(fungi_img_sizes)

fungi_img_sizes

array([[1024, 1280],
       [1200,  899],
       [1000, 1333],
       [ 800,  600],
       [ 800, 1066],
       [3024, 4032],
       [ 347,  533],
       [1200, 1600],
       [1200,  675],
       [4032, 2268],
       [ 325,  365],
       [3264, 2448],
       [ 793, 1024],
       [1033, 1800],
       [1050, 1400],
       [ 638,  850],
       [ 533,  800],
       [3024, 4032],
       [ 932, 1500],
       [1200, 1600]])

In [5]:
# (height, width) each sampled image would have after resizing
np.array([
    get_smaller_size(height, width)
    for height, width in fungi_img_sizes
])

array([[500, 625],
       [667, 499],
       [500, 666],
       [666, 500],
       [500, 666],
       [500, 666],
       [325, 500],
       [500, 666],
       [888, 500],
       [888, 500],
       [325, 365],
       [666, 500],
       [500, 645],
       [500, 871],
       [500, 666],
       [500, 666],
       [500, 750],
       [500, 666],
       [499, 804],
       [500, 666]])

## Resize Dataset

In [None]:
from shutil import copyfile
from tqdm import tqdm


img_dir = os.path.join(DATA_DIR, TRAIN_SET_DIR)
resized_img_dir = os.path.join(DATA_DIR, TRAIN_SET_RESIZED_DIR)

# Create the output directory on the first run; listing a missing directory
# would raise FileNotFoundError.
os.makedirs(resized_img_dir, exist_ok=True)

# Only images without a same-named file in the output directory are
# processed, so an interrupted run can be resumed by re-running the cell.
imgs = pd.Series(os.listdir(img_dir))
resized_imgs = os.listdir(resized_img_dir)
imgs_todo = imgs[~imgs.isin(resized_imgs)]

for name in tqdm(imgs_todo):
    src = os.path.join(img_dir, name)
    dst = os.path.join(resized_img_dir, name)
    try:
        # convert() returns an independent image, so the source file can be
        # closed as soon as the pixel data has been loaded.
        with PIL.Image.open(src) as raw_img:
            img = raw_img.convert('RGB')
        w, h = img.size
        new_h, new_w = get_smaller_size(h, w)
        # PIL expects the target size as (width, height).
        img = img.resize((new_w, new_h))
        img.save(dst)
    except Exception as e:
        # Best-effort fallback: report which file could not be resized and
        # copy it over unchanged, so the output directory stays complete.
        print(f'{name}: {e}')
        copyfile(src, dst)