# 01 - SnakeCLEF - Resizing Images in the Test Set

In [1]:
import os

# Run the rest of the notebook from the parent directory; the relative
# DATA_DIR path used by the later cells is resolved against it.
os.chdir('..')

In [2]:
import math
import os

import numpy as np
import pandas as pd

import PIL
from PIL import ImageFile

# Let Pillow load image files whose data is truncated instead of raising.
ImageFile.LOAD_TRUNCATED_IMAGES = True


def get_smaller_size(h, w, min_size=500):
    """Return the downscaled ``(h, w)`` for an image, keeping its aspect ratio.

    The two values are treated symmetrically and are returned in the order
    they were passed in.

    * Both sides below ``min_size``: the size is returned unchanged.
    * Shorter side strictly above ``min_size``: scale so that the shorter
      side becomes ``min_size``.
    * Otherwise (shorter side at or below ``min_size``, longer side at or
      above it): scale so that the longer side becomes ``min_size``.

    Args:
        h: First dimension (height in the notebook's own usage).
        w: Second dimension (width in the notebook's own usage).
        min_size: Target side length in pixels.

    Returns:
        Tuple ``(new_h, new_w)``; scaled values are floored to integers.
    """
    short_side, long_side = min(h, w), max(h, w)

    # Nothing to shrink when even the longer side is below the target.
    if long_side < min_size:
        return h, w

    # Choose which side is mapped onto min_size.
    reference = short_side if short_side > min_size else long_side
    scale = min_size / reference

    # The small epsilon keeps float round-off (e.g. 499.9999999) from
    # flooring to one pixel less than the exact result.
    return math.floor(h * scale + 1e-8), math.floor(w * scale + 1e-8)


# Root directory of the dataset, relative to the working directory. The
# trailing slash matters: the metadata file name is appended to it by plain
# string concatenation.
DATA_DIR = 'data/snake_clef2021_dataset/'
# Sub-directory of DATA_DIR that holds the image folders used below (the
# 'test' and 'test_resized' folders are looked up inside it).
TRAIN_SET_DIR = 'train'

In [3]:
# load metadata
# Only the test-set metadata is read in this notebook; the two loads below
# are left disabled.
# train_df = pd.read_csv(DATA_DIR + 'DF20-train_metadata_PROD.csv')
# train_mini_df = pd.read_csv(DATA_DIR + 'DF20M-train_metadata_PROD.csv')
valid_df = pd.read_csv(DATA_DIR + 'SnakeCLEF2021_test_metadata_cleaned.csv')

## Example of Resize Method

In [4]:
# show example of image sizes for a random sample of 20 test images
img_size = []
for x in valid_df['image_path'].sample(20):
    path = os.path.join(DATA_DIR, TRAIN_SET_DIR, x)
    # Image.open is lazy, so the dimensions are available without decoding
    # the pixel data; the context manager closes the file handle of every
    # sampled image instead of leaving it open.
    with PIL.Image.open(path) as img:
        w, h = img.size
    # Stored as (height, width), the order in which the values are later
    # passed to get_smaller_size.
    img_size.append((h, w))
img_size = np.array(img_size)

img_size

array([[281, 500],
       [500, 666],
       [500, 743],
       [500, 751],
       [386, 500],
       [500, 749],
       [500, 751],
       [500, 749],
       [500, 666],
       [375, 500],
       [500, 749],
       [746, 500],
       [500, 749],
       [500, 699],
       [500, 757],
       [500, 699],
       [500, 716],
       [500, 750],
       [500, 888],
       [500, 749]])

In [5]:
# Preview the (height, width) each sampled image would have after resizing.
np.array([get_smaller_size(*dims) for dims in img_size])

array([[281, 500],
       [375, 500],
       [336, 500],
       [332, 500],
       [386, 500],
       [333, 500],
       [332, 500],
       [333, 500],
       [375, 500],
       [375, 500],
       [333, 500],
       [500, 335],
       [333, 500],
       [357, 500],
       [330, 500],
       [357, 500],
       [349, 500],
       [333, 500],
       [281, 500],
       [333, 500]])

## Resize Dataset

In [None]:
from shutil import copyfile
from tqdm import tqdm


img_dir = os.path.join(DATA_DIR, TRAIN_SET_DIR + '/test')
resized_img_dir = os.path.join(DATA_DIR, TRAIN_SET_DIR + '/test_resized')

# Create the output directory up front so that listing it and saving into it
# cannot fail on a first run.
os.makedirs(resized_img_dir, exist_ok=True)

imgs = pd.Series(os.listdir(img_dir))
resized_imgs = os.listdir(resized_img_dir)
# Only process files that are not yet present in the output directory, so an
# interrupted run can be resumed. This filter is the single membership check;
# no per-file lookup is needed inside the loop.
imgs_todo = imgs[~imgs.isin(resized_imgs)]

for name in tqdm(imgs_todo):
    src = os.path.join(img_dir, name)
    dst = os.path.join(resized_img_dir, name)
    try:
        # convert() returns an independent in-memory image, so the source
        # file handle can be released as soon as the block exits.
        with PIL.Image.open(src) as raw:
            img = raw.convert('RGB')
        w, h = img.size
        # get_smaller_size treats its two values symmetrically and returns
        # them in the order given, so passing (w, h) yields the
        # (width, height) tuple that Image.resize expects.
        img = img.resize(get_smaller_size(w, h))
        img.save(dst)
    except Exception as e:
        # Best-effort fallback: report the problem and keep the original
        # file so the resized folder still contains every image.
        print(e)
        copyfile(src, dst)