In [13]:
import os
from PIL import Image
import math
import glob
from pathlib import Path
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm

In [34]:
def resize_img(fname, targ, path, new_path):
    dest = os.path.join(path,new_path,str(targ),fname)
    if os.path.exists(dest): return
    im = Image.open(os.path.join(path, fname)).convert('RGB')
    r,c = im.size
    ratio = targ/min(r,c)
    sz = (scale_to(r, ratio, targ), scale_to(c, ratio, targ))
    os.makedirs(os.path.split(dest)[0], exist_ok=True)
    im.resize(sz, Image.LINEAR).save(dest)

def resize_imgs(fnames, targ, path, new_path):
    if not os.path.exists(os.path.join(path,new_path,str(targ),fnames[0])):
        with ThreadPoolExecutor(16) as e:
            ims = e.map(lambda x: resize_img(x, targ, path, new_path), fnames)
            for x in tqdm(ims, total=len(fnames), leave=False): pass
    return os.path.join(path,new_path,str(targ))

In [3]:
def read_dir(path, folder):
    full_path = os.path.join(path, folder)
    fnames = glob(f"{full_path}/*.*")
    if any(fnames):
        return [os.path.relpath(f,path) for f in fnames]
    else:
        raise FileNotFoundError("{} folder doesn't exist or is empty".format(folder))

def read_dirs(path, folder):
    labels, filenames, all_labels = [], [], []
    full_path = os.path.join(path, folder)
    for label in sorted(os.listdir(full_path)):
        if label not in ('.ipynb_checkpoints','.DS_Store'):
            all_labels.append(label)
            for fname in os.listdir(os.path.join(full_path, label)):
                filenames.append(os.path.join(folder, label, fname))
                labels.append(label)
    return filenames, labels, all_labels

In [4]:
def scale_to(x, ratio, targ): return max(math.floor(x*ratio), targ)

### Get filenames

In [19]:
filenames, labels, all_labels = read_dirs(Path.cwd()/'Data/CLS-LOC', 'val'); len(filenames)

50000

In [20]:
train_filenames, train_labels, train_all_labels = read_dirs(Path.cwd()/'Data/CLS-LOC', 'train'); len(train_filenames)

1281167

### 64x64

In [9]:
1.25*64

80.0

In [10]:
resize_imgs(filenames, 80, Path.cwd()/'Data/CLS-LOC', 'resized_output')

                                                       

'/home/ubuntu/ILSVRC/Data/CLS-LOC/resized_output/80'

In [12]:
resize_imgs(train_filenames, 80, Path.cwd()/'Data/CLS-LOC', 'resized_output')

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
                                                          

'/home/ubuntu/ILSVRC/Data/CLS-LOC/resized_output/80'

In [28]:
import matplotlib.pyplot as plt

In [29]:
%matplotlib inline

In [23]:
filenames[0]

'val/n01440764/ILSVRC2012_val_00002138.JPEG'

In [32]:
img = plt.imread('/home/ubuntu/ILSVRC/Data/CLS-LOC/resized_output/80/val/n01440764/ILSVRC2012_val_00002138.JPEG'); img.shape

(80, 106, 3)

### 128x128

In [21]:
1.25*128

160.0

In [22]:
resize_imgs(filenames, 160, Path.cwd()/'Data/CLS-LOC', 'resized_output')

                                                      

'/home/ubuntu/ILSVRC/Data/CLS-LOC/resized_output/160'

In [33]:
resize_imgs(train_filenames, 160, Path.cwd()/'Data/CLS-LOC', 'resized_output')

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
                                                          

'/home/ubuntu/ILSVRC/Data/CLS-LOC/resized_output/160'

In [37]:
# sanity check
rfiles, _, _ = read_dirs(Path.cwd()/'Data/CLS-LOC/resized_output/160', 'train'); len(rfiles)
# '/home/ubuntu/ILSVRC/Data/CLS-LOC/resized_output/160'

1281167

### 256x256

In [38]:
1.25*256

160.0

In [39]:
resize_imgs(filenames, 320, Path.cwd()/'Data/CLS-LOC', 'resized_output')

                                                      

'/home/ubuntu/ILSVRC/Data/CLS-LOC/resized_output/320'

In [40]:
resize_imgs(train_filenames, 320, Path.cwd()/'Data/CLS-LOC', 'resized_output')

  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
  " Skipping tag %s" % (size, len(data), tag))
 23%|██▎       | 298394/1281167 [06:33<21:34, 758.97it/s]

KeyboardInterrupt: 

 23%|██▎       | 298394/1281167 [06:50<22:31, 727.19it/s]

### 299x299

In [None]:
1.25*299

In [None]:
resize_imgs(filenames, 375, Path.cwd()/'Data/CLS-LOC', 'resized_output')

In [None]:
resize_imgs(train_filenames, 375, Path.cwd()/'Data/CLS-LOC', 'resized_output')