In [44]:
# The resize-to-a-target-file-size approach used below is adapted from:
# https://stackoverflow.com/questions/52940369/is-it-possible-to-resize-an-image-by-its-bytes-rather-than-width-and-height

In [45]:
from PIL import Image
import os
import io
import multiprocessing as mp
from itertools import repeat

In [46]:
# Tunable limits consumed by downscaleImgClass when it dispatches resize jobs.
maxFileSize = 500_000  # byte budget for each written image (500 kB)
fileSizeTolerance = 5  # permitted overshoot of the budget, in percent

In [48]:

def limitImgSize(srcDir, targetDir, filename, target_filesize, tolerance=5):
    """Re-encode one image as JPEG, shrinking it until it fits a byte budget.

    The image ``srcDir/filename`` is encoded to JPEG in memory. While the
    encoded size exceeds ``target_filesize`` by more than ``tolerance``
    percent, the pixel dimensions are scaled down (always resampling from the
    original pixels) and the encode is repeated. The first encoding that fits
    is written to ``targetDir/filename``.

    Args:
        srcDir: Directory containing the source image.
        targetDir: Existing directory that receives the result.
        filename: Name of the image file inside ``srcDir``; reused unchanged
            as the output name, whatever its extension.
        target_filesize: Desired maximum size of the encoded file in bytes.
            Must be positive.
        tolerance: Allowed overshoot of ``target_filesize`` in percent.

    Returns:
        None. The result is the file written to ``targetDir``.

    Raises:
        Exception: With message ``"stopped process"`` when a
            ``KeyboardInterrupt`` arrives while processing. Every other
            ``Exception`` is reported on stdout and suppressed, so one bad
            file does not abort a whole batch.
    """
    img_filename = os.path.join(srcDir, filename)
    img_target_filename = os.path.join(targetDir, filename)
    print(img_filename)
    print(img_target_filename)

    # Modes Pillow's JPEG writer accepts as-is; anything else (e.g. RGBA or
    # palette images) is converted to RGB first so that save() does not fail.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    try:
        if target_filesize <= 0:
            raise ValueError(
                "target_filesize must be positive, got {!r}".format(target_filesize)
            )

        # The context manager closes the underlying file handle even on error.
        # The whole loop stays inside it because resize() reads pixel data
        # from the source image.
        with Image.open(img_filename) as img_src:
            if img_src.mode in jpeg_modes:
                img_orig = img_src
            else:
                img_orig = img_src.convert("RGB")
            img = img_orig
            aspect = img.size[0] / img.size[1]

            while True:
                with io.BytesIO() as buffer:
                    img.save(buffer, format="JPEG")
                    data = buffer.getvalue()
                filesize = len(data)
                size_deviation = filesize / target_filesize
                print("size: {}; factor: {:.3f}".format(filesize, size_deviation))

                if size_deviation <= (100 + tolerance) / 100:
                    # filesize fits
                    with open(img_target_filename, "wb") as f:
                        f.write(data)
                    break

                # filesize not good enough => adapt width and height
                # use sqrt of deviation since applied both in width and height
                new_width = img.size[0] / size_deviation**0.5
                new_height = new_width / aspect
                # Clamp to one pixel so a very elongated image does not hit a
                # zero-sized dimension while the other side can still shrink.
                new_size = (max(1, int(new_width)), max(1, int(new_height)))
                if new_size == img.size:
                    # No further shrinking is possible; stop instead of
                    # looping forever on the same dimensions.
                    raise ValueError(
                        "cannot reach target_filesize={} for {}: still {} bytes at {}x{}".format(
                            target_filesize, img_filename, filesize, img.size[0], img.size[1]
                        )
                    )
                # resize from img_orig to not lose quality
                img = img_orig.resize(new_size)
    except KeyboardInterrupt as err:
        # Re-raised as a plain Exception, as before, so the interruption is
        # not silently dropped like ordinary per-file errors.
        print(type(err))
        raise Exception("stopped process")
    except Exception as err:
        # Best effort: report the failure and let the caller move on.
        # SystemExit and GeneratorExit are deliberately not caught here.
        print(type(err))
        print(f"Unexpected {err=}, {type(err)=}")

In [49]:
def downscaleImgClass(className, threadCount):
    """Downscale every image file of one class folder using a process pool.

    Files are read from ``srcImg/<className>`` and the results are written
    under ``downscaledImg/<className>``, which is created if missing. Each
    file is handed to ``limitImgSize`` together with the module-level
    ``maxFileSize`` and ``fileSizeTolerance`` settings.

    Args:
        className: Name of the sub-folder of ``srcImg`` to process.
        threadCount: Number of worker processes for the pool (despite the
            name, ``multiprocessing.Pool`` starts processes, not threads).

    Returns:
        None.
    """
    print('downscaling img class >> '+className)
    srcPath = 'srcImg/'+className

    # create target path if not exists; exist_ok avoids the check-then-create
    # race and makedirs also creates a missing 'downscaledImg' parent
    targetPath = 'downscaledImg/'+className
    os.makedirs(targetPath, exist_ok=True)

    # list images in dir, skipping sub-directories that cannot be opened as images
    images = [
        name for name in os.listdir(srcPath)
        if os.path.isfile(os.path.join(srcPath, name))
    ]

    jobs = zip(
        repeat(srcPath),
        repeat(targetPath),
        images,
        repeat(maxFileSize),
        repeat(fileSizeTolerance),
    )

    # The context manager tears the workers down on exit, including when
    # starmap raises. starmap blocks until every job has finished, so no
    # pending work is lost on the normal path.
    with mp.Pool(threadCount) as pool:
        pool.starmap(limitImgSize, jobs)

    print('DONE downscaling img class >> '+className)

In [50]:
# Make sure the root output folder is present before any class is processed.
downscaledPath = 'downscaledImg'
if not os.path.exists(downscaledPath):
    os.mkdir(downscaledPath)

# One pool worker per CPU core reported by the OS.
threadCount = mp.cpu_count()

# Every sub-directory of 'srcImg' is treated as one image class;
# plain files at the top level are ignored.
classes = os.listdir('srcImg')
for className in classes:
    if not os.path.isdir('srcImg/'+className):
        continue
    downscaleImgClass(className, threadCount)

downscaling img class >> class1
srcImg/class1/20170108_150203.jpg
srcImg/class1/20160625_201819.jpg
srcImg/class1/200033500874_126958.jpg
srcImg/class1/20170623_141252.jpg
srcImg/class1/20190501_171845.jpg
srcImg/class1/20190107_072403.jpg
srcImg/class1/20190901_082116.jpg
srcImg/class1/20190827_075134.jpg
srcImg/class1/20190820_144818.jpg
srcImg/class1/20190810_084954.jpg
downscaledImg/class1/20170108_150203.jpg
srcImg/class1/IMG-20190810-WA0001.jpg
srcImg/class1/20190609_202246.jpg
downscaledImg/class1/200033500874_126958.jpg
downscaledImg/class1/20160625_201819.jpg
downscaledImg/class1/20170623_141252.jpg
downscaledImg/class1/20190501_171845.jpg
downscaledImg/class1/20190107_072403.jpg
downscaledImg/class1/20190901_082116.jpg
downscaledImg/class1/20190810_084954.jpg
downscaledImg/class1/20190827_075134.jpg
downscaledImg/class1/IMG-20190810-WA0001.jpg
downscaledImg/class1/20190820_144818.jpg
downscaledImg/class1/20190609_202246.jpg
size: 21096; factor: 0.042
size: 47332; factor: 0.09