In [92]:
import pandas as pd
import os
import shutil
from PIL import Image

# Root directory that holds one sub-folder per entry in FOLDERS.
IMAGE_DIR = "imagesorig"
# Minimum length (in pixels) of an image's shorter side during validation;
# also used as the bounding box when images are downscaled.
MIN_SIZE = 256
# Sub-folders of IMAGE_DIR that are scanned.
FOLDERS = ["happy", "sad", "relaxed", "angry"]
# Accepted file extensions. They are compared case-sensitively against the
# text after the last "." in the file name, so e.g. "JPG" does not match.
VALID_EXTENSIONS = ["jpg", "png", "gif"]
# Extension that converted images are meant to be saved with.
TARGET_EXTENSION = "jpg"

In [95]:
# Scan every class folder and collect the paths that should be discarded.
# A path is flagged when:
#   * its extension (text after the last ".") is not in VALID_EXTENSIONS
#     (case-sensitive comparison),
#   * PIL cannot open it,
#   * its shorter side is below MIN_SIZE, or
#   * it does not have exactly three bands.
invalid_images = []

for folder in FOLDERS:
    fname = os.path.join(IMAGE_DIR, folder)

    for im in os.listdir(fname):
        impath = os.path.join(fname, im)

        extension = im.split(".")[-1]
        if extension not in VALID_EXTENSIONS:
            invalid_images.append(impath)
            continue

        try:
            # Open inside a context manager so the file handle is released
            # right away. Leaving handles open leaks descriptors across the
            # whole scan and can block the later deletion of these files on
            # platforms that lock open files.
            with Image.open(impath) as img:
                too_small = min(img.size) < MIN_SIZE
                wrong_bands = len(img.getbands()) != 3
        except Exception:
            # Anything PIL cannot read is treated as invalid rather than
            # aborting the scan.
            invalid_images.append(impath)
            continue

        if too_small or wrong_bands:
            invalid_images.append(impath)

In [96]:
# Display the flagged paths so they can be reviewed.
invalid_images

['imagesorig/happy/2788372463_2daa375cc8_b.jpg',
 'imagesorig/happy/33378088884_cda0e08877_b.jpg',
 'imagesorig/happy/4503012656_e232c980bc_b.jpg',
 'imagesorig/happy/46028886485_d3a159ae18_b.jpg',
 'imagesorig/sad/8115030896_8b5249f585_b.jpg',
 'imagesorig/sad/3406988390_f1e7cb024a_b.jpg',
 'imagesorig/sad/36081715960_7d4c40a24d_b.jpg',
 'imagesorig/sad/7020697207_aab34aecba_b.jpg',
 'imagesorig/sad/14573872817_5801754389_b.jpg',
 'imagesorig/sad/45396884262_32c3416328_b.jpg',
 'imagesorig/sad/5691449791_a2879b4662_b.jpg',
 'imagesorig/sad/1538312484_47904347c3_b.jpg',
 'imagesorig/sad/13301504003_a58072192c_b.jpg',
 'imagesorig/sad/17907537739_6e1738f4ac_b.jpg',
 'imagesorig/sad/2806366348_c98f67cbae_b.jpg',
 'imagesorig/sad/32822834967_ee2f332119_b.jpg',
 'imagesorig/sad/42709079890_f414c72de5_b.jpg',
 'imagesorig/relaxed/43535780982_a09920a105_b.jpg',
 'imagesorig/relaxed/13357865393_98e496a43d_b.jpg',
 'imagesorig/relaxed/37344586465_6527d86d3b_b.jpg',
 'imagesorig/relaxed/3502644

In [97]:
def remove_items(items):
    """Delete every path in ``items`` from disk.

    Real directories are removed recursively with ``shutil.rmtree``; anything
    else (regular files, symlinks) is removed with ``os.remove``. The
    directory check is done up front instead of relying on ``os.remove``
    failing, because the exception raised for a directory is
    platform-dependent (``IsADirectoryError`` vs ``PermissionError``).

    Args:
        items: Iterable of filesystem paths to delete.

    Returns:
        None. Paths that no longer exist are printed and skipped.

    Raises:
        OSError: If a path exists but cannot be removed (for example a
            ``PermissionError`` on a locked or read-only file).
    """
    for item in items:
        try:
            # rmtree refuses to operate on symlinks, so a link to a directory
            # is unlinked like a file instead.
            if os.path.isdir(item) and not os.path.islink(item):
                shutil.rmtree(item)
            else:
                os.remove(item)
        except FileNotFoundError:
            # Already gone (e.g. the cell was re-run): report it and continue.
            print(item)

In [98]:
# Permanently delete every path flagged in invalid_images.
remove_items(invalid_images)

In [53]:
# Convert every image to a centre-cropped square RGB file, downscaled to fit
# within MIN_SIZE x MIN_SIZE, saved with TARGET_EXTENSION.
# `to_delete` collects source files that should be removed afterwards: either
# they were re-saved under a different name, or they could not be processed.
to_delete = []

for folder in FOLDERS:
    fname = os.path.join(IMAGE_DIR, folder)
    for im in os.listdir(fname):
        impath = os.path.join(fname, im)

        # Swap only the final extension. A plain str.replace would also
        # rewrite any earlier occurrence of ".<ext>" in the path.
        new_path = f"{os.path.splitext(impath)[0]}.{TARGET_EXTENSION}"

        try:
            # The context manager closes the source file once pixel data has
            # been copied out by convert().
            with Image.open(impath) as img:
                width, height = img.size
                if width != height:
                    # Centre crop to a square whose side is the shorter edge.
                    side = min(width, height)
                    left = (width - side) // 2
                    top = (height - side) // 2
                    img_resized = img.crop((left, top, left + side, top + side))
                else:
                    img_resized = img
                # convert() returns a new image, detached from the open file.
                img_resized = img_resized.convert("RGB")
        except Exception:
            # Unreadable image: queue it for deletion and skip it. Falling
            # through would reuse the previous iteration's img_resized (or
            # hit a NameError on the first file).
            to_delete.append(impath)
            continue

        # thumbnail() resizes in place and never enlarges the image.
        img_resized.thumbnail((MIN_SIZE, MIN_SIZE), Image.Resampling.LANCZOS)

        img_resized.save(new_path)
        if new_path != impath:
            to_delete.append(impath)

In [55]:
# Number of source files queued for removal by the conversion loop.
len(to_delete)

0

In [110]:
# Delete the source files queued in to_delete by the conversion loop.
remove_items(to_delete)