In [2]:
from glob import glob

# Directory that holds the Satellogic GeoTIFF files to validate.
path = '/fastdata/Satellogic/data/tifs/satellogic'

# Every .tif file directly inside `path` (the pattern is not recursive).
images = glob(path + '/*.tif')

# Number of files found.
len(images)

298376

In [3]:
from tqdm import tqdm
import rasterio as rio
from concurrent.futures import ProcessPoolExecutor, as_completed

def check_image(image):
    """Check that band indexes 1, 2 and 3 of a raster file can be read.

    Args:
        image: Path of the raster file to open with rasterio.

    Returns:
        ``None`` if the file opened and the three bands were read without
        error; otherwise ``image`` itself, so the caller can collect the
        paths of unreadable files.
    """
    try:
        # The context manager closes the dataset handle as soon as the read
        # finishes (or fails) instead of leaving that to garbage collection.
        with rio.open(image) as dataset:
            dataset.read((1, 2, 3))
    except Exception:
        # Deliberately broad: any failure to open or read marks the file as
        # bad, and the path is returned rather than the exception raised.
        return image
    return None

# Paths of the files that check_image reported as unreadable.
errors = []
with ProcessPoolExecutor(max_workers=10) as executor:
    # Queue one check per file; each future maps back to the path it checks.
    future_to_image = {executor.submit(check_image, img): img for img in images}

    # Collect results in completion order, with a progress bar over all files.
    for future in tqdm(as_completed(future_to_image), total=len(images)):
        result = future.result()
        if result is None:
            continue
        errors.append(result)

# Number of unreadable files.
len(errors)


100%|██████████| 298376/298376 [05:32<00:00, 898.53it/s]


8

In [5]:
# Display the paths collected in `errors` (files that check_image could not read).
errors

['/fastdata/Satellogic/data/tifs/satellogic/20221028_183042_SN18_11N_389109_3758823_TOA.tif',
 '/fastdata/Satellogic/data/tifs/satellogic/20221103_182524_SN9_11N_399093_3746151_TOA.tif',
 '/fastdata/Satellogic/data/tifs/satellogic/20220925_185425_SN24_11N_493044_3632196_TOA.tif',
 '/fastdata/Satellogic/data/tifs/satellogic/20220823_222648_SN20_11N_387573_3734247_TOA.tif',
 '/fastdata/Satellogic/data/tifs/satellogic/20220707_184429_SN16_11N_314898_4523501_TOA.tif',
 '/fastdata/Satellogic/data/tifs/satellogic/20220826_222628_SN20_11N_387189_3743079_TOA.tif',
 '/fastdata/Satellogic/data/tifs/satellogic/20220809_181857_SN9_11N_389627_3740498_TOA.tif',
 '/fastdata/Satellogic/data/tifs/satellogic/20221106_191522_SN24_11N_380795_3786578_TOA.tif']

In [7]:
import os
import shutil

# Quarantine directory for the unreadable files; defined once so the
# makedirs call and the move destinations cannot drift apart.
error_dir = '/fastdata/Satellogic/data/tifs/satellogic_errors'
os.makedirs(error_dir, exist_ok=True)

for error in errors:
    destination = os.path.join(error_dir, os.path.basename(error))
    # Re-running this cell must not fail on files it already moved: skip only
    # when the source is gone AND the quarantined copy exists. Any other
    # missing-source case still raises from shutil.move, as before.
    if not os.path.exists(error) and os.path.exists(destination):
        continue
    shutil.move(error, destination)