In [None]:
# PYTHON IMPORTS
import os, glob, zipfile, shutil
from tqdm.notebook import tqdm
from datetime import datetime

# IMAGE IMPORTS 
from PIL import Image

# DATA IMPORTS 
import numpy as np

# NN IMPORTS 
import torch, ultralytics

# PREFERENCES
# Pillow's pixel-count ceiling for opened images; presumably raised here so very
# large scans load without a decompression-bomb warning/error — confirm the value.
Image.MAX_IMAGE_PIXELS = 933_120_000

In [2]:
# Weights file handed to the YOLO model
model_weights = r"C:\Users\fhacesga\Downloads\best (1).pt"

# Downloaded ZIP archives, and the folder their contents are extracted into
zip_folder    = r"D:\FloodChange\AAA_HistoricalDownload\ZIP\\"
input_folder  = r"D:\FloodChange\AAA_HistoricalDownload\Files\\"

# Every execution of this cell gets its own timestamped output folder
output_folder = r"D:\FloodChange\Outputs\\" + "\\" + datetime.now().strftime('%m-%d-%Y_%H-%M-%S') + "\\"
index_folder  = output_folder + "00_identifiedIndices\\"   # copies of the identified index tiles
infer_folder  = output_folder + "01_infered\\"             # saved inference results

# Create both result folders up front (parents included)
os.makedirs(index_folder, exist_ok=True)
os.makedirs(infer_folder, exist_ok=True)

Unzip all the downloaded archives, then split multi-page TIFFs into one file per page

In [3]:
def undoMultiPageTIFFs(input_folder):
    '''
    Some images are saved as Multi-page TIFF files. These need to be exported into individual images, which is what this function does
    '''
    for filename in tqdm(glob.glob(input_folder + "\\*.tif*")):
        tiff_file = os.path.join(input_folder, filename)
        try:
        # Check if the file is a multi-page TIFF
            with Image.open(tiff_file) as img:
                if img.is_animated:    
                    for i in range(img.n_frames):
                        try:
                            img.seek(i)
                            output_filename = f"{os.path.splitext(tiff_file)[0]}_{i+1}{os.path.splitext(tiff_file)[1]}"
                            img.save(output_filename, format=img.format)
                        except:
                            print(f"Error with {tiff_file} page {i}")
                            continue
        except:
            print(f"Error opening {tiff_file}")
            continue

        os.remove(tiff_file)

def unzip_all_zips(input_dir, output_dir):
    """
    Extract every ZIP archive located directly in ``input_dir`` into ``output_dir``.

    Parameters
    ----------
    input_dir : str
        Folder that holds the archives. Only its top level is scanned, and the
        ``.zip`` extension is matched case-insensitively.
    output_dir : str
        Destination folder shared by all archives; created (with parents) if missing.

    Raises
    ------
    zipfile.BadZipFile
        Propagated from ``zipfile.ZipFile`` when an archive is corrupt.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Sorted so that, when archives contain files with the same name,
    # the file that ends up on disk does not depend on directory listing order
    for filename in sorted(os.listdir(input_dir)):
        zip_file_path = os.path.join(input_dir, filename)
        if not filename.lower().endswith(".zip") or not os.path.isfile(zip_file_path):
            continue

        with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
            zip_ref.extractall(output_dir)
        print(f"Contents of {zip_file_path} extracted to {output_dir}")

    

# One-off preparation: extraction and TIFF splitting are skipped whenever the
# extraction folder is already present.
# NOTE(review): only the folder's existence is checked, so a partially extracted
# folder is treated as complete — confirm this is intended.
if os.path.exists(input_folder):
    pass
else:
    unzip_all_zips(zip_folder, input_folder)
    undoMultiPageTIFFs(input_folder)

Identify the tile-index images using known filename heuristics (names containing `IND` or `_1.`) and copy them to the index folder

In [1]:
# Filename fragments used as heuristics for spotting tile-index images
patterns = ["IND", "_1."]

# A file that matches more than one pattern is kept only once;
# dict.fromkeys drops repeats while preserving first-seen order
index_tiles = list(dict.fromkeys(
    file
    for pattern in patterns
    for file in glob.glob(os.path.join(input_folder, "*" + pattern + "*"))
))

# Copy the selected files, under their original names, into the index folder
for file in tqdm(index_tiles):
    shutil.copy(file, os.path.join(index_folder, os.path.basename(file)))

NameError: name 'glob' is not defined

These are the parameters we define to run BBNN

In [5]:
# NOTE(review): neither value below is referenced in the cells visible here —
# target_size is presumably an image edge length in pixels; confirm before relying on it.
target_size = 1024
original_shapes = list()

# COCO DATASET PARAMS
# Numeric class id -> human-readable class name
category_labels = dict(enumerate(("County", "Tile", "Box", "Legend")))

# Presumably the class ids (keys of category_labels) of interest: County and Tile — confirm
categories = [0, 1]

In [6]:
# Use the GPU when one is available; otherwise fall back to the CPU instead of crashing
device = "cuda" if torch.cuda.is_available() else "cpu"
model = ultralytics.YOLO(model_weights).to(device)

# Everything in the index folder is an input; sorted so runs are reproducible
files = sorted(glob.glob(os.path.join(index_folder, "*")))
outputs = []
batch_size = 5

# Feed the images to the model in small batches and collect every result.
# NOTE(review): all results are held in memory until the next cell saves them —
# confirm this fits in RAM for large scans.
for i in tqdm(range(0, len(files), batch_size)):
    outputs.extend(model(files[i:i+batch_size], imgsz=(1920, 1920), verbose=False, conf=0.7))

# Move the model off the GPU and release cached GPU memory once inference is done
model = model.to("cpu")
torch.cuda.empty_cache()

  0%|          | 0/26 [00:00<?, ?it/s]

In [7]:
# Process results list: save each result into the inference folder,
# named after the image it was computed from
for result in outputs:
    fn = os.path.join(infer_folder, os.path.basename(result.path))
    result.save(filename=fn)

In [8]:
# Disabled helper cell: change the condition to True to copy a hand-picked list of
# input files into a separate "redo" folder.
if False:
    indexes_to_copy = [
        "480035A_1", "480036A_1", "480037A_1", "480038A_1", "480039_1", "480040A_1",
        "480041_1", "480041A_1", "480043_1", "480045IND0_0186", "480045IND0_0791", "480046_1",
        "480047A_1", "480049A_1", "480077_1", "480077A_1", "480243IND0_0583", "480269_1",
        "480287B_1", "480289_1", "480290A_1", "480293_1", "480293A_1", "480296A_1",
        "480297A_1", "480298A_1", "480303A_1", "480304_1", "480305A_1", "480307A_1",
        "480311A_1", "480424B_1", "480710_1", "481141_1", "485466B_1", "485468B_1",
        "485469B_1", "485470B_1", "485470IND0_1190", "485487C_1", "485491C_1", "485516_1",
    ]
    copy_folder = r"D:\FloodChange\Outputs\redo\\"

    # shutil.copy does not create the destination folder
    os.makedirs(copy_folder, exist_ok=True)

    for i in indexes_to_copy:
        # Match the name with any extension; the first hit is used
        matches = glob.glob(os.path.join(input_folder, f"{i}.*"))
        if not matches:
            print(f"No input file found for {i}")
            continue
        fn = matches[0]
        shutil.copy(fn, os.path.join(copy_folder, os.path.basename(fn)))