In [1]:
# NOTEBOOK IMPORTS
import os, glob, zipfile
import numpy as np
from tqdm.notebook import tqdm
from shutil import copyfile
from datetime import datetime

# IMAGE IMPORTS
import cv2
from PIL import Image

# GIS IMPORTS
import fiona, pyproj
from affine import Affine
from shapely.geometry import shape, mapping, Point, LineString
from shapely.ops import transform, nearest_points, snap
import pandas as pd
import geopandas as gpd
import rasterio as rio
from rasterio.mask import mask
from scipy.spatial import cKDTree

# PLOTTING IMPORTS
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# CUSTOM UTILITIES
from WorldFileUtils import *
from GeometryUtils import *
from icp import *

# Override Pillow's module-level pixel-count limit before any image is opened.
# NOTE(review): presumably raised so that very large scanned map sheets do not
# trigger Pillow's decompression-bomb check; confirm that 933,120,000 pixels
# is large enough for the biggest scan in the dataset.
Image.MAX_IMAGE_PIXELS = 933120000

In [2]:
def extractZipFiles(zip_dir, extract_dir):
    """Extract every ZIP archive found directly inside ``zip_dir``.

    Parameters
    ----------
    zip_dir : str
        Directory scanned (non-recursively) for ZIP archives. The ``.zip``
        extension is matched case-insensitively, so ``.ZIP`` files are
        picked up as well.
    extract_dir : str
        Destination directory. Every archive is unpacked into this same
        directory.

    Returns
    -------
    None
        One ``Extracted: ...`` line is printed per archive.

    Raises
    ------
    zipfile.BadZipFile
        If a file with a ``.zip`` extension is not a valid archive.
    """
    # Sorted so the extraction order (and therefore which archive wins when
    # two archives contain the same member name) is the same on every run;
    # os.listdir makes no ordering guarantee.
    for filename in sorted(os.listdir(zip_dir)):
        if not filename.lower().endswith('.zip'):
            continue

        zip_path = os.path.join(zip_dir, filename)

        # A sub-directory that merely happens to be named "*.zip" is not an
        # archive; skip it instead of letting ZipFile fail on it.
        if not os.path.isfile(zip_path):
            continue

        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_dir)

        print(f'Extracted: {filename} to {extract_dir}')
            
# Define a function to extract the first consecutive numerical characters from a string
def extract_numerical_chars(text):
    """Return the leading run of digit characters of ``text``.

    Scanning stops at the first character for which ``str.isdigit`` is
    false. An empty string is returned when ``text`` is empty or does not
    start with a digit.
    """
    # Position of the first non-digit character, or len(text) when every
    # character is a digit (or the string is empty).
    cut = next(
        (pos for pos, char in enumerate(text) if not char.isdigit()),
        len(text),
    )
    return text[:cut]

def getGEOID(CID):
    """Map a numeric map identifier to a GEOID.

    Identifiers of 90000 or more are treated as community IDs and looked up
    in the module-level ``CIDs`` table (``CID`` column -> ``GEOID_p``
    column). Anything else is treated as a county identifier and passed
    through unchanged.

    Returns the first match as a NumPy scalar, or ``None`` when a community
    ID has no row in ``CIDs``.
    """
    is_community = CID >= 9e4

    if is_community:
        matches = CIDs.loc[CIDs["CID"] == CID, "GEOID_p"].to_numpy()
    else:
        # Counties need no lookup; wrap in an array so both branches share
        # the same "first element or None" handling below.
        matches = np.asarray([CID])

    return matches[0] if matches.size != 0 else None

def getGeometry(geoid,):
    """Return the boundary geometry for a GEOID, or ``None`` if unavailable.

    Parameters
    ----------
    geoid : number or None
        GEOID as produced by ``getGEOID``. Values of 90000 or more are
        looked up in the module-level ``places`` table, smaller values in
        ``counties`` (both matched on their ``GEOID`` column).

    Returns
    -------
    object or None
        The first matching entry of the table's ``geometry`` column, or
        ``None`` when ``geoid`` is missing or has no matching row.
    """
    # getGEOID returns None for unknown community IDs, and pandas may store
    # that as NaN. Handle both explicitly: None would raise a TypeError in
    # the comparison below, and NaN would otherwise fall through to a
    # pointless county lookup.
    if geoid is None or pd.isna(geoid):
        return None

    if geoid >= 9e4:
        # Community / place
        output = places[places["GEOID"] == geoid]["geometry"].to_numpy()
    else:
        # County
        output = counties[counties["GEOID"] == geoid]["geometry"].to_numpy()

    if output.size == 0:
        return None
    return output[0]

Input/output directories and reference datasets

In [3]:
# Directory scanned for the downloaded ZIP archives (passed to extractZipFiles).
base_input_path   = r"D:\FloodChange\AAA_HistoricalDownload"
# Parent directory under which a timestamped working directory is created per run.
base_output_path  = r"C:\Users\fhacesga\Desktop\FIRMsDigitizing\processing"

# The raw string ends in two backslashes so that file names can be appended
# directly with an f-string below (a raw string cannot end in a single backslash).
ref_dir  = r"C:\Users\fhacesga\OneDrive - University Of Houston\AAA_RECTDNN\data\ReferenceDatasets\\"
# Lookup table read by getGEOID via its "CID" and "GEOID_p" columns.
CIDs     = pd.read_csv(f"{ref_dir}CountyCIDs.csv", index_col=0)
# Boundary layers read by getGeometry via their "GEOID" and "geometry" columns.
counties = gpd.read_file(f"{ref_dir}Counties.shp")
places   = gpd.read_file(f"{ref_dir}Places.shp")

# Cast GEOID to integers so the equality tests in getGeometry compare numbers
# with numbers. NOTE(review): presumably the shapefiles store GEOID as text,
# and all values are assumed to fit in int32 - confirm.
counties["GEOID"] = counties["GEOID"].astype(np.int32)
places["GEOID"]   = places["GEOID"].astype(np.int32)

Create a timestamped working directory and unzip all downloaded archives into it

In [4]:
# Name this run's working directory after the current time
# (format YYYY-MM-DD_HH-MM-SS), so each run gets its own folder.
datetime_str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
proc_dir     = os.path.join(base_output_path, datetime_str)
# No exist_ok here: os.makedirs raises if the directory already exists.
os.makedirs(proc_dir)
# Unpack every archive found in base_input_path into the new working directory.
extractZipFiles(base_input_path, proc_dir)

Extracted: 48201C_HISTORIC_FIRM_PANEL_1681931578876.zip to C:\Users\fhacesga\Desktop\FIRMsDigitizing\processing\2023-09-08_16-35-58
Extracted: 48201C_HISTORIC_FIRM_PANEL_1681931578876_2_.zip to C:\Users\fhacesga\Desktop\FIRMsDigitizing\processing\2023-09-08_16-35-58
Extracted: 48201C_HISTORIC_FIRM_PANEL_1681931578876_3_.zip to C:\Users\fhacesga\Desktop\FIRMsDigitizing\processing\2023-09-08_16-35-58
Extracted: 48201C_HISTORIC_FIRM_PANEL_1681931578876_4_.zip to C:\Users\fhacesga\Desktop\FIRMsDigitizing\processing\2023-09-08_16-35-58
Extracted: 48201C_HISTORIC_FIRM_PANEL_1681931578876_5_.zip to C:\Users\fhacesga\Desktop\FIRMsDigitizing\processing\2023-09-08_16-35-58
Extracted: 48201C_HISTORIC_FIRM_PANEL_1681931578876_6_.zip to C:\Users\fhacesga\Desktop\FIRMsDigitizing\processing\2023-09-08_16-35-58


In [7]:
# Candidate index files are collected in two ways:
#   1. every path whose name contains "IND";
#   2. every path whose basename is shorter than 12 characters.
# NOTE(review): rule 2 presumably catches short, legacy-style index file
# names - confirm the 12-character threshold against the data.
image_files = glob.glob(f"{proc_dir}/*")
filtered_files = [file for file in image_files if len(os.path.basename(file)) < 12]

# Rule 1 results first, then rule 2, matching the original row order.
index_paths = glob.glob(f"{proc_dir}/*IND*")
index_paths.extend(filtered_files)

# A short name that also contains "IND" satisfies both rules; dict.fromkeys
# drops the repeated path while keeping first-seen order, so no file appears
# twice in the table. Building the frame from a separately named list also
# avoids rebinding index_files from a list to a DataFrame.
index_files = pd.DataFrame(list(dict.fromkeys(index_paths)), columns=["FilePath"])

In [11]:
# Derive, in order: file name -> leading numeric identifier -> GEOID -> boundary
# geometry. The keyword arguments of assign are evaluated left to right, so
# each lambda can read the column created just before it.
# Note: the int32 cast raises if a basename has no leading digits.
index_files = index_files.assign(
    Basename=lambda frame: frame["FilePath"].map(os.path.basename),
    Location=lambda frame: frame["Basename"].apply(extract_numerical_chars).astype(np.int32),
    GEOID=lambda frame: frame["Location"].apply(getGEOID),
    geometry=lambda frame: frame["GEOID"].apply(getGeometry),
)

In [12]:
# Preview up to 40 rows to check that each file resolved to a GEOID and a geometry.
index_files.head(40)

  iter(obj)  # Can iterate over it.
  len(obj)  # Has a length associated with it.
  s = iter(seq)
  for i in range(min(nitems, len(seq)))
  if nitems < len(seq):
  iter(obj)  # Can iterate over it.
  len(obj)  # Has a length associated with it.
  s = iter(seq)
  for i in range(min(nitems, len(seq)))
  if nitems < len(seq):


Unnamed: 0,FilePath,Basename,Location,GEOID,geometry
0,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480233IND0_0382.jpg,480233,4869908.0,"(POLYGON ((-95.55697000000001 29.601077, -95.5..."
1,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480287IND0_0281.jpg,480287,48201.0,"POLYGON ((-95.85685099999999 29.874578, -95.85..."
2,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480287IND0_0288.jpg,480287,48201.0,"POLYGON ((-95.85685099999999 29.874578, -95.85..."
3,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480287IND0_0382.jpg,480287,48201.0,"POLYGON ((-95.85685099999999 29.874578, -95.85..."
4,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480287IND0_0985.jpg,480287,48201.0,"POLYGON ((-95.85685099999999 29.874578, -95.85..."
5,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480296IND0_0982.jpg,480296,4835000.0,"(POLYGON ((-95.17341999999999 29.807811, -95.1..."
6,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480296IND0_0985.jpg,480296,4835000.0,"(POLYGON ((-95.17341999999999 29.807811, -95.1..."
7,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480296IND0_0987.jpg,480296,4835000.0,"(POLYGON ((-95.17341999999999 29.807811, -95.1..."
8,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480296IND0_1279.jpg,480296,4835000.0,"(POLYGON ((-95.17341999999999 29.807811, -95.1..."
9,C:\Users\fhacesga\Desktop\FIRMsDigitizing\proc...,480304IND0_0182.jpg,480304,4848804.0,"POLYGON ((-95.595237 29.515272, -95.59519 29.5..."
