### NAIP On AWS

This Jupyter notebook provides code to scrape the [NAIP on AWS](https://registry.opendata.aws/naip/) manifest and create a GPKG file that provides a geospatial footprint for all available imagery.

Then, given an OSM ID of interest, it's easy to look up which NAIP images intersect the corresponding OSM feature and download them directly.

In [1]:
import pathlib

from tqdm import tqdm
from osgeo import ogr, osr
ogr.UseExceptions()

from rsc.common import aws_naip
from rsc.common.aws_naip import AWS_PATH

# Retrieve the listing of objects available in the NAIP bucket
manifest = aws_naip.get_naip_manifest()

# Keep only the manifest entries whose extension marks a shapefile component
SHAPEFILE_EXTENSIONS = ('shp', 'dbf', 'shx', 'prj', 'sbn')
shp = [
    name for name in manifest
    if name.rsplit('.', 1)[-1].lower() in SHAPEFILE_EXTENSIONS
]

# Fetch every selected object, showing a progress bar while doing so
for object_name in tqdm(shp):
    aws_naip.get_naip_file(object_name)

100%|██████████| 1150/1150 [00:00<00:00, 125232.36it/s]


In [2]:
def conv(s: str) -> str:
    """Map a manifest object stem to the TIF name used in the shapefile metadata.

    Only the first six underscore-separated tokens of ``s`` are kept (anything
    after them is dropped), re-joined with underscores, and given a ``.tif``
    extension. The manifest and the shapefiles do not name files identically,
    so both sides are normalised through this function before comparison.
    """
    kept_tokens = s.split('_')[:6]
    stem = '_'.join(kept_tokens)
    return f'{stem}.tif'

# Build a lookup from the shapefile-style TIF name (see conv) to the original
# object path listed in the manifest. Non-TIF manifest entries are ignored.
with open(AWS_PATH / 'manifest.txt', 'r') as f:
    mani = {}
    for line in f:
        p = line.strip()
        if p.endswith('.tif'):
            mani[conv(pathlib.Path(p).stem)] = p

In [3]:
# Do all the work!
#
# Every feature of every shapefile found under AWS_PATH is cross-checked
# against the manifest lookup (`mani`). Matching features are reprojected to
# EPSG:4326 and written to the 'footprints' layer of naip_on_aws.gpkg, together
# with a few metadata fields and the corresponding manifest object path.

# Create SRS (EPSG:4326: WGS-84 decimal degrees)
srs = osr.SpatialReference()
srs.ImportFromEPSG(4326)
srs.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

# Create GPKG file for writing
driver: ogr.Driver = ogr.GetDriverByName('GPKG')
ds_w: ogr.DataSource = driver.CreateDataSource(str(AWS_PATH / 'naip_on_aws.gpkg'))
layer_w: ogr.Layer = ds_w.CreateLayer('footprints', srs=srs, geom_type=ogr.wkbPolygon)

# Define output fields
state_field = ogr.FieldDefn('STATE', ogr.OFTString)
band_field = ogr.FieldDefn('BAND', ogr.OFTString)
usgs_id_field = ogr.FieldDefn('USGSID', ogr.OFTString)
src_img_date_field = ogr.FieldDefn('SRCIMGDATE', ogr.OFTString)
filename_field = ogr.FieldDefn('FILENAME', ogr.OFTString)
object_field = ogr.FieldDefn('OBJECT', ogr.OFTString)

# Create output fields in layer
layer_w.CreateField(state_field)
layer_w.CreateField(band_field)
layer_w.CreateField(usgs_id_field)
layer_w.CreateField(src_img_date_field)
layer_w.CreateField(filename_field)
layer_w.CreateField(object_field)

# Get layer feature definition to load in features
feat_defn = layer_w.GetLayerDefn()

# Loop through all fetched shapefiles
for p in AWS_PATH.rglob('*.shp'):

    # Load them in OGR, get layer and spatial reference
    ds_r: ogr.DataSource = ogr.Open(str(p))
    layer_r: ogr.Layer = ds_r.GetLayer()
    srs_r: osr.SpatialReference = layer_r.GetSpatialRef()
    srs_r.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

    # The source SRS is the same for every feature in the layer, so the
    # transformation to the output SRS is built once per shapefile.
    trans = osr.CoordinateTransformation(srs_r, srs)

    # The state is stored under 'ST' in some shapefiles and 'QUADST' in others.
    # Decide once per layer which name to read instead of relying on an
    # exception being raised for every single feature.
    if layer_r.GetLayerDefn().GetFieldIndex('ST') >= 0:
        state_key = 'ST'
    else:
        state_key = 'QUADST'

    # Loop through the features in the layer
    feat_r = None
    for feat_r in layer_r:

        # Quickly crosscheck with manifest. Skip if not in there
        filename = feat_r.GetFieldAsString('FileName')
        filename_conv = conv(filename.split('.')[0])
        if filename_conv not in mani:
            continue

        # A feature without geometry has no footprint to record
        geom_r = feat_r.GetGeometryRef()
        if geom_r is None:
            continue

        # Parse remaining metadata
        state = feat_r.GetFieldAsString(state_key)
        band = feat_r.GetFieldAsString('Band')
        usgs_id = feat_r.GetFieldAsString('USGSID')
        src_img_date = feat_r.GetFieldAsString('SrcImgDate')

        # Copy the geometry (the original is owned by the source feature) and
        # convert the copy to the desired spatial reference
        geom = geom_r.Clone()
        geom.Transform(trans)

        # Create our new feature
        feat_w = ogr.Feature(feat_defn)
        feat_w.SetGeometry(geom)
        feat_w.SetField('STATE', state)
        feat_w.SetField('BAND', band)
        feat_w.SetField('USGSID', usgs_id)
        feat_w.SetField('SRCIMGDATE', src_img_date)
        feat_w.SetField('FILENAME', filename)
        feat_w.SetField('OBJECT', mani[filename_conv])

        # Save!
        layer_w.CreateFeature(feat_w)

        # Cleanup written feature and borrowed geometry reference
        feat_w = None
        geom_r = None

    # Cleanup read dataset (features first, then layer, then data source)
    feat_r = None
    layer_r = None
    ds_r = None

# Cleanup write dataset; dropping the references closes the GPKG file
layer_w = None
ds_w = None