In [None]:
import pystac_client
import planetary_computer

# Initialize STAC client for Sentinel-2 data access. `sign_inplace` is passed as
# the client's modifier so returned objects are signed by planetary_computer.
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1/",
    modifier=planetary_computer.sign_inplace,
)

# Example area of interest as [west, south, east, north] in lon/lat degrees.
# The original cell shipped an empty list here and kept this example in a comment.
example_bbox = [-78.6, 35.3, -78.0, 35.8]

# Example query to get a single Sentinel-2 item.
# `limit` only sets the page size requested from the API; `max_items` is what
# actually stops the iterator after one item instead of paging through every match.
search = catalog.search(
    collections=["sentinel-2-l2a"],
    bbox=example_bbox,
    limit=1,
    max_items=1,
)
items = list(search.items())

# Display CRS, bounding box, and resolution
if items:
    item = items[0]
    props = item.properties
    # Newer versions of the STAC projection extension publish `proj:code`
    # (e.g. "EPSG:32617") instead of `proj:epsg`; fall back to it rather than
    # raising KeyError when the older key is absent.
    print("Sentinel-2 CRS:", props.get("proj:epsg", props.get("proj:code")))  # EPSG code
    print("Sentinel-2 Bounding Box:", item.bbox)  # Spatial extent
    # `gsd` may be absent at item level (prints None in that case).
    print("Sentinel-2 Resolution:", props.get("gsd"))  # Ground sampling distance in meters
else:
    print("No Sentinel-2 data found for the specified region.")

In [None]:
def query_available_dates(tile_geometry, catalog, start, end, query, collections=None, limit=1000,
                          tile_crs="EPSG:32617"):
    """Return the acquisition dates of all catalog items intersecting a tile.

    The tile geometry is reprojected from ``tile_crs`` to EPSG:4326 and its
    bounding box is used as the spatial filter of a ``catalog.search`` call.

    Args:
        tile_geometry: Geometry of the tile, expressed in ``tile_crs``.
        catalog: Object exposing ``search(...)`` whose result has ``items()``
            (e.g. a ``pystac_client.Client``).
        start: Start of the date range; interpolated into ``"{start}/{end}"``.
        end: End of the date range; interpolated into ``"{start}/{end}"``.
        query: Property filter forwarded unchanged as ``query=``.
        collections: Collection ids to search. ``None`` (the default) means
            ``["sentinel-2-l2a"]``.
        limit: Forwarded unchanged as ``limit=`` to ``catalog.search``.
        tile_crs: CRS of ``tile_geometry``. Defaults to ``"EPSG:32617"``, the
            value that was previously hard-coded.

    Returns:
        list of ``datetime.date``: one entry per returned item, in the order
        the search yields them. Duplicates are kept. Items whose ``datetime``
        is ``None`` are skipped.

    Note:
        Relies on a module-level ``gpd`` (geopandas) being in scope when called.
    """
    # Avoid a shared mutable default while keeping the historical default value.
    if collections is None:
        collections = ["sentinel-2-l2a"]

    # Get the bounds of the tile in WGS84
    tile_wgs84 = gpd.GeoSeries([tile_geometry], crs=tile_crs).to_crs("EPSG:4326").iloc[0]
    minx, miny, maxx, maxy = tile_wgs84.bounds
    bbox = [minx, miny, maxx, maxy]

    # Perform the search
    search = catalog.search(
        collections=collections,
        bbox=bbox,
        datetime=f"{start}/{end}",
        limit=limit,
        query=query,
    )

    # Collect one date per item; an item without a single nominal datetime
    # would otherwise raise AttributeError on `.date()`.
    return [item.datetime.date() for item in search.items() if item.datetime is not None]

def select_dates_best_spread(dates_list, num_per_year=4):
    """Pick, for each calendar year, the dates closest to a set of target months.

    For every year present in ``dates_list`` and every target month, the
    not-yet-selected date of that year nearest to the 1st of the target month
    is chosen. A year with fewer dates than targets contributes all its dates.

    Args:
        dates_list: Iterable of ``datetime.date`` objects. It is not modified.
        num_per_year: Number of targets per year. 4 -> Mar/Jun/Sep/Dec,
            3 -> Apr/Aug/Dec, 2 -> Jun/Dec, 1 -> Jun. Any other value falls
            back to a single June target.

    Returns:
        list of ``datetime.date``: the selected dates, sorted ascending and
        unique within each year.
    """
    # Define target months based on the desired number per year
    target_months = {
        4: [3, 6, 9, 12],  # Default quarters: March, June, September, December
        3: [4, 8, 12],     # For 3 dates per year: April, August, December
        2: [6, 12],        # For 2 dates per year: June, December
        1: [6]             # For 1 date per year: June
    }.get(num_per_year, [6])  # Default to June if an unexpected `num_per_year` is given

    # Group dates by year. Iterate over a sorted *copy* so the caller's list is
    # left untouched; the sorted order also makes ties resolve to the earlier date,
    # because min() keeps the first of equally distant candidates.
    dates_by_year = defaultdict(list)
    for d in sorted(dates_list):
        dates_by_year[d.year].append(d)

    selected_dates = []

    # Iterate over each year and select dates with the best spread
    for year, available_dates in dates_by_year.items():
        yearly_selected = set()

        for month in target_months:
            target_date = date(year, month, 1)

            # Closest remaining date to the target; None once the year is exhausted
            closest_date = min(
                (d for d in available_dates if d not in yearly_selected),
                key=lambda d: abs(d - target_date),
                default=None,
            )

            if closest_date is not None:
                yearly_selected.add(closest_date)

        selected_dates.extend(yearly_selected)

    # Ensure final selection is sorted
    selected_dates.sort()
    return selected_dates

# --- Search configuration -------------------------------------------------
catalog = pystac_client.Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1/",
    modifier=planetary_computer.sign_inplace,
)
query = {"eo:cloud_cover": {"lt": 1}}

collections = ["sentinel-2-l2a"]

# Dates to query
start = "2016-01-01"
end = "2024-08-31"

# --- Available dates (cached on disk) -------------------------------------
# NOTE: the cache is keyed only by file name. Changing `start`, `end`, `query`
# or `collections` does NOT invalidate it; delete the file to force a re-query.
dates_cache_path = "../data/available_dates.pkl"

if not os.path.exists(dates_cache_path):
    # `collections` is passed explicitly so that editing it above takes effect.
    available_dates = query_available_dates(
        tiles_gdf.iloc[0].geometry, catalog, start, end, query, collections=collections
    )
    with open(dates_cache_path, "wb") as f:
        pkl.dump(available_dates, f)
else:
    # NOTE(review): unpickling executes arbitrary code; only load a cache file
    # that this notebook wrote itself.
    with open(dates_cache_path, "rb") as f:
        available_dates = pkl.load(f)

print("Number of available dates for processing:", len(available_dates))

# --- Date selection -------------------------------------------------------
buffer_days = 45
not_available_dates = [date(2022, 4, 3), date(2022, 2, 14), date(2021, 9, 27),  date(2021, 8, 8),
                       date(2020, 12, 1), date(2020, 10, 7), date(2019, 4, 4), date(2019, 2, 5),
                       date(2018, 11, 27), date(2018, 10, 1)]
omit_dates = [date(2024, 6, 11), date(2022, 6, 17), date(2021, 4, 30), date(2020, 7, 12),
              date(2019, 7, 30), date(2018, 5, 11), date(2017, 9, 8)]

# Individual dates removed from the candidates *before* the spread selection
# (unlike `omit_dates`, which are removed after it).
pre_excluded_dates = {date(2017, 5, 16), date(2024, 8, 27)}

# Hoisted so the window is built once rather than twice per comparison.
buffer_window = timedelta(days=buffer_days)

# Keep a date only if it is not pre-excluded and lies more than `buffer_days`
# away from every entry of `not_available_dates`.
candidate_dates = [
    d for d in available_dates
    if d not in pre_excluded_dates
    and all(abs(d - nd) > buffer_window for nd in not_available_dates)
]

selected_dates = select_dates_best_spread(candidate_dates, 3)
selected_dates = [d for d in selected_dates if d not in omit_dates]
print("Number of selected dates for processing:", len(selected_dates))

In [2]:
import rasterio

# Location of the NLCD 2016 urban imperviousness raster.
# NOTE(review): absolute, machine-specific path; adjust it for your environment.
nlcd_path = "D:/data/nlcd_2016_impervious_l48_20210604/nlcd_2016_impervious_l48_20210604.img"

# Open the raster just long enough to report its CRS, bounds and pixel size.
with rasterio.open(nlcd_path) as nlcd:
    nlcd_summary = (
        ("NLCD CRS:", nlcd.crs),
        ("NLCD Spatial Extent (bounds):", nlcd.bounds),
        ("NLCD Resolution:", nlcd.res),  # (x, y) pixel size
    )
    for label, value in nlcd_summary:
        print(label, value)


NLCD CRS: PROJCS["Albers_Conical_Equal_Area",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Albers_Conic_Equal_Area"],PARAMETER["latitude_of_center",23],PARAMETER["longitude_of_center",-96],PARAMETER["standard_parallel_1",29.5],PARAMETER["standard_parallel_2",45.5],PARAMETER["false_easting",0],PARAMETER["false_northing",0],UNIT["meters",1],AXIS["Easting",EAST],AXIS["Northing",NORTH]]
NLCD Spatial Extent (bounds): BoundingBox(left=-2493045.0, bottom=177285.0, right=2342655.0, top=3310005.0)
NLCD Resolution: (30.0, 30.0)


In [1]:
import geopandas as gpd

# Johnston County boundary shapefile (update with the actual path if it differs).
county_path = "../data/county_boundary.shp"

# Read the boundary, then report its CRS and overall extent.
county_boundary = gpd.read_file(county_path)
county_crs = county_boundary.crs
county_extent = county_boundary.total_bounds
print("Johnston County Boundary CRS:", county_crs)
print("Johnston County Boundary Spatial Extent (bounds):", county_extent)


Johnston County Boundary CRS: EPSG:32617
Johnston County Boundary Spatial Extent (bounds): [ 707750.12914638 3904356.29395498  765959.87280919 3967194.75168451]
