In [1]:
import os
import geopandas as gpd
import ee
import geemap
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from multiprocessing import cpu_count
from shapely.geometry import box

In [2]:
class ImageryCollector:
    """Download per-segment Landsat surface-reflectance clips from Earth Engine.

    For every row of a road-segment GeoPackage the collector buffers the
    segment geometry by each configured distance, picks the least cloudy
    fall-season Landsat scene intersecting that buffer, rescales the optical
    bands to reflectance and exports the clip as ``landsat.tif``.

    Args:
        base_path: Stored on the instance; not used by any method of this class.
        output_dir: Root directory under which per-segment folders are created.
        segment_type: Prefix of the ID column (``f"{segment_type}_segment_id"``)
            and part of the output folder name.
        buffer_sizes: Iterable of buffer distances in metres.
        cloud_cover_threshold: Scenes with ``CLOUD_COVER`` at or above this
            value are discarded.
    """

    # Landsat Collection 2 Level-2 surface reflectance uses the same rescaling
    # for Landsat 5, 7 and 8 (the 0.0001 factor belongs to Collection 1).
    SR_SCALE = 0.0000275
    SR_OFFSET = -0.2

    def __init__(self, base_path, output_dir, segment_type, buffer_sizes, cloud_cover_threshold=20):
        self.base_path = base_path
        self.output_dir = output_dir
        self.segment_type = segment_type
        self.buffer_sizes = buffer_sizes
        self.cloud_cover_threshold = cloud_cover_threshold

    @staticmethod
    def _sensor_for_year(year):
        """Return ``(collection_id, optical_band_names)`` for the given year.

        Single source of truth for the year -> sensor mapping so that the
        collection that is queried and the bands that are selected always agree.
        """
        if year < 2012:  # Landsat 5
            return 'LANDSAT/LT05/C02/T1_L2', ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7']
        if year < 2014:  # Landsat 7
            return 'LANDSAT/LE07/C02/T1_L2', ['SR_B1', 'SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B7']
        # Landsat 8
        return 'LANDSAT/LC08/C02/T1_L2', ['SR_B2', 'SR_B3', 'SR_B4', 'SR_B5', 'SR_B6', 'SR_B7']

    def load_segments(self, gpkg_file):
        """Load the road segments and return them reprojected to EPSG:4326.

        If the file carries no CRS, EPSG:32619 is assumed (with a warning).
        """
        df = gpd.read_file(gpkg_file)
        if df.crs is None:
            print("Warning: Input data has no CRS specified. Assuming UTM.")
            df = df.set_crs(epsg=32619)
        df = df.to_crs(epsg=4326)
        print(f"Loaded {len(df)} segment-year combinations")
        return df

    def create_buffers(self, geometry, sizes):
        """Buffer ``geometry`` (EPSG:4326) by each distance in ``sizes``.

        Buffering is done in EPSG:32619 so distances are in metres; results
        are returned in EPSG:4326.

        Returns:
            dict mapping ``f"{size}m"`` to the buffered geometry.
        """
        geom_utm = gpd.GeoSeries([geometry], crs=4326).to_crs(32619)
        buffers = {}
        for size in sizes:
            buffer_wgs84 = geom_utm.buffer(size).to_crs(4326)
            # Positional access: independent of the series' index labels.
            buffers[f'{size}m'] = buffer_wgs84.iloc[0]
        return buffers

    def get_landsat_collection(self, year):
        """Return the cloud-filtered fall-season Landsat collection for ``year``.

        The sensor is chosen by year (Landsat 5 before 2012, Landsat 7 before
        2014, Landsat 8 otherwise).
        """
        # Guard against float/NumPy years producing dates like "2007.0-09-01".
        year = int(year)
        start_date = f'{year}-09-01'
        # NOTE(review): Earth Engine's filterDate end is documented as
        # exclusive, so Oct 30 itself is not included - confirm intended.
        end_date = f'{year}-10-30'

        collection_id, _ = self._sensor_for_year(year)
        collection = ee.ImageCollection(collection_id)

        # Filter by date and cloud cover
        return (
            collection
            .filterDate(start_date, end_date)
            .filter(ee.Filter.lt('CLOUD_COVER', self.cloud_cover_threshold))
        )

    def process_segment(self, segment_row):
        """Export one Landsat clip per buffer size for a single segment row.

        ``segment_row`` must provide ``geometry``, ``'Year'`` and
        ``f"{segment_type}_segment_id"``. Failures while processing are
        reported on stdout and swallowed so a batch run continues; a missing
        ID column raises ``KeyError`` because it affects every row.
        """
        # Read the ID once, outside the try: the error handler needs it, and
        # looking it up again inside ``except`` would re-raise on a bad schema.
        segment_id = segment_row[f'{self.segment_type}_segment_id']
        try:
            year = segment_row['Year']
            segment_base_dir = os.path.join(
                self.output_dir,
                f"First 6000 segments ({self.segment_type} aggregation)",
                f"segment_{segment_id}_{year}"
            )
            os.makedirs(segment_base_dir, exist_ok=True)

            # Create buffers for the segment
            buffers = self.create_buffers(segment_row.geometry, self.buffer_sizes)

            # The collection and band list depend only on the year, so build
            # them once instead of once per buffer.
            collection = self.get_landsat_collection(year)
            _, band_names = self._sensor_for_year(year)

            for buffer_size in self.buffer_sizes:
                buffer_dir = os.path.join(segment_base_dir, f"{buffer_size}m")
                os.makedirs(buffer_dir, exist_ok=True)

                buffer_geom = buffers[f'{buffer_size}m']
                # Keep only x/y so coordinates carrying a z value still work.
                ee_geometry = ee.Geometry.Polygon(
                    [[[coord[0], coord[1]] for coord in buffer_geom.exterior.coords]]
                )

                candidates = collection.filterBounds(ee_geometry)
                # A client-side ee.Image proxy is always truthy, so "no scene
                # available" has to be detected with a server-side count.
                if candidates.size().getInfo() == 0:
                    print(f"No Landsat image found for segment {segment_id} ({year}), {buffer_size}m buffer")
                    continue

                image = candidates.sort('CLOUD_COVER').first()
                scaled_image = image.select(band_names).multiply(self.SR_SCALE).add(self.SR_OFFSET)

                output_filename = "landsat.tif"
                # NOTE(review): Landsat SR bands are 30 m native; scale=10
                # resamples the export - confirm this is intended.
                geemap.ee_export_image(
                    scaled_image,
                    filename=os.path.join(buffer_dir, output_filename),
                    scale=10,
                    region=ee_geometry,
                    crs='EPSG:4326'
                )
                print(f"Downloaded image for segment {segment_id}, {buffer_size}m buffer")

        except Exception as e:
            print(f"Error processing segment {segment_id}: {str(e)}")

    def run_collection(self, gpkg_file):
        """Load ``gpkg_file`` and process every row on a thread pool."""
        segments = self.load_segments(gpkg_file)

        with ThreadPoolExecutor(max_workers=cpu_count()) as executor:
            # list() drains the lazy map so all tasks finish before returning.
            list(executor.map(self.process_segment, (row for _, row in segments.iterrows())))


In [3]:
# Execute Imagery Collection

def run_imagery_collection(base_dir=None):
    """Initialise Earth Engine and download imagery for both segment layouts.

    Runs the 100m configuration first, then the 200m configuration. For each,
    the segments GeoPackage is loaded from ``base_dir`` and every row is
    processed sequentially with ``ImageryCollector.process_segment``.

    Args:
        base_dir: Directory holding the input GeoPackages; outputs are written
            beneath it as well. Defaults to the original project data folder.

    Raises:
        Exception: Whatever ``ee.Initialize()`` raises, after printing a hint
            to authenticate first.
    """
    # Initialize Earth Engine
    try:
        ee.Initialize()
    except Exception:
        print("Please run ee.Authenticate() first if you haven't already")
        raise  # bare raise keeps the original traceback

    # Base configuration
    if base_dir is None:
        base_dir = "C:\\Users\\gmoor\\Documents\\REVISED CAPSTONE PROJECT\\Data"

    # One entry per segment layout; processed in order.
    configs = [
        {
            'gpkg_file': os.path.join(base_dir, "first200_aggregate_segments (100meters).gpkg"),
            'output_base_dir': base_dir,
            'segment_type': '100m',
            'buffer_sizes': [100, 200, 300]
        },
        {
            'gpkg_file': os.path.join(base_dir, "first200_aggregate_segments (200meters).gpkg"),
            'output_base_dir': base_dir,
            'segment_type': '200m',
            'buffer_sizes': [200, 300, 400]
        },
    ]

    for config in configs:
        collector = ImageryCollector(
            base_path=None,
            output_dir=config['output_base_dir'],
            segment_type=config['segment_type'],
            buffer_sizes=config['buffer_sizes']
        )

        # Load segments and process them
        segments = collector.load_segments(config['gpkg_file'])
        print(f"\nProcessing {len(segments)} segments for {config['segment_type']} configuration...")

        # Process each segment
        for _, segment in segments.iterrows():
            collector.process_segment(segment)

# Entry point: start the full imagery download when this cell/script is
# executed directly (not when the code is imported as a module).
if __name__ == "__main__":
    run_imagery_collection()

Loaded 736 segment-year combinations

Processing 736 segments for 100m configuration...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/149715335715/thumbnails/bbabb411fd41eef987165becf8f92421-d50ff9331f4323ed64b26b5976cf4941:getPixels
Please wait ...
Data downloaded to C:\Users\gmoor\Documents\REVISED CAPSTONE PROJECT\Data\First 6000 segments (100m aggregation)\segment_1_2007\100m\landsat.tif
Downloaded image for segment 1, 100m buffer
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/149715335715/thumbnails/8bb105f92a0734a33342ccd6c6c2f06a-958123cbd1f387585d3af6ee62396d67:getPixels
Please wait ...
Data downloaded to C:\Users\gmoor\Documents\REVISED CAPSTONE PROJECT\Data\First 6000 segments (100m aggregation)\segment_1_2007\200m\landsat.tif
Downloaded image for segment 1, 200m buffer
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/149715335715/thumbnails/86e5f8dce63d72