In [34]:
import ee
import zipfile
import geopandas as gpd
import ee
import pandas as pd

ee.Initialize()
ee.Authenticate

<function ee.Authenticate(authorization_code: Optional[str] = None, quiet: Optional[bool] = None, code_verifier: Optional[str] = None, auth_mode: Optional[str] = None, scopes: Optional[Sequence[str]] = None, force: bool = False) -> Optional[bool]>

In [35]:
shapefile_path = "C:/Users/pc/My Drive/2025/Uni/TCC/Shapes/contorno_area_total/contorno_area_total.shp"

# Check if the path is a .zip file
if shapefile_path.endswith('.zip'):
    # Try to read shapefile from a zip archive
    try:
        # Check if the .zip file exists and open it
        with zipfile.ZipFile(shapefile_path, 'r') as zip_ref:
            zip_ref.printdir()  # Optional: Print contents of the zip to debug
            # Try to find the .shp file inside the zip
            shapefile_found = False
            for file in zip_ref.namelist():
                if file.endswith('.shp'):
                    shapefile_found = True
                    shapefile_within_zip = file
                    break

            if shapefile_found:
                # Read shapefile directly from the zip file
                oi = gpd.read_file(f'zip://{shapefile_path}/{shapefile_within_zip}')
                print(f"Successfully loaded shapefile from {shapefile_path}.")
            else:
                print("No .shp file found inside the zip archive.")
                #
    except Exception as e:
        print(f"Error reading shapefile from zip archive: {e}")
        #
else:
    # If not a .zip, assume it is a regular shapefile
    try:
        # Read the shapefile normally
        aoi = gpd.read_file(shapefile_path)
        print(f"Successfully loaded shapefile from {shapefile_path}.")
    except Exception as e:
        print(f"Error reading shapefile: {e}")


# After loading, check if the GeoDataFrame is not empty
if not aoi.empty:
    # If the GeoDataFrame contains multiple geometries, dissolve them into one
    if len(aoi) > 1:
        aoi = aoi.dissolve()

    # Extract the first geometry from the dissolved GeoDataFrame
    geometry = aoi.geometry.iloc[0]

    # Check if the geometry is a Polygon or MultiPolygon
    if geometry.geom_type in ['Polygon', 'MultiPolygon']:
        # Convert the geometry to GeoJSON format
        geojson = geometry.__geo_interface__

        # Remove the third dimension from the coordinates if it exists
        if geojson['type'] == 'Polygon':
            geojson['coordinates'] = [list(map(lambda coord: coord[:2], ring)) for ring in geojson['coordinates']]
        elif geojson['type'] == 'MultiPolygon':
            geojson['coordinates'] = [[list(map(lambda coord: coord[:2], ring)) for ring in polygon] for polygon in geojson['coordinates']]

        # Create an Earth Engine geometry object from the GeoJSON coordinates
        ee_geometry = ee.Geometry(geojson)

        # Convert the Earth Engine geometry to a Feature
        feature = ee.Feature(ee_geometry)

        # Create a FeatureCollection with the feature
        aoi = ee.FeatureCollection([feature])

        print("AOI defined successfully.")

        # check_next_button()
    else:
        
        print("The geometry is not a valid type (Polygon or MultiPolygon).")
else:
    print("The shapefile does not contain any geometries.")        #

Successfully loaded shapefile from C:/Users/pc/My Drive/2025/Uni/TCC/Shapes/contorno_area_total/contorno_area_total.shp.
AOI defined successfully.


In [36]:
# Define the start and end dates for filtering the image collection

import pandas as pd
from datetime import datetime
inicio = '2024-01-11'
final = '2025-01-11'
nuvem = 40

# Load the Sentinel-2 image collection and filter by date, location, and cloud coverage
sentinel2 = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED') \
    .filterDate(inicio, final) \
    .filterBounds(aoi) \
    .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', nuvem)) \
    .map(lambda image: image.set('date', image.date().format('YYYY-MM-dd')))

# Get the number of images in the collection
count = sentinel2.size().getInfo()
print(f"Number of images in collection: {count}")

Number of images in collection: 175


In [40]:
scl_classes_behavior = {
    0: True,  # No data
    1: True,  # Saturated/defective
    2: False,   # Dark features
    3: True,  # Cloud shadows
    4: False,   # Vegetation
    5: False,   # Bare soils
    6: False,   # Water
    7: True,   # Cloud low probability
    8: True,  # Cloud medium probability
    9: True,  # Cloud high probability
    10: True, # Thin cirrus
    11: False  # Snow or ice
}

def filter_within_AOI(sentinel2):
    def mask_cloud_and_shadows(image):
        scl = image.select('SCL')
        # Start with an all-inclusive mask
        mask = ee.Image.constant(1)
        # Apply exclusions
        for class_value, include in scl_classes_behavior.items():
            if include:
                mask = mask.And(scl.neq(class_value))
        
        masked_image = image.updateMask(mask)
        
        # Calculate the percentage of valid pixels
        total_pixels = image.select(0).reduceRegion(
            reducer=ee.Reducer.count(),
            geometry=aoi,
            scale=10
        ).get('B1')
        
        valid_pixels = masked_image.select(0).reduceRegion(
            reducer=ee.Reducer.count(),
            geometry=aoi,
            scale=10
        ).get('B1')
        
        percentage_valid = ee.Number(valid_pixels).divide(total_pixels).multiply(100)
        
        # Add the percentage of valid pixels as a property
        return masked_image.set('percentage_valid_pixels', percentage_valid)

        # Apply the cloud and shadow mask function to the image collection
    sentinel2_masked =  sentinel2.map(mask_cloud_and_shadows)

    # Define the valid pixel threshold
    valid_pixel_threshold = 90

    # Filter the collection based on the valid pixel threshold
    filtered_collection = sentinel2_masked.filter(ee.Filter.gte('percentage_valid_pixels', valid_pixel_threshold))

    # Get the number of images in the filtered collection
    filtered_count = filtered_collection.size().getInfo()

    masked_timestamps = filtered_collection.aggregate_array('system:time_start').getInfo()

    return sentinel2.filter(
        ee.Filter.inList('system:time_start', ee.List(masked_timestamps))
    )

sentinel2_filtered = filter_within_AOI(sentinel2)

# Get the number of images in the filtered collection
filtered_count = sentinel2_filtered.size().getInfo()
print(f"Number of images in filtered collection: {filtered_count}") 



Number of images in filtered collection: 154
