In [2]:
#1) Import all necessary packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime
import ee
import geemap
import geopandas as gpd



In [3]:
import os
import sys

# Get the current working directory
current_dir = os.path.abspath('')

# Search for the 'constants.py' file starting from the current directory and moving up the hierarchy
project_root = current_dir
while not os.path.isfile(os.path.join(project_root, 'constants.py')):
    project_root = os.path.dirname(project_root)

# Add the project root to the Python path
sys.path.append(project_root)




In [4]:
# Import SHAPEFILE_PATH from constants
from constants import SHAPEFILE_PATH, SRC_PATH, PROJECT_PATH

# Import Hansen Dataset and Study Boundary

https://developers.google.com/earth-engine/datasets/catalog/UMD_hansen_global_forest_change_2021_v1_9

Hansen, M. C., P. V. Potapov, R. Moore, M. Hancher, S. A. Turubanova, A. Tyukavina, D. Thau, S. V. Stehman, S. J. Goetz, T. R. Loveland, A. Kommareddy, A. Egorov, L. Chini, C. O. Justice, and J. R. G. Townshend. 2013. "High-Resolution Global Maps of 21st-Century Forest Cover Change." Science 342 (15 November): 850-53. 10.1126/science.1244693 Data available on-line at: https://glad.earthengine.app/view/global-forest-change.

In [5]:
#ee.Authenticate()
ee.Initialize()

In [6]:
# load Hansen Global Forest Change v1.9 
imagery = ee.Image("UMD/hansen/global_forest_change_2021_v1_9")


In [7]:
imagery.getInfo()


{'type': 'Image',
 'bands': [{'id': 'treecover2000',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 255},
   'dimensions': [1440000, 560000],
   'crs': 'EPSG:4326',
   'crs_transform': [0.00025, 0, -180, 0, -0.00025, 80]},
  {'id': 'loss',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 255},
   'dimensions': [1440000, 560000],
   'crs': 'EPSG:4326',
   'crs_transform': [0.00025, 0, -180, 0, -0.00025, 80]},
  {'id': 'gain',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 255},
   'dimensions': [1440000, 560000],
   'crs': 'EPSG:4326',
   'crs_transform': [0.00025, 0, -180, 0, -0.00025, 80]},
  {'id': 'lossyear',
   'data_type': {'type': 'PixelType',
    'precision': 'int',
    'min': 0,
    'max': 255},
   'dimensions': [1440000, 560000],
   'crs': 'EPSG:4326',
   'crs_transform': [0.00025, 0, -180, 0, -0.00025, 80]},
  {'id': 'first_b30',
   'data_type': {'type': 'Pixe

In [8]:
Map = geemap.Map()

Map

Map(center=[20, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(Togg…

In [9]:
Map.addLayer(imagery,{}, 'Hansen')

In [9]:
shapefile_path = SHAPEFILE_PATH
study_boundary = gpd.read_file(shapefile_path)


In [10]:
ee_boundary = geemap.geopandas_to_ee(study_boundary)
ee_boundary


In [13]:
clipped_hansen_dataset = imagery.clip(ee_boundary)


In [14]:
Map = geemap.Map()
Map.centerObject(ee_boundary, zoom=10)
Map.addLayer(clipped_hansen_dataset, {'bands': ['lossyear'], 'palette': ['000000', '00FF00'], 'max': 100}, 'Hansen Dataset Clipped')
Map


Map(center=[-21.999008038350034, -59.795791441565456], controls=(WidgetControl(options=['position', 'transpare…

In [16]:
num_bands = len(clipped_hansen_dataset.bandNames().getInfo())
print(f"Number of bands: {num_bands}")
# Get the band names from the clipped raster
band_names = clipped_hansen_dataset.bandNames().getInfo()

# Print the band names
print("Band names:")
for i, band in enumerate(band_names, 1):
    print(f"{i}. {band}")


Number of bands: 13
Band names:
1. treecover2000
2. loss
3. gain
4. lossyear
5. first_b30
6. first_b40
7. first_b50
8. first_b70
9. last_b30
10. last_b40
11. last_b50
12. last_b70
13. datamask


**Dataset Details**
This global dataset is divided into 10x10 degree tiles, consisting of seven files per tile. All files contain unsigned 8-bit values and have a spatial resolution of 1 arc-second per pixel, or approximately 30 meters per pixel at the equator.

**Tree canopy cover for year 2000 (treecover2000)**
Tree cover in the year 2000, defined as canopy closure for all vegetation taller than 5m in height. Encoded as a percentage per output grid cell, in the range 0–100.

**Global forest cover gain 2000–2012 (gain)**
Forest gain during the period 2000–2012, defined as the inverse of loss, or a non-forest to forest change entirely within the study period. Encoded as either 1 (gain) or 0 (no gain).

**Year of gross forest cover loss event (lossyear)**
Forest loss during the period 2000–2020, defined as a stand-replacement disturbance, or a change from a forest to non-forest state. Encoded as either 0 (no loss) or else a value in the range 1–20, representing loss detected primarily in the year 2001–2020, respectively.


**Data mask (datamask)**
Three values representing areas of no data (0), mapped land surface (1), and permanent water bodies (2).


**Circa year 2000 Landsat 7 cloud-free image composite (first)**
Reference multispectral imagery from the first available year, typically 2000. If no cloud-free observations were available for year 2000, imagery was taken from the closest year with cloud-free data, within the range 1999–2012.


**Circa year 2020 Landsat cloud-free image composite (last)**
Reference multispectral imagery from the last available year, typically 2020. If no cloud-free observations were available for year 2020, imagery was taken from the closest year with cloud-free data.


Reference composite imagery are median observations from a set of quality assessed growing season observations in four spectral bands, specifically Landsat bands 3, 4, 5, and 7. Normalized top-of-atmosphere (TOA) reflectance values (ρ) have been scaled to an 8-bit data range using a scale factor (g):

DN = ρ · g + 1
The g factor was chosen independently for each band to preserve the band-specific dynamic range, as shown in the following table:

Landsat Band	       | g
-----------------------|----
Red (0.66 micrometers) | 508
NIR (0.86 micrometers) | 254
SWIR1 (1.6 micrometers)| 363
SWIR2 (2.2 micrometers)| 423



In [17]:
export_params = {
    'scale': 30, # Resolution in meters
    'region': ee_boundary, # Export only the region of interest
    'crs': 'EPSG:4326', # Coordinate reference system (optional)
    'fileFormat': 'GeoTIFF', # Export format (GeoTIFF or other supported formats)
    'fileNamePrefix': 'clipped_hansen_dataset', # Prefix for the exported file name
}
# Get the geometry and scale (resolution) of the clipped raster
geometry = ee_boundary.geometry()
scale = export_params['scale']

# Compute the pixel dimensions of the exported raster
dimensions = ee.Image.pixelLonLat().reproject(geometry.projection().atScale(scale)).reduceRegion(
    reducer=ee.Reducer.minMax(),
    geometry=geometry,
    scale=scale,
    maxPixels=1e10
).getInfo()

pixel_width = dimensions['longitude_max'] - dimensions['longitude_min']
pixel_height = dimensions['latitude_max'] - dimensions['latitude_min']

# Estimate the number of pixels
num_pixels = pixel_width * pixel_height
print(f"Number of pixels: {num_pixels}")

# Estimate the file size in bytes (assuming 4 bytes per pixel per band for GeoTIFF format)``
file_size_bytes = num_pixels * num_bands * 4
print(f"Estimated file size (bytes): {file_size_bytes}")

# Convert the file size to a more human-readable format
#file_size_megabytes = file_size_bytes / (1024 * 1024)
#print(f"Estimated file size (MB): {file_size_megabytes:.2f}")




Number of pixels: 33.31658415137021
Estimated file size (bytes): 1732.4623758712507


# Export Deforestation Files

You can create individual files for deforestation between 2011 and 2021 using the Hansen dataset. In the dataset, the 'loss' band contains the binary forest loss information. The 'lossyear' band contains the year of the forest loss event. You can filter the loss events for each year from 2011 to 2021 and export them as separate files.

In [21]:
geemap.ee_export_image(clipped_hansen_dataset.select('lossyear'),
 filename='lossyear.tif',
  scale=scale, region=ee_boundary.geometry(),
   file_per_band=False,
   unmask_value = -1 )

Generating URL ...
An error occurred while downloading.
Total request size (1376518752 bytes) must be less than or equal to 50331648 bytes.


In [None]:
# Extract the first feature from the FeatureCollection
ee_boundary_feature = ee_boundary.first()

# Get the geometry of the feature
ee_boundary_geometry = ee_boundary_feature.geometry()

# Get the coordinates of the geometry
ee_boundary_coordinates = ee_boundary_geometry.coordinates().getInfo()

# Update the export_params dictionary
export_params = {
    'scale': 30,
    'region': ee_boundary_coordinates,
    'crs': 'EPSG:4326',
    'fileFormat': 'GeoTIFF',
    'fileNamePrefix': 'clipped_hansen_lossyear',
}

# Your existing export task code
export_task = ee.batch.Export.image.toDrive(
    image=clipped_hansen_dataset.select('lossyear'),
    description='lossyear',
    folder='hansen_data',
    maxPixels=1e10,
    **export_params
)

export_task.start()


In [28]:
export_params = {
    'scale': 30, # Resolution in meters
    'region': ee_boundary, # Export only the region of interest
    'crs': 'EPSG:4326', # Coordinate reference system (optional)
    'fileFormat': 'GeoTIFF', # Export format (GeoTIFF or other supported formats)
    'fileNamePrefix': 'clipped_hansen_dataset', # Prefix for the exported file name
}

export_task = ee.batch.Export.image.toDrive(
    image=clipped_hansen_dataset.select('treecover2000'),
    description='treecover2000',
    folder='hansen_data',  # Optional: specify a folder in your Google Drive
    maxPixels=1e10,
    **export_params
)
# Start the export task
export_task.start()


In [29]:
# Save to local
def export_deforestation_year(year):
    # Filter the deforestation events for the given year
    deforestation_year = clipped_hansen_dataset.select('treecover2000').eq(year)
    
    # Set up the export task
    export_params_year = export_params.copy()
    export_params_year['fileNamePrefix'] = f'deforestation_{year}'
    
    # Define the output directory and file path
    output_dir = os.path.join(PROJECT_PATH, 'src', 'data_loading', 'hansen_data')
    output_file = os.path.join(output_dir, f'deforestation_{year}.tif')

    geemap.ee_export_image(deforestation_year, filename=output_file, scale=scale, region=ee_boundary.geometry(), file_per_band=False)

    

    
    print(f"Exporting deforestation data for {year}...")

# limitation of file size causes download to fail,
# likely will work if study area is smaller


The limitation you encountered is related to the geemap.ee_export_image() function, which is part of the geemap library. This function tries to download the image directly to your local machine, but it has a size limitation.

Google Earth Engine itself does not have this limitation when exporting images. When you use Earth Engine's native export functions, like ee.batch.Export.image.toDrive() or ee.batch.Export.image.toCloudStorage(), you can export much larger datasets without issues. These methods handle large datasets efficiently by splitting the export task into smaller chunks and processing them in parallel. However, downloading images directly to your local machine or loading them in-browser is subject to size limitations, mostly because of memory and network constraints.

In summary, the limitation is not inherent to Earth Engine itself but rather to the specific export method you were using in the geemap library. Using the native Earth Engine export functions should resolve the issue.

In [36]:
def export_deforestation_year(year):
    # Filter the deforestation events for the given year
    deforestation_year = clipped_hansen_dataset.select('lossyear').eq(year)
    
    # Set up the export task
    export_params_year = export_params.copy()
    export_params_year['fileNamePrefix'] = f'deforestation_{year}'
    

    #Since study_boundary is a GeoDataFrame, we need to access the geometry using 
    # the iloc method to get the first feature in the GeoDataFrame
    
    # Convert the study area boundary to a list of coordinates
    study_area_coordinates =  study_boundary.geometry.iloc[0].__geo_interface__['coordinates']

    # Create a polygon from the list of coordinates
    polygon = ee.Geometry.Polygon(study_area_coordinates)

    # Clip the deforestation_year image to the study area polygon
    clipped_deforestation_year = deforestation_year.clip(polygon)

    export_task = ee.batch.Export.image.toDrive(
        image=clipped_deforestation_year,
        description=f'Export_deforestation_{year}',
        folder='hansen_data', # Optional: specify a folder in your Google Drive
        **export_params_year
    )

    # Start the export task
    export_task.start()
    
    print(f"Exporting deforestation data for {year}...")


A new empty folder will appear in the google drive associated with Earth Engine account.
The export process takes a considerable amount of time, progress can be monitored at
https://code.earthengine.google.com/tasks

In [37]:
for year in range(2011, 2022):
    export_deforestation_year(year)


Exporting deforestation data for 2011...
Exporting deforestation data for 2012...
Exporting deforestation data for 2013...
Exporting deforestation data for 2014...
Exporting deforestation data for 2015...
Exporting deforestation data for 2016...
Exporting deforestation data for 2017...
Exporting deforestation data for 2018...
Exporting deforestation data for 2019...
Exporting deforestation data for 2020...
Exporting deforestation data for 2021...
