This script uses the package "geemap" to download the satellite images for the grids we created. More specifically, Sentinel2 images for 2019 are being downloaded.

We will only download a subset of the images here; the complete set of images is provided as a zip file, which you can use in the next session.

Install geemap package. All other packages that are necessary are already installed and will be loaded below.

In [None]:
# Install geemap into the notebook environment ("!" runs the line as a shell command).
!pip install geemap

Import packages needed to download the satellite images through Google Earth Engine.

In [None]:
import ee
import os
import pandas as pd
import time
import geemap
from multiprocessing import Pool
from google.colab import drive

Connect to Google Earth Engine.

In [None]:
# Run the Earth Engine sign-in flow for this session.
ee.Authenticate()
# Initialize the client against the high-volume endpoint, using the given Cloud project.
# NOTE(review): "ee-tballatore" looks like a personal project ID; presumably each
# user has to substitute their own project here - confirm.
ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com', project="ee-tballatore")

Also connect to Google Drive.

In [None]:
# Mount Google Drive under /content/gdrive so the data folder can be used like a local path.
# force_remount=True presumably re-mounts even if the drive is already mounted - confirm
# against the Colab documentation.
drive.mount('/content/gdrive', force_remount=True)

Download Sentinel2 satellite imagery for 2019 for East Java.

More information on the Sentinel2 dataset can be found [here](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR_HARMONIZED).

In [None]:
# Root directory of the course data on Google Drive; change this to your own root dir.
root_dir = '/content/gdrive/MyDrive/adb_poverty_elearning_module/data'
# Folder that receives the downloaded Sentinel-2 images.
indonesia_sentinel_dir = root_dir + "/satellite_imagery/idn_east_java_s2_2019"

# Key variables
# Date window for the image collection. Earth Engine's filterDate() treats the
# end date as exclusive, so the window has to end on 2020-01-01 to also include
# images taken on 31 December 2019.
filter_date_begin = '2019-01-01'
filter_date_end = '2020-01-01'

# Set the resolution (in meters per pixel) and the image size (256x256).
resolution = 10
image_size = 256
# Also add a buffer of 5 pixel in each direction (10 pixels in total) to be on the safe side.
buffer = 10
# Set the cloud percentage to 60%. Only images whose CLOUDY_PIXEL_PERCENTAGE
# is below this value are used.
cloud_percentage = 60
# Specify the name of the satellite dataset.
satellite_dataset = "COPERNICUS/S2_SR_HARMONIZED"

# Distance (in meters) from the centroid of each grid to the edge of the
# buffered image: half of the padded image width.
image_buffer = (resolution * (image_size + buffer)) / 2

Read the grids from the csv we created earlier.

In [None]:
grid_csv = root_dir + "/grids/indonesia_grid.csv"

grid_df = pd.read_csv(grid_csv)

# Create a six digit ID string (with leading zeroes) to make matching images and grids easier later on.
# The "id" column is formatted directly instead of row by row: a row-wise apply()
# can upcast the integer ID to float when the other columns are floats, and the
# "{:06d}" format code rejects floats.
grid_df["id_string"] = grid_df["id"].astype(int).map("{:06d}".format)

# Convert the data frame to a list of rows so we can easily loop through the entries.
# Each row keeps the column order of the data frame, with "id_string" as the last element.
grid_list = grid_df.values.tolist()

Visualize first ten observations of our data frame.

In [None]:
# Show the first ten rows of the grid data frame (including the "id_string" column).
grid_df.head(10)

Function to mask clouds for Sentinel2 imagery.

More information can be found [here](https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_SR_HARMONIZED#colab-python).

In [None]:
def mask_s2_clouds(image):
    """Return a cloud-masked copy of a Sentinel-2 image, divided by 10000.

    Args:
        image (ee.Image): A Sentinel-2 image that contains the 'QA60' band.

    Returns:
        ee.Image: The input image with every pixel flagged as cloud or
        cirrus in 'QA60' masked out and all values divided by 10000.
    """
    qa_band = image.select('QA60')

    # In QA60, bit 10 marks clouds and bit 11 marks cirrus.
    no_cloud = qa_band.bitwiseAnd(1 << 10).eq(0)
    no_cirrus = qa_band.bitwiseAnd(1 << 11).eq(0)

    # A pixel counts as clear only when neither flag is set.
    clear_sky = no_cloud.And(no_cirrus)

    masked_image = image.updateMask(clear_sky)
    return masked_image.divide(10000)

Define a function to create a bounding box from points.

In [None]:
def bounding_box(points):
    """Return the axis-aligned bounding box of a sequence of (x, y) points.

    Args:
        points: Iterable of two-element coordinates, x first and y second.

    Returns:
        list: ``[(min_x, min_y), (max_x, max_y)]``.
    """
    # Transpose the point list into one tuple of x values and one of y values.
    xs, ys = zip(*points)

    lower_corner = (min(xs), min(ys))
    upper_corner = (max(xs), max(ys))
    return [lower_corner, upper_corner]

Now change the directory to our image directory.

In [None]:
# Make the image folder the working directory: download_sentinel_images() passes
# bare file names, so the images end up relative to this directory.
os.chdir(indonesia_sentinel_dir)

Define the function that downloads the Sentinel2 images, then call it through a multiprocessing "Pool" (so multiple images are downloaded in parallel) to speed up the process.

In [None]:
def download_sentinel_images(current_grid):
    """Download a cloud-masked Sentinel-2 median composite for one grid cell.

    A bounding box is built around the buffered grid centroid, the image
    collection is filtered by date, location and cloud percentage, and the
    median of the remaining cloud-masked images is saved as a thumbnail.
    Nothing is downloaded when no image matches the filters.

    Args:
        current_grid (list): One element of ``grid_list``. Index 0 holds the
            grid ID, index 1 the longitude, index 2 the latitude and index 4
            the six digit ID string that becomes the file name.

    Returns:
        None. The image is written to ``<six digit ID>.jpg``; the name is
        passed to geemap without a directory.
    """
    # Edge length of the thumbnail in pixels: the image plus the safety buffer.
    param_dimensions = image_size + buffer
    # Specify the format of the image. We want a jpg.
    param_format = "jpg"

    # current_grid is an element of the list grid_list. Extract the parts that we need.
    # The second element is the longitude coordinate.
    c_lon = current_grid[1]
    # The third element is the latitude coordinate.
    c_lat = current_grid[2]
    # The fifth element is the six digit ID (plus ".jpg" gives the file name).
    file_name = current_grid[4]

    # Create a buffer around our centroid.
    geometry = ee.Geometry.Point([c_lon, c_lat]).buffer(image_buffer)
    # Fetch the outer ring of the buffer polygon as a plain list of points.
    geometry = geometry.getInfo()['coordinates'][0]
    # Create a bounding box based on the extracted points.
    (west, south), (east, north) = bounding_box(geometry)

    # Use BBox() to create a rectangle; its arguments are (west, south, east, north).
    bBox = ee.Geometry.BBox(west, south, east, north)

    # And lastly get the bounds of that object again (which is now "compatible" with the Earth Engine filter).
    bBoxBounds = bBox.bounds()

    # Specify the dataset: restrict by date, location and cloud percentage, then mask the clouds.
    current_dataset = (
        ee.ImageCollection(satellite_dataset)
        .filterDate(filter_date_begin, filter_date_end)
        .filterBounds(bBoxBounds)
        .filter(ee.Filter.lt('CLOUDY_PIXEL_PERCENTAGE', cloud_percentage))
        .map(mask_s2_clouds)
    )

    # Specify the visualization parameters (similar to the GEE code we used earlier).
    vis_params = {'min': 0.0, 'max': 0.3, 'bands': ['B4', 'B3', 'B2']}

    # Stop here if no image matches the specified parameters (dataset, time frame, cloud cover).
    if current_dataset.size().getInfo() == 0:
        return

    # Take the median of the matching images and crop it to our bounding box.
    image = current_dataset.median().clip(bBoxBounds)

    # Download the image. The file name is the six digit ID with .jpg at the end.
    geemap.get_image_thumbnail(
        image,
        file_name + "." + param_format,
        vis_params,
        dimensions=param_dimensions,
        region=geometry,
        format=param_format,
    )


if __name__ == '__main__':
    # Save the starting time.
    t0 = time.time()

    # Do ten parallel processes. The "with" block shuts the pool down when it
    # is left, even if one of the downloads raises an error.
    with Pool(10) as pool:
        # Only download the first 100 images. Should take about a minute.
        pool.map(download_sentinel_images, grid_list[0:100])

        # Calculate and print the time it took to download the images.
        print("Done after {} minutes".format((time.time() - t0)/60))