# Download Training Data

The goal of this notebook is to download the training data from the Sentinel 2 satellite, based on manually labeled data points, saving them locally in a dedicated directory for usage in the training of the model

In [3]:
from sentinelhub import SHConfig, BBox, CRS
import datetime
from dateutil.relativedelta import relativedelta
from sentinelhub import (
    CRS,
    SentinelHubCatalog,
    filter_times,
    BBox,
    DataCollection,
    MimeType,
    SentinelHubDownloadClient,
    SentinelHubRequest
)
import json
import math
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


Secrets used for accessing Sentinel satellite data

In [9]:
config = SHConfig()
config.sh_client_id = "44df92bf-6c45-4917-b365-cbddc4eef953"
config.sh_client_secret = "UtChFP9QyBYMgspXOQhBdMLUwGM0Y4rB"

Define the request that we will make to the Sentinel Hub

In [10]:
forest_stress = """
// Detects anomalies in the moisture of the biomass
const moistureRamps = [
    [-0.8, 0x800000],
    [-0.24, 0xff0000],
    [-0.032, 0xffff00],
    [0.032, 0x00ffff],
    [0.24, 0x0000ff],
    [0.8, 0x000080]
  ];

function setup() {
  return {
    input: ["B8A", "B11", "SCL", "dataMask", "B03", "B08"],
    output: [
      { id: "default", bands: 1 },
      { id: "index", bands: 1, sampleType: "FLOAT32" },
      { id: "eobrowserStats", bands: 2, sampleType: "FLOAT32" },
      { id: "dataMask", bands: 1 },
    ],
  };
}

function evaluatePixel(samples) {
  let moisture = index(samples.B8A, samples.B11);
  let NVDI_index = index(samples.B03, samples.B08);

  // if it's not biomass mark it as non relevant by manipulating the moisture value
  if (NVDI_index > -0.8){
    moisture = 0.8
  }


  // The library for tiffs works well only if there is only one channel returned.
  // So we encode the "no data" as NaN here and ignore NaNs on frontend.
  const indexVal = samples.dataMask === 1 ? moisture : NaN;
  return {
    default: [moisture],
    index: [indexVal],
    eobrowserStats: [moisture, isCloud(samples.SCL) ? 1 : 0],
    dataMask: [samples.dataMask],
  };
}

function isCloud(scl) {
  if (scl == 3) {
    // SC_CLOUD_SHADOW
    return false;
  } else if (scl == 9) {
    // SC_CLOUD_HIGH_PROBA
    return true;
  } else if (scl == 8) {
    // SC_CLOUD_MEDIUM_PROBA
    return true;
  } else if (scl == 7) {
    // SC_CLOUD_LOW_PROBA
    return false;
  } else if (scl == 10) {
    // SC_THIN_CIRRUS
    return true;
  } else if (scl == 11) {
    // SC_SNOW_ICE
    return false;
  } else if (scl == 1) {
    // SC_SATURATED_DEFECTIVE
    return false;
  } else if (scl == 2) {
    // SC_DARK_FEATURE_SHADOW
    return false;
  }
  return false;
}

"""

For each data point, we will go back by 6 monhts.

In [11]:
today = datetime.date.today()

six_months_ago = today - relativedelta(months=6)

time_interval = six_months_ago, today

catalog = SentinelHubCatalog(config=config)

size = (100, 100)

Define the function that, given a list of centers of bounding boxes, returns the concrete bounding boxes

In [12]:
def get_bounding_box_from_center(bounding_boxes_left_corner, bounding_box_side_size_in_metres=100):
      # Earth radius in meters
    R = 6378137.0

    half_side = bounding_box_side_size_in_metres / 2

    lat_offset = half_side / R * (180 / math.pi)

    lon_offset = half_side / (R * math.cos(math.radians(bounding_boxes_left_corner[1]))) * (180 / math.pi)

    top_left = [bounding_boxes_left_corner[1] + lat_offset, bounding_boxes_left_corner[0] - lon_offset]
    bottom_right = [bounding_boxes_left_corner[1] - lat_offset, bounding_boxes_left_corner[0] + lon_offset]


    return BBox(bbox=top_left + bottom_right, crs=CRS.WGS84)

Define function that, given a file path, saves the corresponding data on disk

In [13]:
def get_training_data_for_class(filename, class_name):
    # Open the file
    with open(filename, "r") as training_file:
        polygons = training_file.readlines()

    # Read all the polygons as JSON and extract just the first point

    class_data = []

    for polygon in polygons:
        bounding_box_center = json.loads(polygon)["coordinates"][0][0]

        bounding_box = get_bounding_box_from_center(bounding_box_center)

        search_iterator = catalog.search(
            DataCollection.SENTINEL2_L2A,
            bbox=bounding_box,
            time=time_interval,
            filter="eo:cloud_cover < 30",
            fields={"include": ["id", "properties.datetime"], "exclude": []},
        )

        all_timestamps = search_iterator.get_timestamps()

        time_difference = datetime.timedelta(hours=1)

        unique_acquisitions = filter_times(all_timestamps, time_difference)

        process_requests = []

        for timestamp in unique_acquisitions:
            request = SentinelHubRequest(
                evalscript=forest_stress,
                input_data=[
                    SentinelHubRequest.input_data(
                        data_collection=DataCollection.SENTINEL2_L2A,
                        time_interval=(timestamp - time_difference, timestamp + time_difference),
                    )
                ],
                responses=[SentinelHubRequest.output_response("default", MimeType.PNG)],
                bbox=bounding_box,
                size=size,
                config=config,
            )
            process_requests.append(request)


        client = SentinelHubDownloadClient(config=config)

        download_requests = [request.download_list[0] for request in process_requests]

        data = np.array(client.download(download_requests))
        print(data.shape)
        if data.shape[0] < 31:
            continue

        data = data[-31:, :, :]

        if data.shape != (31, 100, 100):
            continue

        class_data.append(data)
        
    class_data = np.array(class_data)
    
    np.save(f"dataset/{class_name}", class_data)

Read the images related with the points having not the beetle.

In [14]:
get_training_data_for_class("dataset/bark_beetle.txt", "bark_beetle")

(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)
(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(62, 100, 100)




(62, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(37, 100, 100)




(37, 100, 100)




(37, 100, 100)




(37, 100, 100)




(37, 100, 100)




(34, 100, 100)




(34, 100, 100)




(37, 100, 100)




(37, 100, 100)




(37, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(34, 100, 100)




(31, 100, 100)


In [15]:
get_training_data_for_class("dataset/no_bark_beetle.txt", "no_bark_beetle")

(29, 100, 100)
(28, 100, 100)
(62, 100, 100)
(62, 100, 100)
(62, 100, 100)
(62, 100, 100)
(34, 100, 100)
(35, 100, 100)
(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(31, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)




(35, 100, 100)
