<a href="https://colab.research.google.com/github/agdoko/deep_green_learning/blob/master/Baseline_Model_Notebook%20with%20ARB%20sample.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Imports

In [4]:
# Required imports
import google
import ee
import folium
import requests
import io
import sklearn
import numpy as np

from google.colab import auth
from folium import plugins
from sklearn.metrics import f1_score


# geemap needs to be installed first in this colab environment
!pip install geemap
import geemap

Collecting jedi>=0.16 (from ipython>=4.0.0->ipywidgets->ipyfilechooser>=0.6.0->geemap)
  Downloading jedi-0.19.0-py2.py3-none-any.whl (1.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.6/1.6 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: jedi
Successfully installed jedi-0.19.0


# Authentication flow

In [5]:
# Standard authentication cell
auth.authenticate_user()
credentials, project_id = google.auth.default()
ee.Initialize(credentials, project='semiotic-garden-395711')

In [6]:
# Testing the authentication worked:
print(ee.Image("NASA/NASADEM_HGT/001").get("title").getInfo())

NASADEM: NASA NASADEM Digital Elevation 30m


# NEW DEFINING CUSTOM FUNCTIONS

## Add Layer Function

In [None]:
# Defining the required add.layer function, which is not native to folium
def add_ee_layer(self, ee_image_object, vis_params, name):
    map_id_dict = ee.Image(ee_image_object).getMapId(vis_params)
    folium.raster_layers.TileLayer(
        tiles=map_id_dict['tile_fetcher'].url_format,
        attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
        name=name,
        overlay=True,
        control=True
    ).add_to(self)

# Add Earth Engine drawing method to folium.
folium.Map.add_ee_layer = add_ee_layer

## Clipping Function

In [None]:
# Defining the clipping feature — this is required because filter by Bounds only
# limits the image collection to images that intersetc the Geometry defined, but
# does not actually cut out the Geometry from the images that intersect
def clip_img(img):
    return img.clip(square)

## Majority Pooling Function

In [None]:
# Defining the majority pooling function
def majority_pool(array, new_shape=(2, 2, 6)):
    # Initialize the new pooled array
    pooled_array = np.zeros(new_shape, dtype=array.dtype)

    # Calculate the step sizes for x and y dimensions
    step_x = array.shape[0] // new_shape[0]
    step_y = array.shape[1] // new_shape[1]

    # Loop through each dimension
    for z in range(array.shape[2]):
        for new_x in range(new_shape[0]):
            for new_y in range(new_shape[1]):
                x_start = new_x * step_x
                y_start = new_y * step_y
                quadrant = array[x_start:x_start+step_x, y_start:y_start+step_y, z]
                majority = np.sum(quadrant) > (step_x * step_y // 2)
                pooled_array[new_x, new_y, z] = majority

    return pooled_array

# NEW ACCEPTING THE COORDINATES FROM ANA's + FELIX's SAMPLING
*   FELIX PROVIDES A POINT
*   ANA PROVIDES 4 CORNER COORDINATES

In [11]:
# Defining start and end dates. Note the target dataset is annual, while the
# features are updated every 5-10 days

# Initial year of interest (inclusive). Picked because features only go to
# 2015 and start in june, the earliest january images for features is 2016.
i_date = '2016'

# Final year of interest (exclusive). Picked because target only goes to 2021.
# So we should have 6 matches in total between features and targets (whole globe)
f_date = '2022'

In [13]:
# TO DO - need to adjust this to accept either a point from Felix or Ana 4 coordinates

# specify these coordinates, then expand a 500 x 500m square around, somewhere in Amazon rainforest
import random
# Adjust these ranges to suit the geographical area of interest - South America (47.7% of SA is known to be covered by forests, including one of the largest tropical forests in the world in the Amazon area)
min_lat = 55.05 # Southernmost point of SA
max_lat = 12.44  # Northernmost point of SA
min_lon = -81.28 # Westernmost point of SA
max_lon = -34.47  # Easternmost point of SA
num_points = 100
# Generate a list of random latitude and longitude points
coordinates = []
for _ in range(num_points):
    lat = random.uniform(min_lat, max_lat)
    lon = random.uniform(min_lon, max_lon)
    coordinates.append((lon, lat))
# Print the list of coordinates
for i, (lon, lat) in enumerate(coordinates):
    print(f"({lat}, {lon}),")

(13.45555639594199, -54.44045226080955),
(36.14676818709064, -43.180266907414826),
(42.99905927897668, -42.1325837805427),
(54.933825631807, -64.65448650314612),
(47.49660202992008, -56.80269204541623),
(40.99399531328745, -46.70148993251093),
(13.866584859600984, -49.637526391504835),
(17.79789494115967, -45.81381809781966),
(48.60813636362735, -47.70228945170309),
(48.00382070030193, -55.59803109169262),
(35.75155827687374, -35.047994404590746),
(49.158789995205346, -51.433930320631305),
(25.946154632684678, -62.84104862443571),
(26.565389110844492, -77.88580186047754),
(39.360058428530465, -72.15769484099808),
(24.716133762988036, -52.57815995922864),
(34.57825073115286, -47.75602182000067),
(53.58350816350808, -53.806630697756916),
(45.92947709667748, -78.91992883561065),
(46.71786871089591, -58.94520027509928),
(13.962336786433767, -34.48440091132063),
(49.902580208409574, -50.477606924235715),
(14.652436631456268, -52.685056533928574),
(52.78016741031922, -60.7480366939013),
(53.

In [23]:
# Define a list of center coordinates (lon, lat)
coordinates = [
(22.842507125144124, -47.000583368501665),
(34.87986315019446, -36.26358682480643),
(51.91796178679286, -68.34134396299893),
(48.47372266739815, -49.66564182773825),
(36.358128292863455, -64.98509603263899),
(49.87660128689118, -78.18161874085979),
(23.454323691046703, -72.19338307546867),
(20.825026743564464, -56.20382259383819),
(38.08209192853465, -62.689649787357055),
(14.594492604399576, -66.27152609310598),
(13.155577054224977, -64.71329836705334),
(46.42323611870197, -71.96821918287208),
(19.10721760028526, -51.9923765099278),
(21.62397422282833, -60.462623155679765),
(37.0088354977009, -66.25068780555978),
(18.66723375315864, -63.21789125203909),
(48.194866310482354, -51.86170684958671),
(54.89538307963339, -79.47084837474144),
(26.892593047872026, -60.0897985455255),
(51.591740370808495, -52.16172768660438),
(49.81321038370324, -45.338607373538494),
(20.34718251527974, -46.46810873853083),
(30.8489740277899, -73.3611995878797),
(50.90101285997888, -80.74763244579447),
(17.486903441488813, -41.24142585490362),
(37.35007785484294, -70.84315248358027),
(45.09865595789075, -57.933702943666916),
(54.85049964132313, -62.184252062652405),
(41.89510786696146, -74.99604641660633),
(38.35095767185615, -50.50465899803217),
(19.749688567605034, -38.90146172418256),
(48.7400383487169, -42.60827055019036),
(16.33915993223365, -78.27103863845058),
(29.50620260980208, -74.13278275702811),
(48.93966701060635, -39.771666249583184),
(25.184145632342616, -80.53360645975354),
(32.79414887903755, -53.50444848757513),
(46.52641580081166, -79.73906334855766),
(34.311242255958675, -71.81964707651025),
(48.8103441599566, -72.28359393078492),
(19.448242519401035, -34.86632366394968),
(47.01650010646048, -57.28682051377381),
(32.75602604827195, -34.88108592954662),
(22.96473910676324, -51.40195661821333),
(41.598638038261946, -69.56240401730386),
(52.710275139180595, -78.08434421820088),
(49.78925103761477, -46.97078163720487),
(24.811451989000638, -44.97931471493216),
(14.699919130492837, -35.212683162013306),
(50.68544381299938, -72.98565119172433),
(20.13994029727531, -58.20018934095722),
(32.083053667129164, -76.00158010871775),
(27.94284020023865, -50.82080809774861),
(34.851760354778875, -49.85806341009817),
(38.836674939107475, -77.85067482602324),
(43.53163876009497, -51.57297320673857),
(29.872535194914843, -70.79161153078735),
(13.85294926980027, -45.04844701597491),
(32.40730228906793, -46.95447922174038),
(28.163403377827535, -49.29047495580616),
(27.736134960188316, -64.16297293980682),
(33.96222794986152, -77.44159262266847),
(47.86258679726498, -35.030913198491945),
(32.652392040723896, -53.85333602749306),
(40.70175397272459, -46.24541146707987),
(35.07469737560649, -78.82379330550226),
(43.22527227652451, -53.42396295221567),
(42.23994501974043, -80.19955267843848),
(49.14007442527291, -60.91822979006097),
(43.038285570312695, -77.82610143048731),
(40.61443675789221, -47.609070376388054),
(31.508033825779766, -65.78099378842877),
(30.588670639143956, -51.728147780992316),
(47.7505615954106, -63.2431611061562),
(20.999500751623202, -51.66891377277655),
(25.9973575047063, -66.54421950091125),
(42.88977666382601, -77.7712925135955),
(16.5445956096779, -42.36556803044885),
(29.84874886987011, -42.413776002412455),
(28.134746709195547, -54.632144951629016),
(31.977501553967862, -73.05305678352089),
(21.28061206982339, -58.702320376357775),
(37.805319056827074, -54.909508598577276),
(34.499919506811764, -55.879138249257615),
(46.03845750543004, -37.31900057754158),
(49.21954326663983, -62.194600542283226),
(30.983037825035634, -41.42914488714719),
(51.12711480407253, -38.42951208911206),
(24.465249810542925, -38.07775245730921),
(41.32699565234628, -67.68854249848653),
(41.436126946527864, -49.14944538152422),
(39.978962508913924, -36.352601268952725),
(42.16520756112106, -77.64283713276477),
(41.55753060021421, -64.50876929170292),
(44.09575232478659, -62.27298878526746),
(39.3426984094219, -76.83790089521935),
(47.44919482397734, -39.18004999661571),
(41.36379016575239, -62.81640454544693),
(28.566017895800208, -63.02463807777613),
(25.405910254482443, -39.31777337375595)
]

# Define the half-width and half-height of the rectangle (in meters)
half_width = 11  # Half of 22 meters
half_height = 11  # Half of 22 meters

# Create an empty list to store the coordinates of rectangle corners
rectangle_coordinates_list = []

# Loop through the list of center coordinates
for lon, lat in coordinates:
    # Calculate the coordinates for the four corners of the rectangle
    top_left = (lon - half_width, lat + half_height)
    top_right = (lon + half_width, lat + half_height)
    bottom_left = (lon - half_width, lat - half_height)
    bottom_right = (lon + half_width, lat - half_height)

    # Add the coordinates of the rectangle corners to the list
    rectangle_coordinates_list.append({
        "Top Left Corner": top_left,
        "Top Right Corner": top_right,
        "Bottom Left Corner": bottom_left,
        "Bottom Right Corner": bottom_right
    })

# Print the list of rectangle coordinates
    for corner, coordinates in rectangle_coordinates.items():
        print(f"{coordinates}")
    print()




(14.405910254482443, -28.317773373755948)
(36.40591025448244, -28.317773373755948)
(14.405910254482443, -50.31777337375595)
(36.40591025448244, -50.31777337375595)

(14.405910254482443, -28.317773373755948)
(36.40591025448244, -28.317773373755948)
(14.405910254482443, -50.31777337375595)
(36.40591025448244, -50.31777337375595)

(14.405910254482443, -28.317773373755948)
(36.40591025448244, -28.317773373755948)
(14.405910254482443, -50.31777337375595)
(36.40591025448244, -50.31777337375595)

(14.405910254482443, -28.317773373755948)
(36.40591025448244, -28.317773373755948)
(14.405910254482443, -50.31777337375595)
(36.40591025448244, -50.31777337375595)

(14.405910254482443, -28.317773373755948)
(36.40591025448244, -28.317773373755948)
(14.405910254482443, -50.31777337375595)
(36.40591025448244, -50.31777337375595)

(14.405910254482443, -28.317773373755948)
(36.40591025448244, -28.317773373755948)
(14.405910254482443, -50.31777337375595)
(36.40591025448244, -50.31777337375595)

(14.405910

# Defining Features

Loading our features dataset from: https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S2_HARMONIZED#bands


In [9]:
# Initialize an empty list to hold the images
image_list = []

# Loop over each year from 2016 to 2021
for year in range(int(i_date), int(f_date)):
    start_date = ee.Date(f'{year}-01-01')
    end_date = start_date.advance(180, 'day') # Threshold 180 days to find an appropriate match to filter conditions

    filtered_images = (ee.ImageCollection("COPERNICUS/S2_HARMONIZED")
                      .filterDate(start_date, end_date) # applying the date range
                      .filterBounds(square) # applying the bounds of the coordinates
                      .filterMetadata('CLOUDY_PIXEL_PERCENTAGE', 'less_than', 50) # applying cloud filter on image property
                      .sort('system:time_start')) # sorting collected images by system time

    clipped_images = filtered_images.map(clip_img) # clipping to the bounds. necessary as filtering by bounds does not clip!
    first_image = clipped_images.first() # getting the individual image, so that it can be bundled in an ImageCollection later

    if first_image:
        image_list.append(first_image)

features = ee.ImageCollection.fromImages(image_list) # create the ImageCollection


NameError: ignored

In [None]:
# Get info about the resulting collection - how many images are in the collection? Should return 6
features.size()

# Get full depth Features nd array

## RGB + NIR bands together

In [None]:
# Get feature bands. Because they have different resolutions we'll stick just
# to those that have resolution of 10m. If we include ones that have 20m or
# 60m resolution, then we'd have to add padding or similar to the nd array in
# its depth. B2 is blue, B3 is green, B4 is red, B8 is NIR (near infra red)
feature_bands = ["B2", "B3", "B4", "B8"]

In [None]:
# Initialize an empty list to collect the numpy arrays
stacked_feature_list = []

# Loop through the image collection
for image in features.getInfo()['features']: # if you do .getInfo() on an image you get a dict, which we're accessing below
    image_id = image['id']
    ee_image = ee.Image(image_id)

    # Download the image as a NumPy array
    url = ee_image.getDownloadUrl({
        'bands': feature_bands,
        'region': square,
        'scale': 10,
        'format': 'NPY' # numpy
    })
    image_array = requests.get(url)
    image_array = np.load(io.BytesIO(image_array.content))

    # Append the numpy array to the list
    stacked_feature_list.append(image_array)

# Stack the arrays depth-wise
feature_stacked_array = np.stack(stacked_feature_list, axis=-1) # create a nd array where the depth dimension is time

## NDVI band only

In [None]:
# Creating the NDVI array - NDVI is an index used for detecting forest in the academic literature
# Assuming data_array is your original array of shape (51, 51, 6) and dtype([('B2', 'f4'), ('B3', 'f4'), ('B4', 'f4'), ('B8', 'f4')])

# Extract B4 (Red) and B8 (NIR)
B4 = feature_stacked_array['B4']
B8 = feature_stacked_array['B8']

# Calculate NDVI - basically the normalised difference between Red and NIR bands
NDVI = (B8 - B4) / (B8 + B4 + 1e-10)  # adding a small constant to avoid division by zero

# Result will be a 3D ndarray of shape (51, 51, 6)

# NEW BASELINE PREDICTION FROM NDVI THRESHOLD
Any pixel with an NDVI value greater than 0.6 is classified as forest. According to ChatGPT this is typical for classifying forest (though the threshold for just classifying vegetation alone, maybe lone trees etc, is lower at 0.2-0.5)

In [None]:
mask_ndvi = NDVI >= 0.6
NDVI_bucketed = np.where(mask_ndvi, 1, 0)

Need to apply a majority pooling to this NDVI nd array to make it compatible with the target

In [None]:
NDVI_pooled = majority_pool(NDVI_bucketed)

# Defining Target

Loading our target dataset from: https://developers.google.com/earth-engine/datasets/catalog/MODIS_061_MCD12Q1#bands%5Blink

In [None]:
# Filtering down for the target band and the date range
target = (ee.ImageCollection("MODIS/061/MCD12Q1").filterDate(i_date, f_date)
          .filterBounds(square)
          .sort('system:time_start'))  # Sort by time to get earliest image

In [None]:
# Checking how many images are contained in the Image Collection, should return 6
target.size()

In [None]:
# Apply the clipping
target = target.map(clip_img)

# Get full depth Target nd array

In [None]:
# Get the target bands. We are only picking one, though there's several to choose from
# We'll pick the LC_Type1 just because it's first
target_bands = ["LC_Type1"]

In [None]:
# Initialize an empty list to collect the numpy arrays
stacked_target_list = []

# Loop through the image collection
for image in target.getInfo()['features']:
    image_id = image['id']
    ee_image = ee.Image(image_id)

    # Download the image as a NumPy array
    url = ee_image.getDownloadUrl({
        'bands': target_bands,
        'region': square,
        'scale': 500, # again, notice the scale! 500m x 500m for a target pixel
        'format': 'NPY'
    })
    image_array = requests.get(url)
    image_array = np.load(io.BytesIO(image_array.content))

    # Append the numpy array to the list
    stacked_target_list.append(image_array)

# Stack the arrays depth-wise
target_stacked_array = np.stack(stacked_target_list, axis=-1)

# NEW TARGET LABELLING BUCKETING FUNCTION

Take any value in the full nd array and allocate it to buckets of "1" forest and "0" not forest. The existing labels in the target nd array are mapped as follows:

*   label -> bucket
*   1-5 -> 1
*   6-17 -> 0

In [None]:
mask = target_stacked_array["LC_Type1"] >= 6  # Create a boolean mask where values greater than 6 are True
target_array_bucketed = np.where(mask, 0, 1)  # Assign 0 where mask is True, 1 where mask is False


# NEW ASSESS BASELINE MODEL PERFORMANCE
Compare the target classification (ground truth) against the prediction using F1, to balance precision and recall.

In [None]:
# Reshape your arrays into 1D arrays
true_values_1D = target_array_bucketed.reshape(-1)
pred_values_1D = NDVI_pooled.reshape(-1)

# Calculate F1 score
f1 = f1_score(true_values_1D, pred_values_1D)

print("F1 Score:", f1)


F1 Score: 0.6666666666666666
