<a href="https://colab.research.google.com/github/chqzeng/WaterSatOnCloud/blob/main/Tool2%20-%20LST8%20Matchup%20Extraction/Tool2_GEE_LST8_Matchup_Extraction_Level_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Tool2 - GEE LST8 Matchup Extraction - Level 2

GEE LST8 satellite data extraction (Level 1), with user-defined locations and time (lat, lon, datetime field in a .csv)

This script finds the median band values of water pixels within a 100m radius from the user-defined locations.

In [1]:
# Load GEE API
import ee
ee.Authenticate()
ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=6djVWYYU2WFFDC7ti3eobVm3Ik-PEWAfXER5anC9y0M&tc=3XRQh0M-kAKNqv6YBgiYv0_UzSS6zt9JvFzWswRJtrs&cc=3rkhwRGJcHAj4RztMWbMS_0dmlAcyKMii_bll41Zog8

The authorization workflow will generate a code, which you should paste in the box below.
Enter verification code: 4/1AZEOvhUUhumo0aeKDyN6V2xrOl46teD8XQn5ptVOZtku7X2SjYXsgFal79k

Successfully saved authorization token.


In [5]:
# Load other libraries
from datetime import timedelta
import numpy as np
import pandas as pd

In [6]:
# Get our date range to search, and format correctly for query
# By default, we only look at images within 15 days BEFORE the in-situ data collection, according to the contest requirement. This can be adjusted.
def get_date_range(date, time_buffer_days=15):
    """Get a date range to search for in the planetary computer based
    on a sample's date. The time range will include the sample date
    and time_buffer_days days prior

    Returns a string"""
    datetime_format = "%Y-%m-%d"
    range_start = pd.to_datetime(date) - timedelta(days=time_buffer_days)
    return [range_start.strftime(datetime_format),pd.to_datetime(date).strftime(datetime_format)]

# Bit operator
def is_set(x, n):
    return x & 2 ** n != 0

In [7]:
# Simulated data - you can replace this with real data

# create data, locations and time from the provided training dataset in the contest
data = [['A', 39.474744, -86.898353, '2021-08-23'],
  ['B', 35.980000, -78.839410, '2021-08-16'],
  ['C', 38.04947, -99.827, '2019-07-23']]

# create the pandas DataFrame
df = pd.DataFrame(data, columns=['sample', 'latitude','longitude','date'])

# print dataframe
df

Unnamed: 0,sample,latitude,longitude,date
0,A,39.474744,-86.898353,2021-08-23
1,B,35.98,-78.83941,2021-08-16
2,C,38.04947,-99.827,2019-07-23


In [8]:
# L8 surface reflectance in GEE
L8_data = ee.ImageCollection("LANDSAT/LC08/C02/T1_L2")

In [9]:
# Create a dataframe to store results
output_matchups = pd.DataFrame()

# Loop through the rows in the dataframe
for i in range(len(df)):

    row = df.iloc[i]
    print('\n========== Row: ' +str(i))
    print('\n' + str(row))

    date_range = get_date_range(row.date)

    # point of interest
    my_poi = ee.Geometry.Point(row.longitude, row.latitude)

    # point of interest with a 100m buffer
    my_poi_buffer = my_poi.buffer(100)

    # if no image found, go to next row
    try:
        # Sort by date time: newest first
        L8_data_filtered = L8_data.filterBounds(my_poi).filterDate(date_range[0], date_range[1]).sort('system:time_start',False)

        # https://gis.stackexchange.com/questions/231333/selecting-every-image-of-collection-using-google-earth-engine
        listOfImages = L8_data_filtered.toList(L8_data_filtered.size())

        numberOfImages = listOfImages.length().getInfo()
        print('Number of images found: ' + str(numberOfImages))

        # Loop through the returned images. If no water pixels found, go to the next image
        for image_n in range(numberOfImages):
            print('\nimage_n: ' + str(image_n))

            image = ee.Image(listOfImages.get(image_n))

            # Create buffered pixels
            image_at_poi_buffer = image.sampleRegions(my_poi_buffer,None,30)

            # Extract pixel band values
            pixels_values = image_at_poi_buffer.getInfo()['features']
            pixels_values_properties = [x['properties'] for x in pixels_values]
            bands = pd.DataFrame(pixels_values_properties)

            # Keep only water
            bands['is_water'] = is_set(bands['QA_PIXEL'],7)
            bands = bands[bands.is_water==True]

            if len(bands)==0:
                print('Failed to find water pixels')
                continue

            # Find median values
            bands_median = bands.median()

            print('Median pixel values within 100m: \n' + str(bands_median))

            # Add meta data back
            bands_median['latitude'] = row.latitude
            bands_median['longitude'] = row.longitude
            bands_median['date'] = row.date

            # Add to results
            output_matchups = output_matchups.append(bands_median,ignore_index=True)
            print('\nMatchup found!')

            break

    except:
        print('Failed to find Landsat-2 imagery')
        continue



sample                A
latitude      39.474744
longitude    -86.898353
date         2021-08-23
Name: 0, dtype: object
Number of images found: 3

image_n: 0
Failed to find water pixels

image_n: 1
Failed to find water pixels

image_n: 2
Failed to find water pixels


sample                B
latitude          35.98
longitude     -78.83941
date         2021-08-16
Name: 1, dtype: object
Number of images found: 1

image_n: 0
Failed to find water pixels


sample                C
latitude       38.04947
longitude       -99.827
date         2019-07-23
Name: 2, dtype: object
Number of images found: 3

image_n: 0
Median pixel values within 100m: 
QA_PIXEL         21952.0
QA_RADSAT            0.0
SR_B1             8152.0
SR_B2             8430.0
SR_B3             8970.0
SR_B4             8834.0
SR_B5             8755.0
SR_B6             8471.0
SR_B7             8212.0
SR_QA_AEROSOL       96.0
ST_ATRAN          6579.0
ST_B10           46560.0
ST_CDIST          4425.0
ST_DRAD           1394.0
ST_

  output_matchups = output_matchups.append(bands_median,ignore_index=True)


In [None]:
# Print output matchups
output_matchups

Unnamed: 0,B1,B10,B11,B2,B3,B4,B5,B6,B7,B8,...,QA_PIXEL,QA_RADSAT,SAA,SZA,VAA,VZA,is_water,latitude,longitude,date
0,0.117129,302.07132,300.669037,0.097685,0.076844,0.06148,0.050018,0.033101,0.024521,0.07108,...,21952.0,0.0,12697.0,2553.0,-8933.0,366.0,1.0,38.04947,-99.827,2019-07-23


In [None]:
# To save data
# output_matchups.to_csv('L8_matchups.csv', index=False)