In [None]:
# A notebook for checking the latest sentinel images for a defined area.
# First, need a means of storing previous imagery.

In [101]:
# These lines require user intervention
from google.colab import auth
auth.authenticate_user()
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [104]:
from google.oauth2 import service_account
from google.cloud import storage
import ee

PROJECT_ID = "earth-engine-workflow"
BUCKET_NAME = "airsignal2023"
REGION = "us-central1"
USER_NAME = 'charlesan'

# Skip earth engine login clickety click
#credentials = service_account.Credentials.from_service_account_file('/content/drive/MyDrive/earth-engine-workflow-013a80bf4f5b.json')


service_account = 'harvester@earth-engine-workflow.iam.gserviceaccount.com'
credentials = ee.ServiceAccountCredentials(service_account, '/content/drive/MyDrive/earth-engine-workflow-013a80bf4f5b.json')

ee.Initialize(credentials)

# Check for bucket and create if not present
client = storage.Client(credentials=credentials)
client
OUTPUT_BUCKET = 'airsignal2023'

def create_bucket(bucket_name):
    storage_client = storage.Client()    
    bucket = storage_client.create_bucket(bucket_name)
    print('Bucket {} created'.format(bucket.name))

def bucket_exists(bucket_name):
    storage_client = storage.Client()
    bucket = storage_client.bucket(bucket_name)
    try:
        bucket.reload()
        return True
    except Exception as e:
        print(e)
        return False

# Replace <BUCKET_NAME> with the name of your bucket
if bucket_exists(OUTPUT_BUCKET):
  print('Bucket exists')
else:
  print('Bucket does not exist')
  create_bucket(OUTPUT_BUCKET)


Bucket exists


In [None]:
#!pip install earthengine-api

In [105]:
# Anglesea Heath
xmin = 144.2
xmax = 144.
ymin = -38.3
ymax = -38.5

ROI_POLY = ee.Geometry.Polygon([[[xmin, ymax],
                          [xmin, ymin],
                          [xmax, ymin],
                          [xmax, ymax]]])
lat,lon = (ymin+ymax)/2,(xmin+xmax)/2

In [106]:
# Define a first set of dates
START_DATE = "2023-01-01"
END_DATE =  "2023-03-01"

In [127]:
import pandas as pd
r = pd.date_range(START_DATE,END_DATE)
r[0].strftime("%Y-%m-%d")
pd.to_datetime(r[0])

Timestamp('2023-01-01 00:00:00', freq='D')

In [107]:
# SENTINEL FUNCTIONS
# Use QA for mask then discard
#https://github.com/davemlz/ee-catalog-scale-offset-params/blob/main/list/ee-catalog-scale-offset-parameters.json
#bands_10m =
#bands_60m = 
#SCALE_BANDS =
BANDS = ['B2','B3','B4','B8','QA60']
CLOUD_LIMIT = 50
def se2mask(image):
    quality_band = image.select('QA60')
    # using the bit mask for clouds and cirrus clouds respectively
    cloudmask = 1 << 10
    cirrusmask = 1 << 11    
    # we only want clear skies
    mask = quality_band.bitwiseAnd(cloudmask).eq(0) and (quality_band.bitwiseAnd(cirrusmask).eq(0)) 
    # we'll divide by 10000 to make interpreting the reflectance values easier
    return image.updateMask(mask)#.divide(10000)
# This scale functions wrecks the dates
def scale(image):
  bands = ['B2','B3','B4','B8']
  image = image.select(bands).multiply(0.0001)
  #image = image.addBands(im2.select(bands))
  return image

In [108]:
imcol = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED').filterBounds(ROI_POLY).filterDate(
    START_DATE, END_DATE).filter(ee.Filter.lt("CLOUDY_PIXEL_PERCENTAGE",CLOUD_LIMIT))
#.select(BANDS)
# Other functions we can map over the collection. Some have side effects.
#.map(scale)
#.map(se2mask)
# map(lambda img: img.divide(10000))

In [109]:
imlist = imcol.toList(imcol.size())
unique_ids = imlist.map(lambda im: ee.Image(im).id()  )
# Tell EE to compute this operation
idslist = ee.List.getInfo(unique_ids)
print("Image IDs")
print(idslist)

Image IDs
['20230105T002049_20230105T002432_T54HYC', '20230105T002049_20230105T002432_T55HBT', '20230110T002051_20230110T002415_T54HYC', '20230110T002051_20230110T002415_T55HBT', '20230115T002049_20230115T002358_T54HYC', '20230115T002049_20230115T002358_T55HBT', '20230125T002049_20230125T002320_T54HYC', '20230125T002049_20230125T002320_T55HBT', '20230209T002051_20230209T002314_T54HYC', '20230209T002051_20230209T002314_T55HBT', '20230214T002059_20230214T002314_T54HYC', '20230214T002059_20230214T002314_T55HBT', '20230219T002051_20230219T002312_T54HYC', '20230219T002051_20230219T002312_T55HBT']


In [110]:
# Retrieve the list of previously processed files
# Can only use tmp locally
import json
blobs = client.list_blobs(OUTPUT_BUCKET)
local_folder = "/tmp/"
filename = "idlist.json"
processed_ids = []
for blob in blobs:
  print(blob)
  if(blob.name == filename):
    print("Blobs: {}".format(blob.name))
    destination_uri = "{}/{}".format(local_folder, filename) 
    blob.download_to_filename(destination_uri)
    with open(destination_uri, 'r') as f:
      s = json.load(f)
      print(s)
      processed_ids = s

In [111]:
processed_ids

[]

In [112]:
# Compare s and idslist
newids = list(set(idslist).difference(set(processed_ids)))

In [113]:
newids

['20230105T002049_20230105T002432_T54HYC',
 '20230110T002051_20230110T002415_T55HBT',
 '20230125T002049_20230125T002320_T54HYC',
 '20230115T002049_20230115T002358_T55HBT',
 '20230214T002059_20230214T002314_T54HYC',
 '20230219T002051_20230219T002312_T54HYC',
 '20230110T002051_20230110T002415_T54HYC',
 '20230125T002049_20230125T002320_T55HBT',
 '20230214T002059_20230214T002314_T55HBT',
 '20230105T002049_20230105T002432_T55HBT',
 '20230209T002051_20230209T002314_T55HBT',
 '20230219T002051_20230219T002312_T55HBT',
 '20230115T002049_20230115T002358_T54HYC',
 '20230209T002051_20230209T002314_T54HYC']

In [114]:
# Filter out already processed
newid_list = ee.List(newids)
id_filter = ee.Filter.inList('system:index',newid_list)
imcol_new = imcol.filter(id_filter).map(se2mask).select(BANDS)#.map(scale)

In [115]:
#ee.Number.getInfo(imcol_new.size())
imlist = imcol_new.toList(imcol_new.size())
unique_ids = imlist.map(lambda im: ee.Image(im).id()  )
idslist = ee.List.getInfo(unique_ids)
print("Image IDs")
print(idslist)

Image IDs
['20230105T002049_20230105T002432_T54HYC', '20230105T002049_20230105T002432_T55HBT', '20230110T002051_20230110T002415_T54HYC', '20230110T002051_20230110T002415_T55HBT', '20230115T002049_20230115T002358_T54HYC', '20230115T002049_20230115T002358_T55HBT', '20230125T002049_20230125T002320_T54HYC', '20230125T002049_20230125T002320_T55HBT', '20230209T002051_20230209T002314_T54HYC', '20230209T002051_20230209T002314_T55HBT', '20230214T002059_20230214T002314_T54HYC', '20230214T002059_20230214T002314_T55HBT', '20230219T002051_20230219T002312_T54HYC', '20230219T002051_20230219T002312_T55HBT']


In [116]:
dates = imlist.map(lambda im: ee.Image(im).date().format("YYYY-MM-dd"))
unique_dates = dates.distinct()
print("Unique dates")
print(ee.List.getInfo(dates))

Unique dates
['2023-01-05', '2023-01-05', '2023-01-10', '2023-01-10', '2023-01-15', '2023-01-15', '2023-01-25', '2023-01-25', '2023-02-09', '2023-02-09', '2023-02-14', '2023-02-14', '2023-02-19', '2023-02-19']


In [117]:
# Append the new files we are going to process
# Upload list of unique ids of files we processed
client = storage.Client(credentials=credentials)
BUCKET = client.get_bucket(OUTPUT_BUCKET)
filename = "idlist.json"
blob = BUCKET.blob(filename)
json_object = idslist + newids

blob.upload_from_string(
    data=json.dumps(json_object),
    content_type='application/json'
)
result = filename + ' upload complete'

In [118]:
# This function adds a band representing the image timestamp.
def addTime(image):
  names = image.bandNames().add('t')
  return image.addBands(image.getNumber('system:time_start')).rename(names)
# Map the function over the collection
#imcol_with_time = imcol.map(addTime)
#img = imcol.first()
#print(ee.List.getInfo(img.bandNames()))
#img = imcol_with_time.first()
#print(ee.List.getInfo(img.bandNames()))

In [119]:
def mosaicByDate(imcol):
  imlist = imcol.toList(imcol.size())
  unique_dates = imlist.map(lambda im: ee.Image(im).date().format("YYYY-MM-dd")).distinct()

  def match_dates(d):
    d = ee.Date(d)
    dateString = ee.Date(d).format('yyyy-MM-dd')
    im = imcol.filterDate(d, d.advance(1, "day")).mosaic()
    return im.set(
        "system:time_start", d.millis(), 
        "system:id", d.format("YYYY-MM-dd"))#.rename(dateString)

  mosaic_imlist = unique_dates.map(match_dates)

  return ee.ImageCollection(mosaic_imlist)

moscol = mosaicByDate(imcol_new)
#moscol_time = mosaicByDate(imcol_with_time)

In [120]:
def rename_date(img):
  date_string = ee.Image(img).date().format("_YYYY-MM-dd")
  #img = img.rename(ee.String('BE').cat(date_string))
  rstr = img.bandNames().map(lambda bandname: ee.String(bandname).cat(date_string))
  img = img.rename(rstr)
  return img

moscol = moscol.map(rename_date)

In [121]:
img = moscol.first()#.clip(ROI_POLY)
projection = img.select(1).projection().getInfo();
projection['crs']

'EPSG:4326'

In [122]:
moslist = moscol.toList(moscol.size())
# print(mostlist.getInfo())
unique_dates_mos = moslist.map(lambda im: ee.Image(im).date().format("YYYY-MM-dd")) 
print(ee.List.getInfo(unique_dates_mos))
ee.List.getInfo(moscol.first().bandNames())

['2023-01-05', '2023-01-10', '2023-01-15', '2023-01-25', '2023-02-09', '2023-02-14', '2023-02-19']


['B2_2023-01-05',
 'B3_2023-01-05',
 'B4_2023-01-05',
 'B8_2023-01-05',
 'QA60_2023-01-05']

In [123]:
# Convert collection to a single multiband image
moscol_bands = moscol.toBands()
ee.List.getInfo(moscol_bands.bandNames())

['0_B2_2023-01-05',
 '0_B3_2023-01-05',
 '0_B4_2023-01-05',
 '0_B8_2023-01-05',
 '0_QA60_2023-01-05',
 '1_B2_2023-01-10',
 '1_B3_2023-01-10',
 '1_B4_2023-01-10',
 '1_B8_2023-01-10',
 '1_QA60_2023-01-10',
 '2_B2_2023-01-15',
 '2_B3_2023-01-15',
 '2_B4_2023-01-15',
 '2_B8_2023-01-15',
 '2_QA60_2023-01-15',
 '3_B2_2023-01-25',
 '3_B3_2023-01-25',
 '3_B4_2023-01-25',
 '3_B8_2023-01-25',
 '3_QA60_2023-01-25',
 '4_B2_2023-02-09',
 '4_B3_2023-02-09',
 '4_B4_2023-02-09',
 '4_B8_2023-02-09',
 '4_QA60_2023-02-09',
 '5_B2_2023-02-14',
 '5_B3_2023-02-14',
 '5_B4_2023-02-14',
 '5_B8_2023-02-14',
 '5_QA60_2023-02-14',
 '6_B2_2023-02-19',
 '6_B3_2023-02-19',
 '6_B4_2023-02-19',
 '6_B8_2023-02-19',
 '6_QA60_2023-02-19']

In [124]:
# Save 10m bands
task = ee.batch.Export.image.toCloudStorage(**{
  'image': moscol_bands,
  'description': 'image_export_job',
  'crs': 'EPSG:7854',
  'bucket': OUTPUT_BUCKET,
  'fileNamePrefix': 'angle_brick_1',
  #'dimensions':,
  'crs': projection['crs'],
  'scale':10,
  'crsTransform': projection['transform'],
  'region': ROI_POLY,
  'fileFormat': 'GeoTIFF',
  'formatOptions': {
    'cloudOptimized': True
  },
  'maxPixels': 1e8})
task.start()

In [None]:
print(ee.batch.Task.list())

[<Task 56UFYNZEA62OZS773AFM4HIW EXPORT_IMAGE: image_export_job (COMPLETED)>, <Task 4UWFYRAPAXVOGUEPJNOTIXSL EXPORT_IMAGE: image_export_job (FAILED)>, <Task FFMRIHTXOSD6BTZAC27M75AY EXPORT_IMAGE: image_export_job (COMPLETED)>, <Task GDNBUX4OGPSDR6AZCYM2BQLI EXPORT_IMAGE: image_export_job (FAILED)>, <Task ZTUJEMBS3UVX7FQWCYG4A7TP EXPORT_IMAGE: image_export_job (COMPLETED)>, <Task G6CGHA7AAZCJW6JDIGFYZ32M EXPORT_IMAGE: image_export_job (FAILED)>, <Task 4OUYJLO5NB6MCGRRHTIUDCXY EXPORT_IMAGE: image_export_job (FAILED)>, <Task 2JCZKLBUWKHOINLOYDXTCYRV EXPORT_IMAGE: image_export_job (FAILED)>, <Task L6J25XQIYR4DWGWPQ65HD44A EXPORT_IMAGE: image_export_job (FAILED)>, <Task 4F3EKAT6MI7TDEVT7I6XMEPA EXPORT_IMAGE: image_export_job (FAILED)>, <Task X6V7MSVE6Z4W4Q7Y4ZFHJXRZ EXPORT_IMAGE: image_export_job (FAILED)>, <Task D3VI7SLHBITSBA7VRFJIBUL7 EXPORT_IMAGE: image_export_job (FAILED)>, <Task MYTFJ7KAE2KTNHG54PEDIKQZ EXPORT_IMAGE: image_export_job (COMPLETED)>]


In [None]:
blobs = client.list_blobs(OUTPUT_BUCKET)
# Note: The call returns a response only when the iterator is consumed.
for blob in blobs:
  print(blob.name)

angle.tif
angle_col.tif
idlist.json


In [None]:
# Some information about the image list we are processing
def get_name(img):
  return(ee.Image(img).getString('id'))

In [None]:
# Define another date
# This time search the local archives to see if these dates are present

In [None]:
# Get some information about the extracted collection
# Get the size of the collection. NB this can be expensive to run!
#print(ee.Number.getInfo(imcol.size()))


print(ee.List.getInfo(unique_dates))
imlist = imcol.toList(imcol.size())
dates = imlist.map(lambda im: ee.Image(im).date()  )
idslist = ee.List.getInfo(dates)
#print(ee.List.getInfo(idslist))

Image IDs
['20230105T002049_20230105T002432_T54HYC', '20230105T002049_20230105T002432_T55HBT', '20230110T002051_20230110T002415_T54HYC', '20230110T002051_20230110T002415_T55HBT', '20230115T002049_20230115T002358_T54HYC', '20230115T002049_20230115T002358_T55HBT', '20230125T002049_20230125T002320_T54HYC', '20230125T002049_20230125T002320_T55HBT']
Dates
['2023-01-05', '2023-01-05', '2023-01-10', '2023-01-10', '2023-01-15', '2023-01-15', '2023-01-25', '2023-01-25']
Unique dates
['2023-01-05', '2023-01-10', '2023-01-15', '2023-01-25']


In [None]:
"""COPERNICUS/S2_SR_HARMONIZED": {
        "AOT": {
            "offset": 0.0,
            "scale": 0.001
        },
        "B1": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B11": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B12": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B2": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B3": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B4": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B5": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B6": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B7": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B8": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B8A": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "B9": {
            "offset": 0.0,
            "scale": 0.0001
        },
        "MSK_CLDPRB": {
            "offset": 0.0,
            "scale": 1.0
        },
        "MSK_SNWPRB": {
            "offset": 0.0,
            "scale": 1.0
        },
        "QA10": {
            "offset": 0.0,
            "scale": 1.0
        },
        "QA20": {
            "offset": 0.0,
            "scale": 1.0
        },
        "QA60": {
            "offset": 0.0,
            "scale": 1.0
        },
        "SCL": {
            "offset": 0.0,
            "scale": 1.0
        },
        "TCI_B": {
            "offset": 0.0,
            "scale": 1.0
        },
        "TCI_G": {
            "offset": 0.0,
            "scale": 1.0
        },
        "TCI_R": {
            "offset": 0.0,
            "scale": 1.0
        },
        "WVP": {
            "offset": 0.0,
            "scale": 0.001
        }
    },"""