In [1]:
import ee
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [39]:
# Open the Earth Engine session used by every cell below.
# On first use, authentication must be completed before this call will succeed:
# https://developers.google.com/earth-engine/guides/python_install-conda#windows_5
ee.Initialize()

In [40]:
# Earth Engine asset holding the 10 km grid (created separately in the script
# "00_create_10_by_10_grid.R"). The grid shapefile must be uploaded to the assets
# of your GEE account before this line is run.
grid = ee.FeatureCollection("projects/ee-akawano/assets/grid_10km")

# Name passed as `folder` to the Drive exports below (the Google Drive associated
# with your GEE account).
output_drive = "s5p_cloud_L3_10km"
# NOTE(review): `timezone` is not referenced by any other cell visible in this notebook.
timezone = "utc"

In [41]:
# Sentinel-5P OFFL L3 cloud product, restricted to the "cloud_fraction" band:
# https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_CLOUD
# Date range noted by the author: 2018-07-04 to 2022-12-14
# (presumably the availability when the notebook was written -- confirm before extending).
# The variable is named `maiac` although it holds the S5P cloud collection.
maiac = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_CLOUD").select("cloud_fraction")
# Projection of the first image in the collection; reused as `crs` when reducing to the grid.
maiac_proj = maiac.first().projection()
# Nominal scale of that projection, fetched to the client with getInfo();
# reused as `scale` when reducing to the grid.
pixel_res = maiac_proj.nominalScale().getInfo()

In [42]:
# Build two parallel lists of 3-month export windows:
# window i runs from start_date_list[i] to end_date_list[i].
#
# The boundary dates are ISO "YYYY-MM-DD" strings, so they are parsed directly.
# Passing format="%Y/%m/%d" (slashes) does not match these strings and raises
# a ValueError under strict format checking (pandas 2.x).
first_window_start = pd.Timestamp("2018-07-01")
window_start_limit = pd.Timestamp("2022-08-01")  # exclusive: no window starts on/after this day
window_length = pd.DateOffset(months=3)

start_date_list = []
end_date_list = []

# Each window's end is the next window's start, so a single loop keeps the
# two lists aligned by construction.
window_start = first_window_start
while window_start < window_start_limit:
    window_end = window_start + window_length
    start_date_list.append(window_start.strftime("%Y-%m-%d"))
    end_date_list.append(window_end.strftime("%Y-%m-%d"))
    window_start = window_end

# Print the lists of date strings
print(start_date_list)
print(end_date_list)

['2018-07-01', '2018-10-01', '2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01', '2020-01-01', '2020-04-01', '2020-07-01', '2020-10-01', '2021-01-01', '2021-04-01', '2021-07-01', '2021-10-01', '2022-01-01', '2022-04-01', '2022-07-01']
['2018-10-01', '2019-01-01', '2019-04-01', '2019-07-01', '2019-10-01', '2020-01-01', '2020-04-01', '2020-07-01', '2020-10-01', '2021-01-01', '2021-04-01', '2021-07-01', '2021-10-01', '2022-01-01', '2022-04-01', '2022-07-01', '2022-10-01']


In [43]:
# Calculate how many days between start and end days
def calculate_length(start_date, end_date):
    """Return the number of whole days from `start_date` to `end_date`.

    Both arguments are converted with `pd.to_datetime`; the result is the
    `.days` component of their difference (negative when `end_date` is earlier).
    """
    first_day, last_day = (pd.to_datetime(day) for day in (start_date, end_date))
    return (last_day - first_day).days

In [44]:
def create_image(start_date, length_date):
    """Build one daily-mean image per day, starting at `start_date`.

    Parameters
    ----------
    start_date : str or datetime-like
        First day of the series; converted with `pd.to_datetime`.
    length_date : int
        Number of consecutive days to generate.

    Returns
    -------
    ee.ImageCollection
        One image per day, in chronological order. Each image is the mean of
        the images of the module-level collection `maiac` selected by
        `filterDate(day, day + 1 day)` and carries a "start_date" property
        produced by formatting the day with the pattern "yMMdd".
    """
    # Parse the start date once; it does not change between iterations.
    first_day = pd.to_datetime(start_date)
    # timezone is default utc in ee.Date.fromYMD
    first_ee_day = ee.Date.fromYMD(first_day.year, first_day.month, first_day.day)

    # Collect the images client-side and hand the whole list to Earth Engine at
    # once, instead of growing a server-side ee.List with chained .add() calls.
    daily_images = []
    for day_offset in range(length_date):
        day_start = first_ee_day.advance(day_offset, 'day')
        day_end = day_start.advance(1, 'day')
        # NOTE(review): a day with no source images still produces an entry here;
        # confirm the downstream grid reduction copes with such images (the
        # early-July 2018 exports in this notebook are annotated as erroring).
        daily_mean = maiac.filterDate(day_start, day_end).reduce(ee.Reducer.mean())
        daily_images.append(
            ee.Image(daily_mean).set("start_date", day_start.format("yMMdd"))
        )
    return ee.ImageCollection.fromImages(daily_images)

In [45]:
# For each day, calculate average for each cell grid
def process_image(start_date, end_date):
    """Average the daily images over every grid cell for the given window.

    The window length comes from `calculate_length(start_date, end_date)` and
    the daily images from `create_image`. Each daily image is reduced over the
    module-level `grid` with a mean reducer (using `maiac_proj` as crs and
    `pixel_res` as scale), every resulting feature is stamped with the image's
    "start_date", and the per-day collections are flattened into a single
    feature collection, which is returned.
    """
    n_days = calculate_length(start_date, end_date)
    daily_images = create_image(start_date, n_days)

    def reduce_to_grid(daily_im):
        # Mean of the image inside each grid cell.
        cell_means = daily_im.reduceRegions(
            collection=grid,
            reducer=ee.Reducer.mean(),
            crs=maiac_proj,
            scale=pixel_res,
        )
        # Copy the image's date onto every feature so the rows stay identifiable
        # once the days are flattened together.
        return cell_means.map(
            lambda f: f.set("start_date", daily_im.get("start_date"))
        )

    per_day_collections = daily_images.map(reduce_to_grid)
    return ee.FeatureCollection(per_day_collections).flatten()

In [46]:
# Feature properties written to the exported CSVs (passed as `selectors` to the
# Drive exports below): the grid cell identifier, the reduced "mean" value and
# the "start_date" label set in process_image.
export_properties = ["grid_id", "mean", "start_date"]
# Bare expression so the notebook displays the list.
export_properties

['grid_id', 'mean', 'start_date']

In [11]:
# Queue one Drive export per window, pairing each entry of start_date_list
# with the entry of end_date_list at the same position.
for start_date, end_date in zip(start_date_list, end_date_list):
    grid_cloud = process_image(start_date, end_date)
    # The task description doubles as the exported file name.
    output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
    aod_task = ee.batch.Export.table.toDrive(
        collection=grid_cloud,
        folder=output_drive,
        description=output_name,
        fileFormat="CSV",
        selectors=export_properties)
    # Interpolate the name into the message so it prints as plain text
    # rather than as a one-element set.
    print(f"saving file as {output_name}")
    aod_task.start()

saving file as {'cloud_10km_grid_2018-07-01_to_2018-10-01'}
saving file as {'cloud_10km_grid_2018-10-01_to_2019-01-01'}
saving file as {'cloud_10km_grid_2019-01-01_to_2019-04-01'}
saving file as {'cloud_10km_grid_2019-04-01_to_2019-07-01'}
saving file as {'cloud_10km_grid_2019-07-01_to_2019-10-01'}
saving file as {'cloud_10km_grid_2019-10-01_to_2020-01-01'}
saving file as {'cloud_10km_grid_2020-01-01_to_2020-04-01'}
saving file as {'cloud_10km_grid_2020-04-01_to_2020-07-01'}
saving file as {'cloud_10km_grid_2020-07-01_to_2020-10-01'}
saving file as {'cloud_10km_grid_2020-10-01_to_2021-01-01'}
saving file as {'cloud_10km_grid_2021-01-01_to_2021-04-01'}
saving file as {'cloud_10km_grid_2021-04-01_to_2021-07-01'}
saving file as {'cloud_10km_grid_2021-07-01_to_2021-10-01'}
saving file as {'cloud_10km_grid_2021-10-01_to_2022-01-01'}
saving file as {'cloud_10km_grid_2022-01-01_to_2022-04-01'}
saving file as {'cloud_10km_grid_2022-04-01_to_2022-07-01'}
saving file as {'cloud_10km_grid_2022-07

In [18]:
# Export one additional window that starts where the last 3-month window ends.
start_date = '2022-10-01'
end_date = '2022-11-01'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2022-10-01_to_2022-11-01'}


In [25]:
# Export the first window again, this time starting on 2018-07-04 instead of 2018-07-01.
start_date = '2018-07-04'
end_date = '2018-10-01'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-04_to_2018-10-01'}


In [34]:
# The export cloud_10km_grid_2018-07-04_to_2018-10-01 errored;
# export a sub-window of that range instead.
start_date = '2018-07-04'
end_date = '2018-08-01'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-04_to_2018-08-01'}


In [35]:
# The export cloud_10km_grid_2018-07-04_to_2018-10-01 errored;
# export a sub-window of that range instead.
start_date = '2018-08-01'
end_date = '2018-09-01'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-08-01_to_2018-09-01'}


In [36]:
# The export cloud_10km_grid_2018-07-04_to_2018-10-01 errored;
# export a sub-window of that range instead.
start_date = '2018-09-01'
end_date = '2018-10-01'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-09-01_to_2018-10-01'}


In [37]:
# The export cloud_10km_grid_2018-07-04_to_2018-08-01 errored;
# export a sub-window of that range instead.
start_date = '2018-07-04'
end_date = '2018-07-15'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-04_to_2018-07-15'}


In [38]:
# The export cloud_10km_grid_2018-07-04_to_2018-08-01 errored;
# export a sub-window of that range instead.
start_date = '2018-07-15'
end_date = '2018-08-01'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-15_to_2018-08-01'}


In [47]:
# The export cloud_10km_grid_2018-07-04_to_2018-07-15 errored;
# export a sub-window of that range instead.
start_date = '2018-07-04'
end_date = '2018-07-10'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-04_to_2018-07-10'}


In [48]:
# The export cloud_10km_grid_2018-07-04_to_2018-07-15 errored;
# export a sub-window of that range instead.
start_date = '2018-07-10'
end_date = '2018-07-15'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-10_to_2018-07-15'}


In [49]:
# Sub-window of the cloud_10km_grid_2018-07-04_to_2018-07-10 range.
start_date = '2018-07-04'
end_date = '2018-07-07'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-04_to_2018-07-07'}


In [50]:
# Sub-window of the cloud_10km_grid_2018-07-04_to_2018-07-10 range.
start_date = '2018-07-07'
end_date = '2018-07-10'

grid_cloud = process_image(start_date, end_date)
# The task description doubles as the exported file name.
output_name = f"cloud_10km_grid_{start_date}_to_{end_date}"
aod_task = ee.batch.Export.table.toDrive(
    collection=grid_cloud,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name so it prints as plain text rather than as a one-element set.
print(f"saving file as {output_name}")
aod_task.start()

saving file as {'cloud_10km_grid_2018-07-07_to_2018-07-10'}


In [None]:
# To cancel submitted export tasks, use the Earth Engine task manager:
# https://code.earthengine.google.com/tasks