In [1]:
import ee
import pandas as pd
from datetime import datetime, timedelta
from dateutil.relativedelta import relativedelta

In [12]:
#ee.Authenticate()

Enter verification code: [REDACTED]

Successfully saved authorization token.


In [25]:
# First-time setup: Earth Engine credentials must already be stored on this machine
# (run ee.Authenticate() once) before ee.Initialize() is called.
# Installation / authentication guide:
# https://developers.google.com/earth-engine/guides/python_install-conda#windows_5
ee.Initialize()

In [26]:
# Earth Engine asset holding the 10km grid created separately in the script "00_create_10_by_10_grid.R".
# The 10km grid shapefile needs to be uploaded to the assets of your own GEE account before running this line.
grid = ee.FeatureCollection("projects/ee-akawano/assets/grid_10km")

# Name of the Google Drive folder (in the Drive associated with your GEE account) that receives
# the exported CSV files; it is passed as `folder` to ee.batch.Export.table.toDrive.
output_drive = "s5p_HCHO_L3_10km"
# NOTE(review): `timezone` is not referenced by any code visible in this notebook - confirm it is still needed.
timezone = "utc"

In [27]:
# Sentinel-5P OFFL HCHO: Offline Formaldehyde
# https://developers.google.com/earth-engine/datasets/catalog/COPERNICUS_S5P_OFFL_L3_HCHO
# 2018-12-05 to 2022-12-14 (presumably the catalog's date coverage when this was written)
# NOTE(review): the `maiac*` names look carried over from another script; here they hold the
# S5P HCHO collection restricted to its tropospheric column band.
maiac = ee.ImageCollection("COPERNICUS/S5P/OFFL/L3_HCHO").select("tropospheric_HCHO_column_number_density")
# Projection of the first image in the collection; passed as `crs` to reduceRegions in process_image.
maiac_proj = maiac.first().projection()
# Nominal scale of that projection, fetched to the client with getInfo(); passed as `scale` to reduceRegions.
pixel_res = maiac_proj.nominalScale().getInfo()

In [28]:
# Build the paired window boundaries that the export loop zips together.
# The format must use the same "-" separators as the date literals below:
# "%Y/%m/%d" does not describe "2018-12-05", and pandas releases that check
# the format strictly reject that mismatch with a ValueError.
date_format = "%Y-%m-%d"

# Consecutive boundaries are 3 calendar months apart.
window_step = pd.DateOffset(months=3)


def _window_boundaries(first_day, stop_day):
    """Return date strings from first_day up to, but not including, stop_day.

    Parameters
    ----------
    first_day, stop_day : pd.Timestamp
        First boundary to emit and the exclusive upper limit.

    Returns
    -------
    list of str
        Boundaries formatted with `date_format`, spaced by `window_step`.
        Empty when first_day is not earlier than stop_day.
    """
    boundaries = []
    current = first_day
    while current < stop_day:
        boundaries.append(current.strftime(date_format))
        current += window_step
    return boundaries


# Start dates of the export windows
start_date_1 = pd.to_datetime("2018-12-05", format=date_format)
end_date_1 = pd.to_datetime("2022-08-01", format=date_format)
start_date_list = _window_boundaries(start_date_1, end_date_1)

# End dates of the export windows
start_date_2 = pd.to_datetime("2019-02-01", format=date_format)
end_date_2 = pd.to_datetime("2022-11-01", format=date_format)
end_date_list = _window_boundaries(start_date_2, end_date_2)

# NOTE(review): every window ends on the 1st of a month while the next one starts
# on the 5th of the following month (e.g. 2019-02-01 -> 2019-03-05), so the days in
# between belong to no (start, end) pair - confirm this gap is intended.

# Print the lists of date strings
print(start_date_list)
print(end_date_list)

['2018-12-05', '2019-03-05', '2019-06-05', '2019-09-05', '2019-12-05', '2020-03-05', '2020-06-05', '2020-09-05', '2020-12-05', '2021-03-05', '2021-06-05', '2021-09-05', '2021-12-05', '2022-03-05', '2022-06-05']
['2019-02-01', '2019-05-01', '2019-08-01', '2019-11-01', '2020-02-01', '2020-05-01', '2020-08-01', '2020-11-01', '2021-02-01', '2021-05-01', '2021-08-01', '2021-11-01', '2022-02-01', '2022-05-01', '2022-08-01']


In [29]:
# Number of whole days separating the start and end of a window
def calculate_length(start_date, end_date):
    """Return the number of whole days from start_date to end_date.

    Both arguments are parsed with pd.to_datetime. The result is the `.days`
    component of their difference, so it is negative when end_date precedes
    start_date.
    """
    first_day, last_day = (pd.to_datetime(day) for day in (start_date, end_date))
    return (last_day - first_day).days

In [30]:
def create_image(start_date, length_date):
    """Build a collection holding one daily-mean image per day of a window.

    Parameters
    ----------
    start_date : str or datetime-like
        First day of the window, parsed with pd.to_datetime; only its year,
        month and day are used.
    length_date : int
        Number of consecutive days to cover. A value <= 0 yields an empty
        collection.

    Returns
    -------
    ee.ImageCollection
        One image per day, each the mean of the images in the module-level
        `maiac` collection selected by filterDate(day, day + 1 day), carrying
        a "start_date" property formatted with the pattern "yMMdd".
    """
    # Parse the start day once; it does not change between iterations.
    first_day = pd.to_datetime(start_date)
    # timezone is default utc in ee.Date.fromYMD
    window_start = ee.Date.fromYMD(first_day.year, first_day.month, first_day.day)

    # Collect the images client-side and hand the list over in one call rather
    # than growing a server-side ee.List through chained .add() calls.
    daily_images = []
    for day_offset in range(length_date):
        im_date = window_start.advance(day_offset, 'day')
        im_date_end = im_date.advance(1, 'day')
        # NOTE(review): a day with no source images still produces an entry here;
        # confirm how the downstream reduceRegions step behaves for such days.
        daily_mean = maiac.filterDate(im_date, im_date_end).reduce(ee.Reducer.mean())
        daily_images.append(ee.Image(daily_mean).set("start_date", im_date.format("yMMdd")))
    return ee.ImageCollection.fromImages(daily_images)

In [31]:
# Per-day mean of the HCHO band over every grid cell
def process_image(start_date, end_date):
    """Return one feature per (day, grid cell) for the window start_date..end_date.

    The window length comes from calculate_length and the daily images from
    create_image. Each daily image is reduced over the module-level `grid`
    with a mean reducer (crs `maiac_proj`, scale `pixel_res`), every resulting
    feature is tagged with the image's "start_date", and the per-day
    collections are flattened into a single ee.FeatureCollection.
    """
    def reduce_one_day(daily_im):
        # Zonal mean of this day's image over all grid cells.
        per_cell = daily_im.reduceRegions(
            collection=grid,
            reducer=ee.Reducer.mean(),
            crs=maiac_proj,
            scale=pixel_res,
        )
        # Copy the day label from the image onto every cell feature.
        day_label = daily_im.get("start_date")
        return per_cell.map(lambda cell: cell.set("start_date", day_label))

    n_days = calculate_length(start_date, end_date)
    daily_images = create_image(start_date, n_days)
    return ee.FeatureCollection(daily_images.map(reduce_one_day)).flatten()

In [32]:
# Feature properties passed as `selectors` to the CSV exports
export_properties = [
    "grid_id",
    "mean",
    "start_date",
]
export_properties

['grid_id', 'mean', 'start_date']

In [10]:
# Queue one Drive export per (start, end) window from start_date_list / end_date_list
for start_date, end_date in zip(start_date_list, end_date_list):
    grid_HCHO = process_image(start_date, end_date)
    # The task description is built from the window boundaries.
    output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
    export_task = ee.batch.Export.table.toDrive(
        collection=grid_HCHO,
        folder=output_drive,
        description=output_name,
        fileFormat="CSV",
        selectors=export_properties)
    # Interpolate the name into the message; passing {output_name} as a second
    # argument printed a one-element set instead of the name.
    print(f"saving file as {output_name}")
    export_task.start()

saving file as {'HCHO_10km_grid_2018-12-05_to_2019-02-01'}
saving file as {'HCHO_10km_grid_2019-03-05_to_2019-05-01'}
saving file as {'HCHO_10km_grid_2019-06-05_to_2019-08-01'}
saving file as {'HCHO_10km_grid_2019-09-05_to_2019-11-01'}
saving file as {'HCHO_10km_grid_2019-12-05_to_2020-02-01'}
saving file as {'HCHO_10km_grid_2020-03-05_to_2020-05-01'}
saving file as {'HCHO_10km_grid_2020-06-05_to_2020-08-01'}
saving file as {'HCHO_10km_grid_2020-09-05_to_2020-11-01'}
saving file as {'HCHO_10km_grid_2020-12-05_to_2021-02-01'}
saving file as {'HCHO_10km_grid_2021-03-05_to_2021-05-01'}
saving file as {'HCHO_10km_grid_2021-06-05_to_2021-08-01'}
saving file as {'HCHO_10km_grid_2021-09-05_to_2021-11-01'}
saving file as {'HCHO_10km_grid_2021-12-05_to_2022-02-01'}
saving file as {'HCHO_10km_grid_2022-03-05_to_2022-05-01'}
saving file as {'HCHO_10km_grid_2022-06-05_to_2022-08-01'}


In [11]:
start_date = '2022-08-01'
end_date = '2022-11-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-08-01_to_2022-11-01'}


In [21]:
# Error: HCHO_10km_grid_2021-06-05_to_2021-08-01, HCHO_10km_grid_2022-06-05_to_2022-08-01
start_date = '2021-06-05'
end_date = '2021-07-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-06-05_to_2021-07-01'}


In [22]:
# Error: HCHO_10km_grid_2021-06-05_to_2021-08-01, HCHO_10km_grid_2022-06-05_to_2022-08-01
start_date = '2021-07-01'
end_date = '2021-08-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-01_to_2021-08-01'}


In [23]:
# Error: HCHO_10km_grid_2022-06-05_to_2022-08-01
start_date = '2022-06-05'
end_date = '2022-07-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-06-05_to_2022-07-01'}


In [24]:
# Error: HCHO_10km_grid_2022-06-05_to_2022-08-01
start_date = '2022-07-01'
end_date = '2022-08-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-01_to_2022-08-01'}


In [33]:
# HCHO_10km_grid_2022-07-01_to_2022-08-01, HCHO_10km_grid_2021-07-01_to_2021-08-01
start_date = '2022-07-01'
end_date = '2022-07-15'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-01_to_2022-07-15'}


In [34]:
# HCHO_10km_grid_2022-07-01_to_2022-08-01, HCHO_10km_grid_2021-07-01_to_2021-08-01
start_date = '2022-07-15'
end_date = '2022-08-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-15_to_2022-08-01'}


In [35]:
# HCHO_10km_grid_2022-07-01_to_2022-08-01, HCHO_10km_grid_2021-07-01_to_2021-08-01
start_date = '2021-07-01'
end_date = '2021-07-15'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-01_to_2021-07-15'}


In [36]:
# HCHO_10km_grid_2022-07-01_to_2022-08-01, HCHO_10km_grid_2021-07-01_to_2021-08-01
start_date = '2021-07-15'
end_date = '2021-08-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-15_to_2021-08-01'}


In [37]:
# HCHO_10km_grid_2022-07-15_to_2022-08-01, HCHO_10km_grid_2021-07-15_to_2021-08-01
start_date = '2022-07-15'
end_date = '2022-07-22'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-15_to_2022-07-22'}


In [38]:
# HCHO_10km_grid_2022-07-15_to_2022-08-01, HCHO_10km_grid_2021-07-15_to_2021-08-01
start_date = '2022-07-22'
end_date = '2022-08-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-22_to_2022-08-01'}


In [39]:
# HCHO_10km_grid_2022-07-15_to_2022-08-01, HCHO_10km_grid_2021-07-15_to_2021-08-01
start_date = '2021-07-15'
end_date = '2021-07-22'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-15_to_2021-07-22'}


In [40]:
# HCHO_10km_grid_2022-07-15_to_2022-08-01, HCHO_10km_grid_2021-07-15_to_2021-08-01
start_date = '2021-07-22'
end_date = '2021-08-01'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-22_to_2021-08-01'}


In [41]:
# HCHO_10km_grid_2022-07-15_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-22
start_date = '2022-07-15'
end_date = '2022-07-18'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-15_to_2022-07-18'}


In [42]:
# HCHO_10km_grid_2022-07-15_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-22
start_date = '2022-07-18'
end_date = '2022-07-22'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-18_to_2022-07-22'}


In [43]:
# HCHO_10km_grid_2022-07-15_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-22
start_date = '2021-07-15'
end_date = '2021-07-18'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-15_to_2021-07-18'}


In [44]:
# HCHO_10km_grid_2022-07-15_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-22
start_date = '2021-07-18'
end_date = '2021-07-22'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-18_to_2021-07-22'}


In [45]:
# HCHO_10km_grid_2022-07-18_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-18
start_date = '2022-07-18'
end_date = '2022-07-20'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-18_to_2022-07-20'}


In [46]:
# HCHO_10km_grid_2022-07-18_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-18
start_date = '2022-07-20'
end_date = '2022-07-22'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2022-07-20_to_2022-07-22'}


In [47]:
# HCHO_10km_grid_2022-07-18_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-18
start_date = '2021-07-15'
end_date = '2021-07-17'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-15_to_2021-07-17'}


In [48]:
# HCHO_10km_grid_2022-07-18_to_2022-07-22, HCHO_10km_grid_2021-07-15_to_2021-07-18
# NOTE(review): this window starts on 2021-07-16, a day that is also covered by the
# 2021-07-15_to_2021-07-17 export - confirm whether '2021-07-17' was intended.
start_date = '2021-07-16'
end_date = '2021-07-18'

# Queue a single Drive export for this window.
grid_HCHO = process_image(start_date, end_date)
output_name = f"HCHO_10km_grid_{start_date}_to_{end_date}"
export_task = ee.batch.Export.table.toDrive(
    collection=grid_HCHO,
    folder=output_drive,
    description=output_name,
    fileFormat="CSV",
    selectors=export_properties)
# Interpolate the name into the message; the old call printed a one-element set.
print(f"saving file as {output_name}")
export_task.start()

saving file as {'HCHO_10km_grid_2021-07-16_to_2021-07-18'}


In [50]:
# HCHO_10km_grid_2022-07-18_to_2022-07-20, HCHO_10km_grid_2021-07-15_to_2021-07-17

In [None]:
# if you want to cancel tasks:
# https://code.earthengine.google.com/tasks