In [None]:
import ee
from datetime import datetime
import time
import pandas as pd

# Name of the Google Drive folder used throughout the notebook: it is passed
# as the export destination and is also the directory (under MyDrive) that is
# scanned for already-exported files.
folder = "Basin Differentiation"

# Authenticate first, then initialise the Earth Engine client with the
# given Cloud project. The order matters: Initialize needs credentials.
ee.Authenticate()
ee.Initialize(project='new-madrid-displacement')

In [None]:
import json

# HydroSHEDS basin polygons from the "hybas_7" asset; the source of both the
# basin ID list and the per-basin geometries used later in the notebook.
hydrobasins = ee.FeatureCollection("WWF/HydroSHEDS/v1/Basins/hybas_7")

In [None]:
# Date window applied to the ERA5-Land daily aggregate collection.
# NOTE(review): Earth Engine's filterDate is documented as end-exclusive, so
# this should cover 1980-01-01 through 2022-12-31 — confirm against the docs.
start = '1980-01-01'
end = '2023-01-01'
era5 = ee.ImageCollection('ECMWF/ERA5_LAND/DAILY_AGGR').filterDate(start, end)


def convertImageFactory(geometry):
    """Build a per-image mapper that averages selected bands over ``geometry``.

    Args:
        geometry: Region handed to ``reduceRegion`` as its ``geometry``
            argument; it is captured by the returned closure.

    Returns:
        A function that takes one image and returns an ``ee.Feature`` with no
        geometry. Its properties are the mean-reduced value of each selected
        band plus a ``date`` property holding the image's formatted date.
    """
    # Bands kept from each image before reducing.
    bandNames = (
        'total_precipitation_sum',
        'snowfall_sum',
        'temperature_2m_min',
        'temperature_2m_max',
        'surface_net_solar_radiation_sum',
        'surface_net_thermal_radiation_sum',
        'surface_pressure',
    )

    def convertImages(image):
        selected = image.select(*bandNames)
        regionMeans = selected.reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=geometry,
            maxPixels=1e13,
        )
        dateLabel = selected.date().format()
        # Geometry is None: only the reduced values and the date are exported.
        return ee.Feature(None, regionMeans).set('date', dateLabel)

    return convertImages


# Re-tag every basin with a string HYBAS_ID so prefix filters can be applied.
basinsStringed = hydrobasins.map(
    lambda f: f.set('HYBAS_ID', ee.String(f.get('HYBAS_ID')))
)
# Keep basins whose HYBAS_ID starts with '7' or '8'.
# NOTE(review): presumably the HydroBASINS region codes for North America and
# the Arctic — confirm against the dataset documentation.
northAmericaBasins = basinsStringed.filter(
    ee.Filter.Or(
        ee.Filter.stringStartsWith('HYBAS_ID', '7'),
        ee.Filter.stringStartsWith('HYBAS_ID', '8'),
    )
)
# Distinct PFAF_ID values, sorted server-side, then pulled to the client.
basinIDs = northAmericaBasins.aggregate_array('PFAF_ID').distinct().sort().getInfo()
# Client-side type of the IDs, used to restore them after string comparison.
basinType = type(basinIDs[0])

from google.colab import drive
drive.mount('/content/drive', force_remount=True)

import os
os.chdir(f'/content/drive/MyDrive/{folder}/')
# For every file already in the folder, take the text after the last "_" and
# before the first following "."; for the export names used by this notebook
# that token is the basin ID.
completedIndex = [fileName.split("_")[-1].split(".")[0] for fileName in os.listdir(".")]

# Drop basins that already have an export. Filtering against a set (rather
# than taking a set difference) keeps the sorted order from above, so the
# submission order is the same on every run.
completedSet = set(completedIndex)
unfinishedIDs = [basinID for basinID in map(str, basinIDs) if basinID not in completedSet]
basinIDs = [basinType(basinID) for basinID in unfinishedIDs]


# Queue one Drive CSV export per remaining basin.
totalBasins = len(basinIDs)
for i, basinID in enumerate(basinIDs):
    # Collect the features carrying this PFAF_ID and merge them into a single
    # geometry (the argument to union is passed through unchanged).
    basinFilter = ee.Filter.eq('PFAF_ID', ee.Number(basinID))
    basinFeatures = hydrobasins.filter(basinFilter)
    geometry = basinFeatures.union(1).first().geometry()

    # Restrict the imagery to the basin and reduce each image to one feature.
    dataset = era5.filterBounds(geometry)
    imageMap = convertImageFactory(geometry)
    timeSeries = ee.FeatureCollection(dataset.map(imageMap))

    exportDescription = f'Basin_AreaWeighted_TS_{basinID}'
    task = ee.batch.Export.table.toDrive(
        collection=timeSeries,
        description=exportDescription,
        folder=folder,
        fileFormat='CSV',
    )
    # Fire and forget: the task is started but not waited on.
    task.start()

    # Stop submitting once the index passes 2000; the check runs after the
    # current task has already been started.
    if i > 2000:
        break

    # "\r" with end="" rewrites the same console line as a progress counter.
    print(f"\r{i + 1} / {totalBasins} Started", end="")




Mounted at /content/drive
924 / 924 Started