In [13]:
import glob
import os
import re
import time
import random

import ee
import pandas as pd

# Authentication is left commented out; presumably it only needs to be run
# (uncommented) when no Earth Engine credentials are stored yet -- TODO confirm.
#ee.Authenticate()
# Initialize the Earth Engine client before any ee.* object is created below.
ee.Initialize()

In [7]:
# NOTE(review): `year` is not referenced by any other visible cell; the site
# asset path below hard-codes 2021 separately.
year = '2021'

# Impact-site polygons; each site is looked up by its "id" property later on.
sites = ee.FeatureCollection("projects/gef-ld-toolbox/assets/impact_sites_2021_round2")

# Species polygons for all IUCN species are stored as eleven numbered part
# assets (suffixes 01 .. 11). Build the paths from one template instead of
# repeating the full asset path eleven times.
IUCN_RANGE_ASSET = "projects/ci_geospatial_assets/iucn_ranges/iucn_all_simple_20191210_{:02d}"
species_parts = [ee.FeatureCollection(IUCN_RANGE_ASSET.format(part_number))
                 for part_number in range(1, 12)]

# Keep the individual part names available for any cell that refers to them.
t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11 = species_parts

# Merge the parts left to right: t1.merge(t2).merge(t3)...merge(t11).
all_species = species_parts[0]
for species_part in species_parts[1:]:
    all_species = all_species.merge(species_part)

In [8]:
# Fetch only the "id" property of every site. Calling getInfo() on the whole
# collection would also download every site geometry just to read one field.
site_IDs = sites.aggregate_array('id').getInfo()

In [9]:
# IUCN categories selected (or excluded) by get_species_filtered.
_THREATENED_CATEGORIES = ["CR", "EN", "VU"]

# Columns written to every exported CSV.
_EXPORT_SELECTORS = ['binomial', 'category', 'kingdom', 'phylum', 'class', 'family', 'iucn_group']


def _export_species_at_site(species, sites, site_id, export_name):
    """Start a CSV export of the species found at one site.

    Shared implementation behind get_species_filtered() and get_species().

    Args:
        species: ee.FeatureCollection of species polygons (already filtered by
            category when the caller wants that).
        sites: ee.FeatureCollection of sites carrying an "id" property.
        site_id: value of the "id" property selecting the site.
        export_name: text appended to the task description and to the output
            file name prefix.

    Returns:
        The export task, already started.
    """
    site_geometry = sites.filter(ee.Filter.eq("id", site_id)).geometry()
    # Keep species whose polygons pass filterBounds for the site geometry,
    # then keep a single feature per "binomial" value.
    species_at_site = species.filterBounds(site_geometry).distinct("binomial")
    task = ee.batch.Export.table.toCloudStorage(
        collection=species_at_site,
        description="exportSpecies_{}.csv".format(export_name),
        bucket="trendsearth",
        fileNamePrefix='species_data/species_{}'.format(export_name),
        fileFormat='CSV',
        selectors=list(_EXPORT_SELECTORS))
    task.start()
    return task


def get_species_filtered(species, sites, site_id, filter_category=True):
    """Export the species at a site, split by IUCN category.

    Args:
        species: ee.FeatureCollection of species polygons with a "category"
            property.
        sites: ee.FeatureCollection of sites carrying an "id" property.
        site_id: value of the "id" property selecting the site.
        filter_category: when True, export only species whose category is
            CR, EN or VU (output suffix "CR-EN-VU"); when False, export all
            species whose category is not one of those (output suffix "other").

    Returns:
        The started export task. Output goes to the "trendsearth" bucket under
        "species_data/species_<site_id>_<suffix>".
    """
    is_threatened = ee.Filter.inList('category', _THREATENED_CATEGORIES)
    if filter_category:
        species = species.filter(is_threatened)
        suffix = 'CR-EN-VU'
    else:
        species = species.filter(is_threatened.Not())
        suffix = 'other'
    return _export_species_at_site(species, sites, site_id,
                                   "{}_{}".format(site_id, suffix))


def get_species(species, sites, site_id):
    """Export every species at a site, regardless of category.

    Args:
        species: ee.FeatureCollection of species polygons.
        sites: ee.FeatureCollection of sites carrying an "id" property.
        site_id: value of the "id" property selecting the site.

    Returns:
        The started export task. Output goes to the "trendsearth" bucket under
        "species_data/species_<site_id>".
    """
    return _export_species_at_site(species, sites, site_id, "{}".format(site_id))

In [10]:
# How many sites? The submission loop further down starts one export task per
# entry in site_IDs, so this is also the number of tasks that will be queued.
print(len(site_IDs))

1478


# Submit tasks to GEE

In [None]:
# Queue one CR/EN/VU species export per site, keeping every started task in `t`.
t = []
n = 0
for n, site_ID in enumerate(site_IDs, start=1):
    submitted = get_species_filtered(all_species, sites, site_ID)
    t.append(submitted)
    # Pause briefly (up to 3 s) after every submission so GEE is not flooded.
    time.sleep(3 * random.random())
    if n % 300:
        continue
    # Every 300th submission, back off for much longer (up to 30 minutes).
    sec = random.random()*1800
    print('Finished submitting {} tasks. Sleeping for {} seconds...'.format(n, sec))
    time.sleep(sec)

Finished submitting 300 tasks. Sleeping for 538.7844349046926 seconds...
Finished submitting 600 tasks. Sleeping for 1701.2933049372084 seconds...
Finished submitting 900 tasks. Sleeping for 621.3375657244209 seconds...
Finished submitting 1200 tasks. Sleeping for 1340.2839616963897 seconds...


In [None]:
# Tally the Earth Engine tasks returned by Task.list() by their state.
stats = {}
# Loop variable is `task`, not `t`, so the list of submitted tasks built by the
# submission cell is not overwritten.
for task in ee.batch.Task.list():
    # Start from 0 and add 1 so the first task seen in a state counts as one
    # (previously the first occurrence was recorded as 0).
    stats[task.state] = stats.get(task.state, 0) + 1
print(stats)

In [None]:
# Assemble data into a single CSV after it is downloaded

In [23]:
# Combine the downloaded per-site CSVs for one suffix into a single table with
# an added `site_id` column, and write it next to the inputs.
suffix = 'CR-EN-VU'
sp_data_path = os.path.join('D:/', 'Data', 'Impacts_Data', 'species')

# Per-site exports are named "species_<site id>_<suffix>...csv". Anchoring on
# the suffix keeps files exported with a different suffix out of this table and
# also skips this cell's own output ("species_all_sites_<suffix>.csv"), which
# the looser "^species_" prefix match picked up whenever the cell was re-run.
site_file_re = re.compile(r'^species_([A-Za-z0-9]+)_' + re.escape(suffix) + r'.*\.csv$')

li = []
# sorted() makes the row order of the combined file independent of the
# directory listing order.
for fname in sorted(os.listdir(sp_data_path)):
    match = site_file_re.search(fname)
    if match is None:
        continue
    df = pd.read_csv(os.path.join(sp_data_path, fname), index_col=None, header=0)
    df['site_id'] = match.group(1)
    li.append(df)

if not li:
    # pd.concat([]) would fail with an unhelpful "No objects to concatenate".
    raise FileNotFoundError(
        'No per-site species CSVs with suffix {!r} found in {}'.format(suffix, sp_data_path))

frame = pd.concat(li, axis=0, ignore_index=True)
frame.to_csv(os.path.join(sp_data_path, f'species_all_sites_{suffix}.csv'), index=False)