## Setup

In [97]:
# Earth Engine Python client. Authenticate() prompts interactively for a
# verification code; Initialize() must run before any other ee.* call below.
import ee
ee.Authenticate()
ee.Initialize()

Enter verification code:  [REDACTED]



Successfully saved authorization token.


In [100]:
import numpy as np
import geetools
from geetools import ui, cloud_mask
import os, datetime
import config as cf
import pandas as pd
import eeconvert
import time
import geopandas as gpd

# Build the geetools cloud-masking callables once; extract_sat maps them over
# the Landsat surface-reflectance and Sentinel-2 image collections.
cloud_mask_landsatSR = cloud_mask.landsatSR()
cloud_mask_sentinel2 = cloud_mask.sentinel2()

## Parameters

In [101]:
# Survey identifier; used to build the data folder paths and the Drive export folder name
SURVEY_NAME = 'OPM'
# When True, previously exported CSVs are deleted and every dataset is extracted again
REEXTRACT_IF_FILE_EXISTS = True

## Functions

In [107]:
# https://gis.stackexchange.com/questions/257727/iterate-over-imagecollection-returning-pandas-dataframe-using-earth-engine-pyt
def fc2df(fc):
    '''
    Convert a FeatureCollection into a pandas DataFrame of feature properties.

    Inputs:
        fc: object exposing getInfo(), which must return a dict with a
            'features' list; each feature is a dict with a 'properties' dict
            (e.g. an ee.FeatureCollection).
    Returns:
        (pandas DataFrame) one row per feature, one column per property.
        Geometries are not included. An empty collection yields an empty
        DataFrame.
    '''
    # getInfo() pulls the whole collection client-side as plain Python objects
    features = fc.getInfo()['features']

    # Keep only the attributes. A property that happens to be called
    # 'geometry' is skipped so the output never carries a geometry column.
    records = [
        {key: value for key, value in feature['properties'].items() if key != 'geometry'}
        for feature in features
    ]

    return pd.DataFrame(records)

def survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural):
    '''
    Convert pandas dataframe of survey locations to a feature collection of
    buffered points.

    Inputs:
        survey_df: pandas dataframe of survey locations. Must contain the
                   columns latitude, longitude, uid, year and urban_rural
                   ('U' or 'R'). Assumes coordinates in WGS84.
        buffer_size_urban: buffer distance applied to rows with urban_rural == 'U'
        buffer_size_rural: buffer distance applied to rows with urban_rural == 'R'
    Returns:
        (feature collection) one buffered feature per row, carrying the
        properties 'uid' and 'year' (year stored as a string).
    Raises:
        ValueError: if a row's urban_rural value is neither 'U' nor 'R'.
    '''

    buffer_by_type = {'U': buffer_size_urban, 'R': buffer_size_rural}

    survey_fc_list = []

    for i in range(survey_df.shape[0]):
        uid = survey_df['uid'].iloc[i]
        ur = survey_df['urban_rural'].iloc[i]

        # Fail loudly instead of reusing the previous row's buffer (or hitting
        # an unbound variable on the first row) when the code is unexpected.
        if ur not in buffer_by_type:
            raise ValueError("urban_rural must be 'U' or 'R'; got {!r} for uid {!r}".format(ur, uid))
        buffer_size = buffer_by_type[ur]

        point = ee.Geometry.Point([survey_df['longitude'].iloc[i],
                                   survey_df['latitude'].iloc[i]])
        f_i = ee.Feature(point,
                         {'uid': uid,
                          'year': str(survey_df['year'].iloc[i])})

        survey_fc_list.append(f_i.buffer(buffer_size))

    return ee.FeatureCollection(survey_fc_list)

# Sentinel-5P offline L3 products: satellite key -> (collection id, bands to export)
_S5P_PRODUCTS = {
    'uv_aer': ("COPERNICUS/S5P/OFFL/L3_AER_AI",
               ['absorbing_aerosol_index']),
    'CO': ("COPERNICUS/S5P/OFFL/L3_CO",
           ['CO_column_number_density', 'H2O_column_number_density']),
    'HCHO': ("COPERNICUS/S5P/OFFL/L3_HCHO",
             ['tropospheric_HCHO_column_number_density', 'tropospheric_HCHO_column_number_density_amf']),
    'NO2': ("COPERNICUS/S5P/OFFL/L3_NO2",
            ['NO2_column_number_density', 'tropospheric_NO2_column_number_density',
             'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density']),
    'ozone': ("COPERNICUS/S5P/OFFL/L3_O3",
              ['O3_column_number_density', 'O3_effective_temperature']),
    'SO2': ("COPERNICUS/S5P/OFFL/L3_SO2",
            ['SO2_column_number_density', 'SO2_column_number_density_amf', 'SO2_slant_column_number_density']),
    'CH4': ("COPERNICUS/S5P/OFFL/L3_CH4",
            ['CH4_column_volume_mixing_ratio_dry_air']),
}

# Sentinel-5P composites ignore the survey year and always use this fixed range
_S5P_DATE_RANGE = ("2018-01-01", '2020-12-31')

# ERA5 daily weather: satellite key -> (start suffix, end suffix) within the survey year
_ERA5_WINDOWS = {
    'ecmwf_weather': ('-01-01', '-12-31'),
    'ecmwf_weather_q1': ('-01-01', '-03-31'),
    'ecmwf_weather_q2': ('-04-01', '-06-30'),
    'ecmwf_weather_q3': ('-07-01', '-09-30'),
    'ecmwf_weather_q4': ('-10-01', '-12-31'),
}

_ERA5_BANDS = ['mean_2m_air_temperature',
               'minimum_2m_air_temperature',
               'maximum_2m_air_temperature',
               'total_precipitation']


def _same_year_window(year, start_suffix='-01-01', end_suffix='-12-31'):
    '''Return (start, end) date strings that both fall in `year`.'''
    return str(year) + start_suffix, str(year) + end_suffix


def _three_year_window(year_use):
    '''Return (start, end) date strings from Jan 1 of year_use - 1 to Dec 31 of year_use + 1.'''
    return str(year_use - 1) + '-01-01', str(year_use + 1) + '-12-31'


def _median_composite(collection_id, start, end):
    '''Return the per-pixel median of an image collection filtered to [start, end).'''
    return ee.ImageCollection(collection_id)\
        .filterDate(start, end)\
        .median()


def _landsat_sr_composite(collection_id, year_use):
    '''Return a cloud-masked, rescaled median Landsat SR composite around year_use.'''
    start, end = _three_year_window(year_use)
    return ee.ImageCollection(collection_id)\
        .filterDate(start, end)\
        .map(cloud_mask_landsatSR)\
        .median()\
        .multiply(0.0001)


def _prep_image(satellite, year):
    '''
    Build the image to reduce for one dataset.

    Returns:
        (image, bands, scale): the ee.Image, the list of property names to
        export after the reduction, and the reduction scale.
    Raises:
        ValueError: if `satellite` is not a supported key.
    '''

    # NOTE(review): Earth Engine documents filterDate's end date as exclusive,
    # so images stamped on the end date itself (e.g. Dec 31) are not included
    # in the windows below -- confirm this is intended.

    # WorldPop ---------------------------------------------------
    if satellite == 'worldpop':
        start, end = _same_year_window(year)
        image = _median_composite('WorldPop/GP/100m/pop', start, end)
        # After the reducer computes the sum, it names the value "sum", not population
        return image, ['sum'], 100

    # Landsat 7 ---------------------------------------------------
    if satellite == 'l7':
        image = _landsat_sr_composite('LANDSAT/LC07/C01/T1_SR', year)
        ndvi = image.normalizedDifference(['B4', 'B3']).rename('NDVI')
        image = image.addBands(ndvi)
        # NDVI is added to the image but is not part of the exported bands
        return image, ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7'], 100 # ok to upscale

    # Sentinel-5P products ----------------------------------------
    if satellite in _S5P_PRODUCTS:
        collection_id, bands = _S5P_PRODUCTS[satellite]
        # Starts in 2018; take all years
        image = _median_composite(collection_id, _S5P_DATE_RANGE[0], _S5P_DATE_RANGE[1])
        return image, bands, 1000 # ok to upscale

    # CSP gHM: Global Human Modification ---------------------------------
    if satellite == 'GlobalHumanModification':
        image = ee.ImageCollection("CSP/HM/GlobalHumanModification")\
            .median()
        # Original name is "gHM", but because only one value, it takes the
        # name of the reducer; we use mean
        return image, ['mean'], 1000 # ok to upscale

    # WorldClim BIO Variables V1 ---------------------------------
    if satellite == 'worldclim_bio':
        image = ee.Image('WORLDCLIM/V1/BIO')
        bands = ['bio01', 'bio02', 'bio03', 'bio04', 'bio05', 'bio06', 'bio07', 'bio08', 'bio09', 'bio10',
                 'bio11', 'bio12', 'bio13', 'bio14', 'bio15', 'bio16', 'bio17', 'bio18', 'bio19']
        return image, bands, 1000 # ok to upscale

    # Elevation - SRTM ------------------------------------------
    if satellite == 'elevation':
        image = ee.Image('USGS/SRTMGL1_003') # CGIAR/SRTM90_V4
        # Exported under the reducer's name ('mean')
        return image, ['mean'], 1000 # ok to upscale

    # Slope - derived from SRTM elevation -------------------------
    if satellite == 'slope':
        # https://developers.google.com/earth-engine/datasets/catalog/CGIAR_SRTM90_V4#description
        image_elev = ee.Image('USGS/SRTMGL1_003').select('elevation') # CGIAR/SRTM90_V4
        image = ee.Terrain.slope(image_elev)
        # Exported under the reducer's name ('mean')
        return image, ['mean'], 500 # ok to upscale

    # Landsat 8 ---------------------------------------------------
    if satellite == 'l8':
        # landsat 8 starts in April 2013; if year is less than
        # 2014, use 2014 as year (to ensure have year before and after)
        year_use = 2014 if year < 2014 else year
        image = _landsat_sr_composite('LANDSAT/LC08/C01/T1_SR', year_use)

        # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
        ndvi = image.normalizedDifference(['B5', 'B4']).rename('NDVI')
        ndbi = image.normalizedDifference(['B6', 'B5']).rename('NDBI')
        image = image.addBands(ndvi)
        image = image.addBands(ndbi)

        # Built-up index: NDBI minus NDVI
        bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
        image = image.addBands(bu)

        bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11', 'NDVI', 'NDBI', 'BU']
        return image, bands, 100 # ok to upscale

    # Sentinel-2 ---------------------------------------------------
    if satellite == 's2':
        # sentinel starts in March 2017; just use 2018 regardless of `year`
        year_use = 2018
        start = str(year_use - 1) + '-12-01'
        end = str(year_use + 1) + '-12-31'

        image = ee.ImageCollection('COPERNICUS/S2_SR')\
            .filterDate(start, end)\
            .map(cloud_mask_sentinel2)\
            .median()\
            .multiply(0.0001)

        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
        image = image.addBands(ndvi)

        bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'NDVI']
        return image.select(bands), bands, 100 # ok to upscale

    # GRIDMET drought ---------------------------------------------------
    if satellite == 'gridmet_drought':
        start, end = _same_year_window(year)
        image = _median_composite("GRIDMET/DROUGHT", start, end)
        return image, ['pdsi', 'z', 'eddi1y', 'eddi2y', 'eddi5y'], 5000

    # ERA5 daily weather (full year or one quarter) ----------------------
    if satellite in _ERA5_WINDOWS:
        start_suffix, end_suffix = _ERA5_WINDOWS[satellite]
        start, end = _same_year_window(year, start_suffix, end_suffix)
        # Weather uses the mean over the window rather than the median
        image = ee.ImageCollection("ECMWF/ERA5/DAILY")\
            .filterDate(start, end)\
            .mean()
        return image, _ERA5_BANDS, 1000

    # VIIRS nighttime lights ---------------------------------------------
    if satellite == 'viirs':
        # VIIRS starts in April 2012; if year is less than
        # 2013, use 2013 as year (to ensure have year before and after)
        year_use = 2013 if year < 2013 else year
        start, end = _three_year_window(year_use)
        image = _median_composite('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG', start, end)
        return image, ['avg_rad'], 500

    # DMSP-OLS nighttime lights ------------------------------------------
    if satellite == 'dmsp':
        # Years after 2012 are capped at 2012 so the +/- 1 year window is kept
        # (presumably because the DMSP-OLS series ends around 2013 -- confirm)
        year_use = 2012 if year > 2012 else year
        start, end = _three_year_window(year_use)
        image = _median_composite('NOAA/DMSP-OLS/NIGHTTIME_LIGHTS', start, end)
        return image, ['stable_lights', 'avg_lights_x_pct'], 1000

    raise ValueError('Unknown satellite: {!r}'.format(satellite))


def extract_sat(survey_df, buffer_size_urban, buffer_size_rural, satellite, year, chunk, survey_name):
    '''
    Extract satellite imagery to buffered survey locations and export the
    result to Google Drive as a CSV.

    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes
                   the dataframe contains latitude, longitude, uid, year and
                   urban_rural variables. Assumes coordinates in WGS84.
        buffer_size_urban: buffer distance for urban ('U') locations
        buffer_size_rural: buffer distance for rural ('R') locations
        satellite: dataset key, e.g. 'viirs', 'l8', 'worldpop', 'NO2',
                   'ecmwf_weather_q1' (see _prep_image for the full list)
        year: survey year used to pick the imagery date window
        chunk: chunk identifier; only used in the export task description
        survey_name: survey name; lower-cased into the Drive folder name
    Returns:
        (export task) the Earth Engine table-export task, already started.
    Raises:
        ValueError: if `satellite` is not a supported key.
    '''

    # Prep Image ---------------------------------------------------
    # Done first so an unsupported key fails before any features are built
    image, bands, scale = _prep_image(satellite, year)

    # Prep Survey ---------------------------------------------------
    survey_fc = survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural)

    # Extract Values ---------------------------------------------------
    # Population is summed within the buffer; everything else is averaged
    if satellite == 'worldpop':
        reducer = ee.Reducer.sum()
    else:
        reducer = ee.Reducer.mean()

    vals = image.reduceRegions(collection = survey_fc,
                               reducer = reducer,
                               scale = scale,
                               tileScale = 8)

    # Export ---------------------------------------------------
    # Build a new list so the shared band tables are never modified
    bands_to_export = list(bands) + ['uid', 'year']

    description = satellite + "_ubuff" + str(buffer_size_urban) + '_rbuff' + str(buffer_size_rural) + "_" + str(year) + '_' + str(chunk)

    task = ee.batch.Export.table.toDrive(collection=vals,
                                         folder='satellite_data_from_gee_' + survey_name.lower(),
                                         description=description,
                                         fileFormat='CSV',
                                         selectors = bands_to_export)
    task.start()

    return task

def extract_satellite_in_chunks(survey_df, buffer_size_urban, buffer_size_rural, satellite, year, survey_name):
    '''
    Run extract_sat once per chunk of survey locations.

    Inputs:
        survey_df: pandas dataframe of survey locations with a chunk_id column
        buffer_size_urban, buffer_size_rural, satellite, year, survey_name:
            passed through to extract_sat unchanged
    Returns:
        (list) one extract_sat result per unique chunk_id, ordered by
        ascending chunk_id.
    '''

    # np.unique returns the distinct chunk ids in sorted order
    return [
        extract_sat(survey_df[survey_df['chunk_id'] == chunk_i],
                    buffer_size_urban, buffer_size_rural,
                    satellite, year, chunk_i, survey_name)
        for chunk_i in np.unique(survey_df.chunk_id)
    ]

def extract_satellite_by_year(survey_df, buffer_size_urban, buffer_size_rural, satellite, survey_name):
    '''
    Run extract_satellite_in_chunks once per survey year.

    Inputs:
        survey_df: pandas dataframe of survey locations with a year column
        buffer_size_urban, buffer_size_rural, satellite, survey_name:
            passed through to extract_satellite_in_chunks unchanged
    Returns:
        (list of lists) one extract_satellite_in_chunks result per unique
        year, ordered by ascending year.
    '''

    # np.unique returns the distinct years in sorted order
    return [
        extract_satellite_in_chunks(survey_df[survey_df['year'] == year_i],
                                    buffer_size_urban, buffer_size_rural,
                                    satellite, year_i, survey_name)
        for year_i in np.unique(survey_df.year)
    ]

def chunk_ids(total_length, chunk_size):
    '''
    Assign consecutive positions to fixed-size chunks.

    Inputs:
        total_length: number of ids to generate
        chunk_size: maximum number of positions per chunk; must be positive
    Returns:
        (list of int) length total_length; position i gets id i // chunk_size,
        e.g. chunk_ids(5, 2) -> [0, 0, 1, 1, 2].
    Raises:
        ValueError: if chunk_size is not positive.
    '''
    if chunk_size <= 0:
        raise ValueError('chunk_size must be positive; got {!r}'.format(chunk_size))

    # Integer division yields the sorted, truncated id list directly, without
    # building and sorting a list of n_chunks * chunk_size elements.
    return [i // chunk_size for i in range(total_length)]

## Load/Prep Survey Data

In [108]:
# Read the survey file and keep only the columns the extraction needs,
# ordered by survey year.
survey_csv_path = os.path.join(cf.DROPBOX_DIRECTORY,
                               'Data',
                               SURVEY_NAME,
                               'FinalData',
                               'Individual Datasets',
                               'survey_socioeconomic.csv')
survey_raw = pd.read_csv(survey_csv_path)
survey_df = survey_raw[['uid', 'year', 'urban_rural', 'latitude', 'longitude']].sort_values('year')

# Chunk ids are assigned by row position after sorting
CHUNK_SIZE = 5000
survey_years = list(survey_df.year.unique())
survey_df['chunk_id'] = chunk_ids(survey_df.shape[0], CHUNK_SIZE)

In [109]:
# Quick look at the first rows and the overall dimensions
print(survey_df.head(), survey_df.shape, sep='\n')

                  uid  year urban_rural   latitude  longitude  chunk_id
0       1_PAK.7.7.1_1  2011           U  33.300222  71.919333         0
960   320_PAK.2.2.1_1  2011           U  25.388972  64.321778         0
963   320_PAK.8.1.4_1  2011           U  25.388083  68.319667         0
2186   61_PAK.7.8.1_1  2011           R  31.714861  71.041139         0
967   321_PAK.8.1.3_1  2011           U  25.377028  68.373583         0
(2383, 6)


## If re-extract, delete existing files

In [110]:
if REEXTRACT_IF_FILE_EXISTS:
    print("Deleting existing files from Google Drive")

    ## Path with files
    OUT_PATH = os.path.join(cf.GOOGLEDRIVE_DIRECTORY,
                            'Data',
                            SURVEY_NAME,
                            'FinalData',
                            'Individual Datasets',
                            'satellite_data_from_gee_' + SURVEY_NAME.lower())

    ## Grab csv files
    # Match on the extension only, so names that merely contain ".csv"
    # (e.g. "x.csv.bak") are left alone. A missing folder means nothing has
    # been exported yet, so there is nothing to delete.
    if os.path.isdir(OUT_PATH):
        files_to_rm = [x for x in os.listdir(OUT_PATH) if x.endswith('.csv')]
    else:
        print("Folder not found, nothing to delete: " + OUT_PATH)
        files_to_rm = []

    ## Delete files
    for file_i in files_to_rm:

        path_i = os.path.join(OUT_PATH, file_i)
        os.remove(path_i)

Deleting existing files


## List of files already extracted

In [111]:
# Folder holding the cleaned (.Rds) datasets built from earlier extractions
DB_DATA_PATH = os.path.join(cf.DROPBOX_DIRECTORY,
                            'Data',
                            SURVEY_NAME,
                            'FinalData',
                            'Individual Datasets',
                            'satellite_data_from_gee')

# Strip the '.Rds' text so names can be compared with the
# "<satellite>_ubuff<u>_rbuff<r>" roots built in the extraction loop
files_already_extracted = []
for file_name in os.listdir(DB_DATA_PATH):
    files_already_extracted.append(file_name.replace('.Rds', ''))

files_already_extracted

['viirs_ubuff5000_rbuff5000',
 'viirs_ubuff2000_rbuff2000',
 'ecmwf_weather_ubuff10000_rbuff10000',
 'ecmwf_weather_q2_ubuff10000_rbuff10000',
 'worldpop_ubuff10000_rbuff10000',
 'NO2_ubuff2500_rbuff2500',
 'uv_aer_ubuff2500_rbuff2500',
 'HCHO_ubuff2500_rbuff2500',
 'l8_ubuff2500_rbuff2500',
 'CH4_ubuff2500_rbuff2500',
 'ecmwf_weather_q1_ubuff10000_rbuff10000',
 'elevation_ubuff5000_rbuff5000',
 'SO2_ubuff2500_rbuff2500',
 'slope_ubuff5000_rbuff5000',
 'viirs_ubuff2500_rbuff2500',
 'GlobalHumanModification_ubuff10000_rbuff10000',
 'ecmwf_weather_q3_ubuff10000_rbuff10000',
 'ozone_ubuff2500_rbuff2500',
 'CO_ubuff2500_rbuff2500',
 'ecmwf_weather_q4_ubuff10000_rbuff10000']

## Extract Values

In [112]:
to_extract = ['elevation',
              'slope',
              'viirs_2000',
              'viirs_2500',
              'viirs_5000',
              'GlobalHumanModification',
              'worldpop',
              'l8',
              'ecmwf_weather',
              'ecmwf_weather_q1',
              'ecmwf_weather_q2',
              'ecmwf_weather_q3',
              'ecmwf_weather_q4',
              'NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4']

# label in to_extract -> (satellite key passed to extract_sat, buffer size).
# Every label has an explicit buffer, so the result no longer depends on the
# order of to_extract. 'worldpop' previously had no assignment of its own and
# inherited 10000 from the preceding 'GlobalHumanModification' iteration.
EXTRACT_SETTINGS = {
    'elevation': ('elevation', 5000),
    'slope': ('slope', 5000),
    'viirs_2000': ('viirs', 2000),
    'viirs_2500': ('viirs', 2500),
    'viirs_5000': ('viirs', 5000),
    'GlobalHumanModification': ('GlobalHumanModification', 10000),
    'worldpop': ('worldpop', 10000),
    'l8': ('l8', 2500),
    'ecmwf_weather': ('ecmwf_weather', 10000),
    'ecmwf_weather_q1': ('ecmwf_weather_q1', 10000),
    'ecmwf_weather_q2': ('ecmwf_weather_q2', 10000),
    'ecmwf_weather_q3': ('ecmwf_weather_q3', 10000),
    'ecmwf_weather_q4': ('ecmwf_weather_q4', 10000),
    'NO2': ('NO2', 2500),
    'uv_aer': ('uv_aer', 2500),
    'CO': ('CO', 2500),
    'HCHO': ('HCHO', 2500),
    'ozone': ('ozone', 2500),
    'SO2': ('SO2', 2500),
    'CH4': ('CH4', 2500),
}

# tasks_all[i][j][k]: export task for dataset i, survey year j, chunk k
tasks_all = []
for label in to_extract:
    print(label)

    # A label missing from the table raises KeyError instead of silently
    # reusing the previous dataset's buffer
    sat, buffer_size = EXTRACT_SETTINGS[label]
    buffer_u = buffer_size
    buffer_r = buffer_size

    file_root = sat + '_ubuff' + str(buffer_u) + '_rbuff' + str(buffer_r)

    # Check if should extract data
    if (file_root not in files_already_extracted) or REEXTRACT_IF_FILE_EXISTS:
        print(file_root)

        tasks_i = extract_satellite_by_year(survey_df, buffer_u, buffer_r, sat, SURVEY_NAME)
        tasks_all.append(tasks_i)

elevation
elevation_ubuff5000_rbuff5000
slope
slope_ubuff5000_rbuff5000
viirs_2000
viirs_ubuff2000_rbuff2000
viirs_2500
viirs_ubuff2500_rbuff2500
viirs_5000
viirs_ubuff5000_rbuff5000
GlobalHumanModification
GlobalHumanModification_ubuff10000_rbuff10000
worldpop
worldpop_ubuff10000_rbuff10000
l8
l8_ubuff2500_rbuff2500
ecmwf_weather
ecmwf_weather_ubuff10000_rbuff10000
ecmwf_weather_q1
ecmwf_weather_q1_ubuff10000_rbuff10000
ecmwf_weather_q2
ecmwf_weather_q2_ubuff10000_rbuff10000
ecmwf_weather_q3
ecmwf_weather_q3_ubuff10000_rbuff10000
ecmwf_weather_q4
ecmwf_weather_q4_ubuff10000_rbuff10000
NO2
NO2_ubuff2500_rbuff2500
uv_aer
uv_aer_ubuff2500_rbuff2500
CO
CO_ubuff2500_rbuff2500
HCHO
HCHO_ubuff2500_rbuff2500
ozone
ozone_ubuff2500_rbuff2500
SO2
SO2_ubuff2500_rbuff2500
CH4
CH4_ubuff2500_rbuff2500


## Check status

In [113]:
## Failed Tasks
# tasks_all is nested as dataset -> year -> chunk; walk every chunk's task
# rather than only the first chunk of each year.
for tasks_by_year in tasks_all:
    for tasks_by_chunk in tasks_by_year:
        for task in tasks_by_chunk:

            # Query the status once and reuse it for the check and the printout
            task_status = task.status()
            if task_status['state'] == 'FAILED':
                print(task_status)
                print(" ")

In [115]:
## Tasks not started
# tasks_all is nested as dataset -> year -> chunk; walk every chunk's task
# rather than only the first chunk of each year.
for tasks_by_year in tasks_all:
    for tasks_by_chunk in tasks_by_year:
        for task in tasks_by_chunk:

            if task.status()['state'] == 'READY':
                print(task)

<Task SFTLOAB5GQI2IXWDA6RG2XPT EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2011_0 (UNSUBMITTED)>
<Task JTPMVAC3EWVJC5BFXVFNM5PD EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2013_0 (UNSUBMITTED)>
<Task PICA5ARKS2EFQASBYSIMZ3VX EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2014_0 (UNSUBMITTED)>
<Task Y2WW6T3NUAGCA4CSDQA3R6P4 EXPORT_FEATURES: elevation_ubuff5000_rbuff5000_2016_0 (UNSUBMITTED)>
<Task O4PKCPYTIRZTLG7L5R4T3CZW EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2011_0 (UNSUBMITTED)>
<Task AZ76CP5QWZDPNF4FF3PFNFWE EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2013_0 (UNSUBMITTED)>
<Task BVFDML4GT2E4L7MEI2CB5VXT EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2014_0 (UNSUBMITTED)>
<Task OPDB26JJNL7T2YGEZETXEQYH EXPORT_FEATURES: slope_ubuff5000_rbuff5000_2016_0 (UNSUBMITTED)>
<Task QL2TICVOXWLKJVOTQPD6JIS7 EXPORT_FEATURES: viirs_ubuff2000_rbuff2000_2011_0 (UNSUBMITTED)>
<Task YCPI5UPRRK3RYLMGKKPAJ2IS EXPORT_FEATURES: viirs_ubuff2000_rbuff2000_2013_0 (UNSUBMITTED)>
<Task SJPUW4SO4EFZ3KXULH

In [116]:
## Tasks completed
# tasks_all is nested as dataset -> year -> chunk; walk every chunk's task
# rather than only the first chunk of each year.
for tasks_by_year in tasks_all:
    for tasks_by_chunk in tasks_by_year:
        for task in tasks_by_chunk:

            if task.status()['state'] == 'COMPLETED':
                print(task)

In [129]:
## See all tasks
# tasks_all is nested as dataset -> year -> chunk; print the state of every
# chunk's task rather than only the first chunk of each year.
for tasks_by_year in tasks_all:
    for tasks_by_chunk in tasks_by_year:
        for task in tasks_by_chunk:

            print(task.status()['state'])


READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY
READY


## Cancel Tasks

In [71]:
# WARNING: running this cell cancels every export task started above.
# Set CANCEL_TASKS to False to keep the exports running on a full re-run.
CANCEL_TASKS = True

if CANCEL_TASKS:
    # tasks_all is nested as dataset -> year -> chunk; cancel every chunk's
    # task rather than only the first chunk of each year.
    for tasks_by_year in tasks_all:
        for tasks_by_chunk in tasks_by_year:
            for task in tasks_by_chunk:

                task.cancel()