# Extract Satellite Imagery to Survey Locations

TODO:
1. Second pass: if an export fails with a timeout error, split that year-chunk into much smaller chunks and loop over those.

## Setup

In [1]:
# Earth Engine client: authenticate interactively (prompts for a verification
# code), then initialise the session before any other ee.* call is made.
import ee
ee.Authenticate()
ee.Initialize()

Enter verification code:  <redacted>



Successfully saved authorization token.


In [3]:
import numpy as np
import geetools
from geetools import ui, cloud_mask
import os, datetime
import config as cf
import pandas as pd
import eeconvert
import time
import geopandas as gpd

# Cloud-masking functions from geetools; extract_sat maps them over the
# Landsat SR and Sentinel-2 image collections before compositing.
cloud_mask_landsatSR = cloud_mask.landsatSR()
cloud_mask_sentinel2 = cloud_mask.sentinel2()

AttributeError: module 'pandas' has no attribute 'Panel'

## Parameters

In [None]:
# Survey whose locations are processed; selects the input/output folders and
# the default buffer size used in the extraction loop.
SURVEY_NAME = 'DHS' # 'DHS', 'PAK_POINTS', 'PAK_CITY_POINTS'
# When True, CSVs already present in the output folder are deleted before the
# list of already-extracted files is built, so everything is exported again.
REEXTRACT_IF_FILE_EXISTS = False

## Functions

In [52]:
# https://gis.stackexchange.com/questions/257727/iterate-over-imagecollection-returning-pandas-dataframe-using-earth-engine-pyt
def fc2df(fc):
    '''
    Download a FeatureCollection and return its properties as a table.

    Inputs:
        fc: Earth Engine FeatureCollection; getInfo() is called on it, so
            the whole collection is pulled to the client.
    Returns:
        One row per feature and one column per property. Each feature's
        GeoJSON geometry is attached while the table is built and the
        'geometry' column is dropped again before returning.
    '''
    features = fc.getInfo()['features']

    # One record per feature: its properties plus the raw GeoJSON geometry
    records = [{**feature['properties'], 'geometry': feature['geometry']}
               for feature in features]

    table = gpd.GeoDataFrame(records)
    return table.drop(columns=['geometry'])

def survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural):
    '''
    Turn survey locations into a feature collection of buffered points.

    Inputs:
        survey_df: pandas dataframe of survey locations. Must contain
                   'longitude', 'latitude', 'uid' and 'year' columns.
                   Coordinates are assumed to be in WGS84.
        buffer_size_urban: buffer distance applied to every point.
        buffer_size_rural: accepted but currently unused; the urban/rural
                           split is disabled, so all points get the urban
                           buffer.
    Returns:
        (feature collection) one buffered feature per row, carrying the
        'uid' and 'year' (as a string) properties.
    '''
    radius = buffer_size_urban

    def buffered_point(row_number):
        # Read each value by position so rows are handled in dataframe order
        lon = survey_df['longitude'].iloc[row_number]
        lat = survey_df['latitude'].iloc[row_number]
        properties = {'uid': survey_df['uid'].iloc[row_number],
                      'year': str(survey_df['year'].iloc[row_number])}

        point = ee.Feature(ee.Geometry.Point([lon, lat]), properties)
        return point.buffer(radius)

    buffered = [buffered_point(row_number) for row_number in range(len(survey_df))]

    return ee.FeatureCollection(buffered)

# Sentinel-5P products start in 2018, so a single composite over this fixed
# period is used regardless of the survey year.
_S5P_PERIOD = ("2018-01-01", '2020-12-31')

# Sentinel-5P sources: satellite name -> (collection id, bands to export)
_S5P_PRODUCTS = {
    'uv_aer': ("COPERNICUS/S5P/OFFL/L3_AER_AI",
               ['absorbing_aerosol_index']),
    'CO': ("COPERNICUS/S5P/OFFL/L3_CO",
           ['CO_column_number_density', 'H2O_column_number_density']),
    'HCHO': ("COPERNICUS/S5P/OFFL/L3_HCHO",
             ['tropospheric_HCHO_column_number_density',
              'tropospheric_HCHO_column_number_density_amf']),
    'NO2': ("COPERNICUS/S5P/OFFL/L3_NO2",
            ['NO2_column_number_density', 'tropospheric_NO2_column_number_density',
             'stratospheric_NO2_column_number_density', 'NO2_slant_column_number_density']),
    'ozone': ("COPERNICUS/S5P/OFFL/L3_O3",
              ['O3_column_number_density', 'O3_effective_temperature']),
    'SO2': ("COPERNICUS/S5P/OFFL/L3_SO2",
            ['SO2_column_number_density', 'SO2_column_number_density_amf',
             'SO2_slant_column_number_density']),
    'CH4': ("COPERNICUS/S5P/OFFL/L3_CH4",
            ['CH4_column_volume_mixing_ratio_dry_air']),
}

# ERA5 weather sources: satellite name -> (start, end) month-day suffixes
# appended to the survey year (full year, then the four quarters).
_ECMWF_PERIODS = {
    'ecmwf_weather': ('-01-01', '-12-31'),
    'ecmwf_weather_q1': ('-01-01', '-03-31'),
    'ecmwf_weather_q2': ('-04-01', '-06-30'),
    'ecmwf_weather_q3': ('-07-01', '-09-30'),
    'ecmwf_weather_q4': ('-10-01', '-12-31'),
}

_ECMWF_BANDS = ['mean_2m_air_temperature',
                'minimum_2m_air_temperature',
                'maximum_2m_air_temperature',
                'total_precipitation']


def _date_window(first_year, last_year, start_suffix='-01-01', end_suffix='-12-31'):
    '''Return (start, end) date strings running from first_year to last_year.'''
    return str(first_year) + start_suffix, str(last_year) + end_suffix


def _filtered(collection_id, window):
    '''Return the image collection restricted to the (start, end) window.'''
    # NOTE(review): filterDate treats the end date as exclusive, so the last
    # day of each window (e.g. Dec 31) is not included - confirm this is intended.
    return ee.ImageCollection(collection_id).filterDate(window[0], window[1])


def _prep_image(satellite, year):
    '''
    Build the image to summarise for one data source.

    Inputs:
        satellite: name of the data source (see the branches below).
        year: survey year; only used by sources that vary over time.
    Returns:
        (image, scale, bands): the ee.Image to reduce, the scale passed to
        reduceRegions, and the property names to export.
    Raises:
        ValueError: if satellite is not a known data source. This is checked
        without touching Earth Engine.
    '''

    # WorldPop, survey year -------------------------------------------
    # Single band reduced with sum, so the output property is named "sum"
    if satellite == 'worldpop':
        image = _filtered('WorldPop/GP/100m/pop', _date_window(year, year)).median()
        return image, 100, ['sum']

    # WorldPop, fixed to 2020 -----------------------------------------
    if satellite == 'worldpop2020':
        image = _filtered('WorldPop/GP/100m/pop', _date_window('2020', '2020')).median()
        return image, 100, ['sum']

    # Landsat 7: cloud-masked median over year-1 .. year+1 ------------
    if satellite == 'l7':
        image = _filtered('LANDSAT/LC07/C01/T1_SR', _date_window(year - 1, year + 1))\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        ndvi = image.normalizedDifference(['B4', 'B3']).rename('NDVI')
        image = image.addBands(ndvi)

        # NOTE(review): NDVI is added to the image but is not in the exported
        # band list - confirm whether it should be.
        return image, 100, ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']

    # Sentinel-5P products (uv_aer, CO, HCHO, NO2, ozone, SO2, CH4) ----
    if satellite in _S5P_PRODUCTS:
        collection_id, bands = _S5P_PRODUCTS[satellite]
        image = _filtered(collection_id, _S5P_PERIOD).median()
        return image, 1000, list(bands)

    # CSP gHM: Global Human Modification ------------------------------
    # Original band is "gHM"; with a single band the output property takes
    # the reducer's name, and the mean reducer is used.
    if satellite == 'GlobalHumanModification':
        image = ee.ImageCollection("CSP/HM/GlobalHumanModification").median()
        return image, 1000, ['mean']

    # WorldClim BIO Variables V1 --------------------------------------
    if satellite == 'worldclim_bio':
        bands = ['bio%02d' % i for i in range(1, 20)]  # bio01 .. bio19
        return ee.Image('WORLDCLIM/V1/BIO'), 1000, bands

    # Elevation - SRTM ------------------------------------------------
    if satellite == 'elevation':
        return ee.Image('USGS/SRTMGL1_003'), 1000, ['mean']

    # Slope derived from SRTM elevation -------------------------------
    if satellite == 'slope':
        elevation = ee.Image('USGS/SRTMGL1_003').select('elevation')
        return ee.Terrain.slope(elevation), 500, ['mean']

    # Landsat 8: cloud-masked median over year-1 .. year+1 ------------
    if satellite == 'l8':
        # Landsat 8 starts in April 2013; years before 2014 use 2014 so a
        # year before and after is available.
        year_use = 2014 if year < 2014 else year

        image = _filtered('LANDSAT/LC08/C01/T1_SR', _date_window(year_use - 1, year_use + 1))\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
        ndvi = image.normalizedDifference(['B5', 'B4']).rename('NDVI')
        ndbi = image.normalizedDifference(['B6', 'B5']).rename('NDBI')
        image = image.addBands(ndvi)
        image = image.addBands(ndbi)

        # BU = NDBI - NDVI
        bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
        image = image.addBands(bu)

        bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11', 'NDVI', 'NDBI', 'BU']
        return image, 100, bands

    # Sentinel-2: fixed window 2017-12-01 .. 2019-12-31 ---------------
    if satellite == 's2':
        image = _filtered('COPERNICUS/S2_SR', _date_window(2017, 2019, start_suffix='-12-01'))\
            .map(cloud_mask_sentinel2)\
            .median()\
            .multiply(0.0001)

        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI')
        image = image.addBands(ndvi)

        bands = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'NDVI']
        return image.select(bands), 100, bands

    # GRIDMET drought, survey year ------------------------------------
    if satellite == 'gridmet_drought':
        image = _filtered("GRIDMET/DROUGHT", _date_window(year, year)).median()
        return image, 5000, ['pdsi', 'z', 'eddi1y', 'eddi2y', 'eddi5y']

    # MODIS aerosol optical depth, survey year ------------------------
    if satellite == 'aod':
        image = _filtered("MODIS/006/MCD19A2_GRANULES", _date_window(year, year)).median()
        return image, 1000, ['Optical_Depth_047', 'Optical_Depth_055']

    # ERA5 daily weather: mean over the year or one quarter -----------
    if satellite in _ECMWF_PERIODS:
        start_suffix, end_suffix = _ECMWF_PERIODS[satellite]
        window = _date_window(year, year, start_suffix=start_suffix, end_suffix=end_suffix)
        image = _filtered("ECMWF/ERA5/DAILY", window).mean()
        return image, 1000, list(_ECMWF_BANDS)

    # VIIRS nighttime lights: median over year-1 .. year+1 ------------
    if satellite == 'viirs':
        # VIIRS starts in April 2012; years before 2013 use 2013 so a year
        # before and after is available.
        year_use = 2013 if year < 2013 else year
        image = _filtered('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG',
                          _date_window(year_use - 1, year_use + 1)).median()
        return image, 500, ['avg_rad']

    # VIIRS nighttime lights: fixed 2018-2020 composite ---------------
    if satellite == 'viirs181920':
        image = _filtered('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG',
                          _date_window(2018, 2020)).median()
        return image, 500, ['avg_rad']

    # DMSP-OLS nighttime lights: median over year-1 .. year+1 ---------
    if satellite == 'dmsp':
        # Years after 2012 use 2012 so a year before and after is available
        # (presumably because the DMSP-OLS series ends in 2013 - confirm).
        year_use = 2012 if year > 2012 else year
        image = _filtered('NOAA/DMSP-OLS/NIGHTTIME_LIGHTS',
                          _date_window(year_use - 1, year_use + 1)).median()
        return image, 1000, ['stable_lights', 'avg_lights_x_pct']

    raise ValueError('Unknown satellite: {!r}'.format(satellite))


def extract_sat(survey_df, buffer_size_urban, buffer_size_rural, year, satellite, survey_name, file_name):
    '''
    Summarise one data source around survey locations and export it to Drive.

    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes
                   the dataframe contains (1) latitude, (2) longitude,
                   (3) uid and (4) year variables. Assumes coordinates in
                   WGS84.
        buffer_size_urban, buffer_size_rural: buffer distances passed on to
                   survey_to_fc_buffer.
        year: survey year used to pick the imagery period for sources that
              vary over time.
        satellite: name of the data source to extract.
        survey_name: lower-cased and appended to 'satellite_data_from_gee_'
                     to form the Drive folder name.
        file_name: export description (name of the exported CSV).
    Returns:
        The started Earth Engine export task. The export runs
        asynchronously; this function does not wait for it. Check progress
        with task.status().
    Raises:
        ValueError: if satellite is not a known data source.
    '''

    # Resolve the data source first so an unknown name fails immediately
    # instead of surfacing later as an undefined variable.
    image, scale, bands = _prep_image(satellite, year)

    # Prep Survey ---------------------------------------------------
    survey_fc = survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural)

    # Extract Values ---------------------------------------------------
    # Population counts are summed within the buffer; everything else is averaged
    if satellite in ('worldpop', 'worldpop2020'):
        reducer = ee.Reducer.sum()
    else:
        reducer = ee.Reducer.mean()

    vals = image.reduceRegions(collection = survey_fc,
                               reducer = reducer,
                               scale = scale,
                               tileScale = 8)

    # Export only the extracted bands plus the identifying columns
    bands_to_export = bands + ['uid', 'year']

    task = ee.batch.Export.table.toDrive(collection=vals,
                                         folder='satellite_data_from_gee_' + survey_name.lower(),
                                         description=file_name,
                                         fileFormat='CSV',
                                         selectors = bands_to_export)
    task.start()

    return task

def extract_satellite_in_chunks(survey_df, buffer_size_urban, buffer_size_rural, satellite, file_name, year, survey_name):
    '''
    Start one export per chunk of survey locations.

    Inputs:
        survey_df: pandas dataframe of survey locations with a 'chunk_id'
                   column in addition to the columns extract_sat needs.
        buffer_size_urban, buffer_size_rural: buffer distances passed on to
                   extract_sat.
        satellite: name of the data source to extract.
        file_name: prefix of the export name; '_<year>_<chunk id>' is
                   appended so each chunk gets its own file.
        year: survey year of the rows in survey_df.
        survey_name: survey name passed on to extract_sat.
    Returns:
        List with the value returned by extract_sat for each chunk, in
        ascending chunk_id order.
    '''

    vals_df_list = []

    for chunk_i in np.unique(survey_df.chunk_id):

        survey_df_i = survey_df[survey_df['chunk_id'] == chunk_i]

        # Same '<name>_<year>_<chunk>' suffix as the main extraction loop, so
        # exports for different years/chunks do not share a name.
        file_name_i = file_name + '_' + str(year) + '_' + str(chunk_i)

        # Keyword arguments: extract_sat's parameter order differs from this
        # function's, so positional passing would scramble the values.
        vals_i_df = extract_sat(survey_df = survey_df_i,
                                buffer_size_urban = buffer_size_urban,
                                buffer_size_rural = buffer_size_rural,
                                year = year,
                                satellite = satellite,
                                survey_name = survey_name,
                                file_name = file_name_i)

        vals_df_list.append(vals_i_df)

    return vals_df_list

def extract_satellite_by_year(survey_df, buffer_size_urban, buffer_size_rural, satellite, file_name, survey_name):
    '''
    Run extract_satellite_in_chunks separately for every survey year.

    Inputs:
        survey_df: pandas dataframe of survey locations with a 'year' column.
        Remaining arguments are passed through unchanged.
    Returns:
        List with one entry per year (ascending), each being the list
        returned by extract_satellite_in_chunks for that year's rows.
    '''

    return [
        extract_satellite_in_chunks(survey_df[survey_df['year'] == year_i],
                                    buffer_size_urban,
                                    buffer_size_rural,
                                    satellite,
                                    file_name,
                                    year_i,
                                    survey_name)
        for year_i in np.unique(survey_df.year)
    ]

def chunk_ids(total_length, chunk_size):
    '''
    Assign consecutive positions to fixed-size chunks.

    Inputs:
        total_length: number of ids to produce.
        chunk_size: number of consecutive positions sharing an id.
    Returns:
        List of length total_length: chunk_size zeros, then chunk_size
        ones, and so on; the last chunk may be shorter.
    '''
    n_chunks = int(np.ceil(total_length / chunk_size))

    ids = []
    for chunk_number in range(n_chunks):
        ids.extend([chunk_number] * chunk_size)

    # The last chunk may have been over-filled; trim to the requested length
    return ids[:total_length]

## Load/Prep Survey Data

In [53]:
# Survey locations: keep only the id, year and coordinates, ordered by year
survey_path = os.path.join(cf.DROPBOX_DIRECTORY,
                           'Data',
                           SURVEY_NAME,
                           'FinalData',
                           'Individual Datasets',
                           'survey_socioeconomic.csv')

survey_df = pd.read_csv(survey_path)
survey_df = survey_df[['uid', 'year', 'latitude', 'longitude']].sort_values('year') # urban_rural not kept
#survey_df = survey_df[survey_df.uid != 'IA201400180012']

survey_years = list(survey_df['year'].unique())

# chunk_id is assigned per satellite inside the extraction loop
#CHUNK_SIZE = 1000
#survey_df['chunk_id'] = chunk_ids(survey_df.shape[0], CHUNK_SIZE)

In [54]:
# Quick look at the first rows and the (rows, columns) shape of the survey data
print(survey_df.head())
print(survey_df.shape)

                   uid  year   latitude  longitude
0             lahore_1  2020  31.215465  74.002418
10552  faisalabad_4433  2020  31.591699  73.320374
10553  faisalabad_4434  2020  31.591342  73.330886
10554  faisalabad_4435  2020  31.590985  73.341397
10555  faisalabad_4436  2020  31.590627  73.351909
(15841, 4)


## If re-extract, delete existing files

In [55]:
# When re-extracting, remove previously exported CSVs so the "already
# extracted" check in the extraction loop does not skip them.
if REEXTRACT_IF_FILE_EXISTS:
    print("Deleting existing files from Google Drive")

    ## Path with files
    OUT_PATH = os.path.join(cf.GOOGLEDRIVE_DIRECTORY, 
                            'Data', 
                             SURVEY_NAME, 
                             'FinalData', 
                             'Individual Datasets',
                             'satellite_data_from_gee_' + SURVEY_NAME.lower())

    ## Grab csv files
    # Match on the extension rather than a substring, so files that merely
    # contain '.csv' in their name (e.g. 'x.csv.bak') are left alone.
    files_to_rm = [x for x in os.listdir(OUT_PATH) if x.endswith('.csv')]

    ## Delete files
    for file_i in files_to_rm:

        path_i = os.path.join(OUT_PATH, file_i)
        os.remove(path_i)

## List of files already extracted

In [56]:
## Folder that receives the exported CSVs for this survey
OUT_PATH = os.path.join(cf.GOOGLEDRIVE_DIRECTORY,
                        'Data',
                        SURVEY_NAME,
                        'FinalData',
                        'Individual Datasets',
                        'satellite_data_from_gee_' + SURVEY_NAME.lower())

## Names of csv files already present; the extraction loop skips these
files_extracted = list(filter(lambda x: '.csv' in x, os.listdir(OUT_PATH)))

len(files_extracted)

365

## Extract Values

In [57]:
# Datasets to export. A trailing '_<number>' is the buffer size applied around
# each survey location; the part before it is the data source passed to
# extract_sat (e.g. 'viirs181920_750' -> source 'viirs181920', buffer 750).
to_extract = ['elevation', 
              'slope',
              'viirs_750',
              'viirs_1120',
              'viirs_1250',
              'viirs_1500',
              'viirs_2000',
              'viirs_2500',
              'viirs_5000',
              'viirs181920_750',
              'viirs181920_1120',
              'viirs181920_1250',
              'viirs181920_1500',
              'viirs181920_2000',
              'viirs181920_2500',
              'viirs181920_5000',
              'GlobalHumanModification',
              'worldpop_750',
              'worldpop_1500',
              'worldpop_2000',
              'worldpop_2500',
              'worldpop_5000',
              'worldpop_10000',
              'worldpop2020_750',
              'worldpop2020_1500',
              'worldpop2020_2000',
              'worldpop2020_2500',
              'worldpop2020_5000',
              'worldpop2020_10000',
              'l8',
              'aod',
              'ecmwf_weather',
              'ecmwf_weather_q1',
              'ecmwf_weather_q2',
              'ecmwf_weather_q3',
              'ecmwf_weather_q4',
              'NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4',]

# Sentinel-5P sources; these are exported in smaller chunks
S5P_SATELLITES = ['NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4']

# Sources extracted with the survey-specific default buffer
DEFAULT_BUFFER_SATELLITES = S5P_SATELLITES + ['l8', 'aod', 'GlobalHumanModification', 'elevation', 'slope']
DEFAULT_BUFFER_BY_SURVEY = {'DHS': 2500,
                            'PAK_POINTS': 1500,
                            'PAK_CITY_POINTS': 750}

# Weather sources always use one fixed, larger buffer
ECMWF_SATELLITES = ['ecmwf_weather',
                    'ecmwf_weather_q1', 'ecmwf_weather_q2', 'ecmwf_weather_q3', 'ecmwf_weather_q4']
ECMWF_BUFFER = 10000


def _satellite_and_buffer(name, survey_name):
    '''
    Map an entry of to_extract to (data source, buffer size).

    Raises ValueError when no buffer is defined for the entry, instead of
    silently reusing the buffer of the previously processed dataset.
    '''
    # '<source>_<buffer>' entries carry their own buffer size
    base, _, suffix = name.rpartition('_')
    if base and suffix.isdigit():
        return base, int(suffix)

    if name in ECMWF_SATELLITES:
        return name, ECMWF_BUFFER

    if name in DEFAULT_BUFFER_SATELLITES:
        if survey_name not in DEFAULT_BUFFER_BY_SURVEY:
            raise ValueError('No default buffer defined for survey {!r}'.format(survey_name))
        return name, DEFAULT_BUFFER_BY_SURVEY[survey_name]

    raise ValueError('No buffer size defined for {!r}'.format(name))


tasks_all = []

# Set for fast membership checks inside the nested loops below
already_extracted = set(files_extracted)

# Loop over satellites ------------------------------
for name in to_extract:
    print(name)

    # NOTE: 'viirs181920_*' entries used to be sent to the 'viirs' source, so
    # any CSVs exported under those names before this fix hold the
    # survey-year composite and need to be re-extracted.
    sat, buffer_size = _satellite_and_buffer(name, SURVEY_NAME)

    # The same buffer is used for urban and rural locations
    buffer_u = buffer_size
    buffer_r = buffer_size

    # Define Chunk Size ---------------------------------
    CHUNK_SIZE = 500 if sat in S5P_SATELLITES else 5000

    survey_df['chunk_id'] = chunk_ids(survey_df.shape[0], CHUNK_SIZE)

    # LOOP OVER YEARS AND CHUNKS TO EXTRACT DATA 
    for year_i in survey_df['year'].unique():

        survey_df_year = survey_df[survey_df['year'] == year_i]

        # LOOP OVER CHUNKS
        for chunk_id_i in survey_df_year['chunk_id'].unique():
            survey_df_year_i = survey_df_year[survey_df_year['chunk_id'] == chunk_id_i]

            file_name_i = 'gee_{}_ubuff{}_rbuff{}_{}_{}'.format(name, buffer_u, buffer_r, year_i, chunk_id_i)
            file_name_i_csv = file_name_i + '.csv'

            # ONLY EXTRACT DATA IF NOT ALREADY EXTRACTED
            if file_name_i_csv in already_extracted:
                continue

            task_i = extract_sat(survey_df = survey_df_year_i, 
                                 buffer_size_urban = buffer_u, 
                                 buffer_size_rural = buffer_r, 
                                 year = year_i,
                                 satellite = sat, 
                                 survey_name = SURVEY_NAME,
                                 file_name = file_name_i)

            tasks_all.append(task_i)

elevation
slope
viirs_750
viirs_1120
viirs_1250
viirs_1500
viirs_2000
viirs_2500
viirs_5000
viirs181920_750
viirs181920_1120
viirs181920_1250
viirs181920_1500
viirs181920_2000
viirs181920_2500
viirs181920_5000
GlobalHumanModification
worldpop_750
worldpop_1500
worldpop_2000
worldpop_2500
worldpop_5000
worldpop_10000
worldpop2020_750
worldpop2020_1500
worldpop2020_2000
worldpop2020_2500
worldpop2020_5000
worldpop2020_10000
l8
aod
ecmwf_weather
ecmwf_weather_q1
ecmwf_weather_q2
ecmwf_weather_q3
ecmwf_weather_q4
NO2
uv_aer
CO
HCHO
ozone
SO2
CH4


## Check status

In [58]:
# Peek at the first few export tasks started by the extraction loop
tasks_all[0:5]

[<Task BIDUIEZSHCTV3KGKRWGY26WS EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_0 (UNSUBMITTED)>,
 <Task NLSCPPZMQ4UAVG2J2YVQX3OC EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_1 (UNSUBMITTED)>,
 <Task 7URYT2DPD4734BIVQGKWHFAA EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_2 (UNSUBMITTED)>,
 <Task 4F5CWNA6BROJVWRNRFOOHOHF EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_3 (UNSUBMITTED)>]

In [35]:
## Failed Tasks
for task_i in tasks_all:

    # status() queries Earth Engine for the task's current state
    task_i_status = task_i.status()
    if task_i_status['state'] == 'FAILED':
        # Print the failure reason too, so timeouts can be told apart from
        # other errors when deciding which chunks to split and re-run.
        print(task_i, task_i_status.get('error_message', ''))

In [36]:
## Ready Tasks
# Print every task whose reported state is 'READY'.
for task_i in tasks_all:
    task_i_status = task_i.status()
    if task_i_status['state'] != 'READY':
        continue
    print(task_i)

<Task BULRQ5OYYCH2SHODV4J6G4AU EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2002_0 (UNSUBMITTED)>
<Task P7HAFTLBK3GKQDBSS43WYEXV EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2002_1 (UNSUBMITTED)>
<Task IJZBLH366PJQ2GWXWEYB3U7H EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2003_1 (UNSUBMITTED)>
<Task E7IBDZUWQ3UIMKTIZ7QQQRIJ EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2005_1 (UNSUBMITTED)>
<Task EYOPMJE6RFG7QTD2JRXA3JCW EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2005_2 (UNSUBMITTED)>
<Task SRUT6EGNMWI7FYQ5XD3O4QK5 EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2006_2 (UNSUBMITTED)>
<Task BMN5MNDHQHIQSXJUHXWQ452T EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2008_2 (UNSUBMITTED)>
<Task GNATOYCMOQHKTSLRMXF2KHFK EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2008_3 (UNSUBMITTED)>
<Task SQJVAS3FFLPJHYOODJD54QMK EXPORT_FEATURES: viirs181920_5000_ubuff5000_rbuff5000_2009_3 (UNSUBMITTED)>
<Task 5EAVOMOPJW4UFID6O3UPS655 EXPORT

In [23]:
## Completed Tasks
# Print every task whose reported state is 'COMPLETED'.
for task_i in tasks_all:
    task_i_status = task_i.status()
    if task_i_status['state'] != 'COMPLETED':
        continue
    print(task_i)

<Task LMZ5GMEMEQFUTAI7SO36HCRH EXPORT_FEATURES: gee_viirs_750_ubuff750_rbuff750_2020_0 (UNSUBMITTED)>
<Task 32NCZX63LALFDLNQBKJLSEE4 EXPORT_FEATURES: gee_viirs_750_ubuff750_rbuff750_2020_1 (UNSUBMITTED)>
<Task FLLH6KHGTBQ4XL2Q6PO5HK76 EXPORT_FEATURES: gee_viirs_750_ubuff750_rbuff750_2020_2 (UNSUBMITTED)>
<Task OOCFR253G5O45WHAVMKFM4J5 EXPORT_FEATURES: gee_viirs_750_ubuff750_rbuff750_2020_3 (UNSUBMITTED)>
<Task 3PBADVDVYD73XJPGNSOPH5EV EXPORT_FEATURES: gee_viirs_1120_ubuff1120_rbuff1120_2020_0 (UNSUBMITTED)>
<Task YD7ILP3EJ37DSHDEA4JRNZCQ EXPORT_FEATURES: gee_viirs_1120_ubuff1120_rbuff1120_2020_1 (UNSUBMITTED)>
<Task TJUH43YSGLOK2KS2ZORHPE47 EXPORT_FEATURES: gee_viirs_1120_ubuff1120_rbuff1120_2020_2 (UNSUBMITTED)>
<Task 6WGLWO7GMHUSKWMRE6UYYJFY EXPORT_FEATURES: gee_viirs_1120_ubuff1120_rbuff1120_2020_3 (UNSUBMITTED)>
<Task KDUDH4X2OWI6ML424YHYBTUC EXPORT_FEATURES: gee_viirs_1250_ubuff1250_rbuff1250_2020_0 (UNSUBMITTED)>
<Task MEFN7QS23S6DTKVUK76XFN2J EXPORT_FEATURES: gee_viirs_1250_ubuf

In [60]:
## View State
# Print the current state string of each task, one per line, in list order.
for task_i in tasks_all:
    state_i = task_i.status()['state']
    print(state_i)

FAILED
FAILED
FAILED
FAILED


In [61]:
# Display the whole task list (the notebook renders the value of the last expression).
tasks_all

[<Task BIDUIEZSHCTV3KGKRWGY26WS EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_0 (UNSUBMITTED)>,
 <Task NLSCPPZMQ4UAVG2J2YVQX3OC EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_1 (UNSUBMITTED)>,
 <Task 7URYT2DPD4734BIVQGKWHFAA EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_2 (UNSUBMITTED)>,
 <Task 4F5CWNA6BROJVWRNRFOOHOHF EXPORT_FEATURES: gee_ecmwf_weather_q4_ubuff10000_rbuff10000_2020_3 (UNSUBMITTED)>]

## Cancel Tasks

In [43]:
# Manual switch for this cell: set to False to leave the tasks untouched
# when re-running the notebook.
cancel_all_tasks = True

if cancel_all_tasks:
    # Request cancellation of every task in the list.
    for task_i in tasks_all:
        task_i.cancel()

## OLD CODE

In [None]:
# Driver loop: for every dataset name in `to_extract`, work out which
# satellite key and buffer radius to use, then launch the extraction unless
# its output root is already listed in `files_already_extracted`.
#
# Relies on names defined in other cells: survey_df, files_already_extracted,
# extract_satellite_by_year, REEXTRACT_IF_FILE_EXISTS and SURVEY_NAME.
to_extract = ['NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4',
              'elevation', 
              'slope',
              'viirs_1120',
              'viirs_1250',
              'viirs_2000',
              'viirs_2500',
              'viirs_5000',
              'viirs181920_1120',
              'viirs181920_1250',
              'viirs181920_2000',
              'viirs181920_2500',
              'viirs181920_5000',
              'GlobalHumanModification',
              'worldpop_2000',
              'worldpop_5000',
              'worldpop_10000',
              'worldpop2020_2000',
              'worldpop2020_5000',
              'worldpop2020_10000',
              'l8',
              'aod',
              'ecmwf_weather',
              'ecmwf_weather_q1',
              'ecmwf_weather_q2',
              'ecmwf_weather_q3',
              'ecmwf_weather_q4']

# Datasets whose name carries no radius: name -> buffer radius. The same
# radius is used for urban and rural locations, and the satellite key is the
# name itself.
fixed_buffer_by_name = {}
fixed_buffer_by_name.update(dict.fromkeys(
    ['NO2', 'uv_aer', 'CO', 'HCHO', 'ozone', 'SO2', 'CH4', 'l8', 'aod'], 2500))
fixed_buffer_by_name.update(dict.fromkeys(
    ['elevation', 'slope'], 5000))
fixed_buffer_by_name.update(dict.fromkeys(
    ['GlobalHumanModification',
     'ecmwf_weather',
     'ecmwf_weather_q1', 'ecmwf_weather_q2', 'ecmwf_weather_q3', 'ecmwf_weather_q4'], 10000))

# Datasets named '<prefix>_<radius>': prefix -> satellite key handed to the
# extraction function. 'viirs181920' maps to 'viirs', as in the original
# branch chain; `name` is still passed separately to the extraction call.
sat_by_prefix = {'viirs': 'viirs',
                 'viirs181920': 'viirs',
                 'worldpop': 'worldpop',
                 'worldpop2020': 'worldpop2020'}

tasks_all = []
for name in to_extract:
    print(name)

    if name in fixed_buffer_by_name:
        sat = name
        buffer_u = buffer_r = fixed_buffer_by_name[name]
    else:
        # Split on the last underscore so the radius comes from the name
        # instead of a hand-written branch per radius.
        prefix, _, radius = name.rpartition('_')
        if prefix not in sat_by_prefix or not radius.isdigit():
            # Fail loudly rather than silently reusing the previous
            # iteration's buffer sizes for an unrecognised name.
            raise ValueError('No satellite/buffer settings for dataset name: ' + name)
        sat = sat_by_prefix[prefix]
        buffer_u = buffer_r = int(radius)

    file_root = name + '_ubuff' + str(buffer_u) + '_rbuff' + str(buffer_r)

    # Check if should extract data
    if (file_root not in files_already_extracted) or REEXTRACT_IF_FILE_EXISTS:
        print(file_root)

        tasks_i = extract_satellite_by_year(survey_df, buffer_u, buffer_r, sat, name, SURVEY_NAME)
        # NOTE(review): if extract_satellite_by_year returns a list of tasks,
        # append() nests lists inside tasks_all and the status/cancel cells,
        # which call task methods on each element, would need extend()
        # instead - confirm against its definition.
        tasks_all.append(tasks_i)