In [None]:
# Download survey GeoTiffs from Landsat 7

New, faster method for downloading DHS cluster images! Based on [this blog post by Noel Gorelick](https://gorelick.medium.com/fast-er-downloads-a2abd512aa26).

Cindy Conlin modified this code, which was originally developed by Markus Pettersson.

Import, authenticate and initialize the earth-engine library

In [None]:
import ee

# High-volume Earth Engine endpoint.
# See https://developers.google.com/earth-engine/cloud/highvolume
HIGH_VOLUME_URL = 'https://earthengine-highvolume.googleapis.com'

# Authenticate first, then initialize the API against the high-volume endpoint.
ee.Authenticate()
ee.Initialize(opt_url=HIGH_VOLUME_URL)

In [None]:
# Import other libraries
import pandas as pd
import os
import satellite_sampling_L7
import datetime

Read the CSV file with survey points

In [None]:
# Root data directory. Defaults to the original cluster location; set the
# DHS_DATA_DIR environment variable to run the notebook on another machine
# without editing this cell.
data_dir = os.environ.get(
    'DHS_DATA_DIR',
    '/mimer/NOBACKUP/groups/globalpoverty1/cindy/eoml_ch_wb/data/',
)
dhs_cluster_file_path = os.path.join(data_dir, 'AIGlobalLab/dhs_clusters.csv')
df = pd.read_csv(dhs_cluster_file_path)

# Fail early with a clear message: the survey split later in the notebook
# groups this table by 'country' and 'year'.
missing_columns = {'country', 'year'} - set(df.columns)
assert not missing_columns, (
    f'{dhs_cluster_file_path} is missing required columns: {sorted(missing_columns)}'
)

df.head()

Split the dataframe into each country-year combination:

In [None]:
# One (key, frame) pair per country-year survey. The key is the
# (country, year) group label; each frame gets a fresh 0-based index so that
# row labels are local to the survey.
surveys_with_dfs = [
    (survey_key, survey_rows.reset_index(drop=True))
    for survey_key, survey_rows in df.groupby(['country', 'year'])
]

Function for checking whether a sample has already been downloaded, so the script can skip it if it needs to be restarted for some reason

In [None]:
def check_if_downloaded(row, save_dir, min_file_size=3145728):
    """Report whether the GeoTIFF for ``row`` is already present in ``save_dir``.

    The expected file is ``row.name`` zero-padded to five digits with a
    ``.tif`` extension (e.g. ``00042.tif``).

    Args:
        row: Object whose ``name`` attribute is an integer label, such as a
            DataFrame row passed by ``DataFrame.apply(..., axis=1)``.
        save_dir: Directory in which to look for the file.
        min_file_size: Size threshold in bytes (default 3145728, i.e. 3 MiB).
            The file must be strictly larger than this to count -- presumably
            to reject truncated downloads; confirm the threshold against the
            expected image size.

    Returns:
        True if the file exists as a regular file and exceeds
        ``min_file_size`` bytes, otherwise False.
    """
    tif_path = os.path.join(save_dir, f'{row.name:05d}.tif')

    # Nothing on disk (or not a regular file): not downloaded.
    if not os.path.isfile(tif_path):
        return False

    return os.path.getsize(tif_path) > min_file_size

Download each survey from Google Earth Engine

In [None]:
# Country-year surveys used in the analysis, keyed as f"{year}{country}".
# Built once, outside the loop, as a set so each membership test is O(1)
# instead of rebuilding and scanning a list on every iteration.
SURVEYS_IN_ANALYSIS = {
    "1999burkina_faso",
    "1999ghana",
    "1999guinea",
    "1999ivory_coast",
    "1999tanzania",
    "1999zimbabwe",
    "1998niger",
    "1998togo",
    "1997madagascar",
    "1997senegal",
    "1996benin",
    "1996egypt",
    "1996mali",
    "1995central_african_republic",
    "1991cameroon",
    "2000ethiopia",
    "2000malawi",
    "2000namibia",
    "2000uganda",
    "2003kenya",
    "2003morocco",
    "2003nigeria",
    "2004lesotho",
    "2005rwanda",
    "2006angola",
    "2006eswatini",
    "2007democratic_republic_of_congo",
    "2007zambia",
    "2008liberia",
    "2008sierra_leone",
    "2010burundi",
    "2011mozambique",
    "2012comoros",
    "2012gabon",
    "2014chad",
    "2016south_africa",
}

for survey, survey_df in surveys_with_dfs:
    country, year = survey

    # Download only the country-years used in the analysis
    if f"{year}{country}" not in SURVEYS_IN_ANALYSIS:
        continue

    save_dir = os.path.join(data_dir, f'dhs_tifs_L7/{country}_{year}')

    # Create the survey folder if needed. exist_ok=True makes this a no-op on
    # restarts, so the same "what is still missing?" check covers both a
    # fresh and a partially downloaded survey.
    os.makedirs(save_dir, exist_ok=True)
    is_downloaded = survey_df.apply(lambda row: check_if_downloaded(row, save_dir), axis=1)
    samples_to_download = survey_df[~is_downloaded]

    # If there are no samples left to download for this survey, skip it
    if samples_to_download.empty:
        continue

    # Log only once we know there is work to do, so a restarted run does not
    # report "Downloading" for surveys that are already complete.
    print(f'Downloading images for {country}-{year}...'+
        datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S"))

    satellite_sampling_L7.export_images(samples_to_download, save_dir, span_length=1)