# Download survey GeoTIFFs

New, faster method for downloading DHS cluster images! Based on [this blog post by Noel Gorelick](https://gorelick.medium.com/fast-er-downloads-a2abd512aa26).

Adapted from code provided by Markus Pettersson.

Import the Earth Engine library, then authenticate and initialize it

In [None]:
import ee

# Authenticate with Earth Engine before the client is initialized below.
ee.Authenticate()

# Initialize the Google Earth Engine API with the high volume end-point.
# The endpoint is selected through the `opt_url` argument.
# See https://developers.google.com/earth-engine/cloud/highvolume
ee.Initialize(opt_url='https://earthengine-highvolume.googleapis.com')

In [None]:
# Import other libraries
import pandas as pd
import os
import satellite_sampling_annual_v3
import datetime

Read the csv file with survey points

In [None]:
# Directory containing the csv file with the survey points.
interim_data_dir = '/mimer/NOBACKUP/groups/globalpoverty1/cindy/eoml_ch_wb/data/interim'
dhs_cluster_file_path = os.path.join(interim_data_dir, 'dhs_est_iwi.csv')

# Load the survey points; the table is later grouped by its 'country' and
# 'year' columns, so both must be present in the file.
df = pd.read_csv(dhs_cluster_file_path)

# Show the first rows as a quick sanity check of the loaded table.
df.head()

Split the dataframe into each country-year combination:

In [None]:
# One (survey key, clusters) pair per country-year combination. Each group's
# index is reset so its rows are numbered from 0 within the survey; that row
# index is what check_if_downloaded uses to build the .tif file name.
surveys_with_dfs = [
    (survey_key, clusters.reset_index(drop=True))
    for survey_key, clusters in df.groupby(['country', 'year'])
]

Function for checking whether a sample has already been downloaded, so that the script can be restarted without downloading existing samples again

In [None]:
def check_if_downloaded(row, save_dir, min_file_size=3145728):
    """Return True if the image for ``row`` is already present in ``save_dir``.

    The expected file is named after the row's index label (``row.name``),
    zero-padded to five digits, with a ``.tif`` extension.

    Args:
        row: Object whose ``name`` attribute is the integer row index, e.g. a
            row passed by ``DataFrame.apply(..., axis=1)``.
        save_dir: Directory in which the survey's images are stored.
        min_file_size: Size in bytes that the file must strictly exceed to be
            counted as downloaded. Defaults to 3145728 (3 * 1024 * 1024).
            Presumably this filters out truncated downloads; confirm against
            the expected image size.

    Returns:
        bool: True when the file exists and is larger than ``min_file_size``,
        otherwise False.
    """
    tif_path = os.path.join(save_dir, f'{row.name:05d}.tif')

    # A missing file (or a non-file entry with that name) is not downloaded.
    if not os.path.isfile(tif_path):
        return False

    return os.path.getsize(tif_path) > min_file_size

Download each survey from Google Earth Engine

In [None]:
# Root directory for all downloaded data. It is the same for every survey, so
# it is defined once rather than on every loop iteration.
data_dir = '/mimer/NOBACKUP/groups/globalpoverty1/cindy/eoml_ch_wb/data/'

# Download the images of each (country, year) survey into its own directory,
# skipping samples that are already present from an earlier run.
for survey, survey_df in surveys_with_dfs:
    country, year = survey
    print(f'Downloading images for {country}-{year}...'+
        datetime.datetime.now().strftime("%d.%b %Y %H:%M:%S"))

    save_dir = os.path.join(data_dir, f'dhs_tifs_annual_v3/{country}_{year}')

    # Check if survey is already fully/partially downloaded
    if os.path.exists(save_dir):
        is_downloaded = survey_df.apply(lambda row: check_if_downloaded(row, save_dir), axis=1)
        samples_to_download = survey_df[~is_downloaded]
    else:
        # exist_ok guards against the directory appearing between the check
        # above and this call.
        os.makedirs(save_dir, exist_ok=True)
        samples_to_download = survey_df

    # If there are no samples to download for this survey, continue to next one
    if len(samples_to_download) == 0:
        continue

    # Use the module that is actually imported (satellite_sampling_annual_v3);
    # the previous reference to satellite_sampling_annual_v2 raised NameError.
    satellite_sampling_annual_v3.export_images(samples_to_download, save_dir, span_length=1)