## Setup

In [2]:
# Earth Engine client: Authenticate() runs the interactive authorization
# prompt, Initialize() then starts the session used by the rest of the notebook.
import ee
ee.Authenticate()
ee.Initialize()

Enter verification code:  [REDACTED]



Successfully saved authorization token.


In [5]:
import numpy as np
import geetools
from geetools import ui, cloud_mask
import os, datetime
import config as cf
import pandas as pd

# Landsat surface-reflectance cloud mask from geetools; it is mapped over the
# Landsat 8 image collection inside prep_cnn_np.
mask_l8SR_all = cloud_mask.landsatSR()

  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)


In [6]:
# Side length of the square kernel used to extract the neighborhood (image
# patch) around each survey location.
KERNEL_SIZE = 224

## Functions

In [41]:
def survey_to_fc(survey_df):
    '''
    Build an Earth Engine feature collection of points from survey rows.

    Inputs:
        survey_df: pandas dataframe with one row per survey location. It is
                   expected to have 'longitude', 'latitude' and 'uid'
                   columns; coordinates are assumed to be WGS84.
    Returns:
        (feature collection) one point feature per row, in row order, each
        carrying the row's 'uid' as a property.
    '''

    def _row_to_feature(pos):
        # Positional access so the dataframe index does not matter.
        point = ee.Geometry.Point([survey_df['longitude'].iloc[pos],
                                   survey_df['latitude'].iloc[pos]])
        return ee.Feature(point, {'uid': survey_df['uid'].iloc[pos]})

    features = [_row_to_feature(pos) for pos in range(survey_df.shape[0])]

    return ee.FeatureCollection(features)

def normalized_diff(values1, values2):
    '''
    Normalized difference of two inputs, computed element-wise as
    (values2 - values1) / (values2 + values1).

    Input:  values1, values2 (must be same dimensions)

    Output: np array
    '''

    difference = values2 - values1
    total = values2 + values1

    return difference / total

def ee_to_np_l8(f, n_rows):
    '''
    Transforms feature collection from neighborhood array to np array for landsat 8

    Input:
      f (features): list of feature dicts; each must have a 'properties'
                    dict holding one 2-D array (nested list) per band
                    'B1'-'B7', 'B10', 'B11'.
      n_rows (number of features): the first n_rows entries of f are used.

    Output: tuple of 8 np arrays, each with the feature index as first axis:
      (rgb, b1, b5, b6, b7, b10, b11, ndvi)
      - rgb has a trailing axis of size 3 holding B2, B3, B4 in that order
      - every other array has a trailing axis of size 1
      - ndvi is (B5 - B4) / (B5 + B4), i.e. (NIR - Red) / (NIR + Red)

    Raises:
      IndexError if f holds fewer than n_rows features.
      ValueError (from np.stack) if n_rows is 0.
    '''

    rgb_bands = ('B2', 'B3', 'B4')
    # Order here fixes the order of the single-band arrays in the return value.
    single_bands = ('B1', 'B5', 'B6', 'B7', 'B10', 'B11')

    rgb_all = []
    single_all = {band: [] for band in single_bands}
    ndvi_all = []

    for i in range(0, n_rows):
        props = f[i]['properties']

        # Stack the three bands on a new last axis -> (rows, cols, 3)
        rgb_all.append(np.stack([np.array(props[band]) for band in rgb_bands],
                                axis=-1))

        # Single bands get a trailing channel axis -> (rows, cols, 1)
        for band in single_bands:
            single_all[band].append(np.expand_dims(np.array(props[band]), axis=2))

        # NDVI = (NIR - Red) / (NIR + Red); for Landsat 8 NIR is B5 and Red is
        # B4. The previous code had the operands swapped and produced -NDVI.
        red = np.array(props['B4'])
        nir = np.array(props['B5'])
        ndvi_all.append(np.expand_dims((nir - red) / (nir + red), axis=2))

    return (np.stack(rgb_all),
            *(np.stack(single_all[band]) for band in single_bands),
            np.stack(ndvi_all))

def prep_cnn_np(survey_df,
                satellite,
                begin_date,
                end_date,
                kernel_size):
    '''
    Extract an image patch around every survey location from Google Earth
    Engine and return the patches as numpy arrays for a CNN.

    Input:  survey_df   - pandas dataframe of survey locations; converted to
                          a feature collection with survey_to_fc
            satellite   - imagery source; only "l8" (Landsat 8) is supported
            begin_date  - start date passed to the collection's filterDate
            end_date    - end date passed to the collection's filterDate
            kernel_size - side length of the square neighborhood kernel
    Output: tuple of numpy arrays as returned by ee_to_np_l8
    Raises: ValueError if satellite is not supported
    '''

    # Fail early with a clear message instead of an unbound-name error later.
    if satellite != "l8":
        raise ValueError("Unsupported satellite: {!r} (expected 'l8')".format(satellite))

    survey_fc = survey_to_fc(survey_df)

    # Grab satellite and reduce it
    BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11']
    SCALE = 30

    # Cloud-masked median composite over the requested date range.
    # NOTE(review): the 0.0001 factor is applied to every band, including
    # B10/B11 - confirm that this is the intended scaling for those bands.
    image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
      .filterDate(begin_date, end_date)\
      .map(mask_l8SR_all)\
      .median()\
      .multiply(0.0001)

    image = image.select(BANDS)

    # Image to neighborhood array: a kernel_size x kernel_size kernel of ones
    kernel_row = ee.List.repeat(1, kernel_size)
    kernel_weights = ee.List.repeat(kernel_row, kernel_size)
    kernel = ee.Kernel.fixed(kernel_size, kernel_size, kernel_weights)

    arrays = image.neighborhoodToArray(kernel)

    # Extract values from GEE
    values_ee = arrays.sample(
      region = survey_fc,
      scale = SCALE,
      tileScale = 8
    )

    # getInfo() pulls the sampled values to the client as a plain dict
    dict_ee = values_ee.getInfo()

    # Convert values to numpy array
    # NOTE(review): assumes one returned feature per survey row - confirm that
    # sample() cannot drop locations.
    n_rows = survey_df.shape[0]
    f = dict_ee['features']

    return ee_to_np_l8(f, n_rows)

def chunk_ids(total_length, chunk_size):
    '''
    Assign consecutive chunk ids to a sequence of rows.

    Input:  total_length - number of rows to label
            chunk_size   - number of rows per chunk

    Output: list of length total_length; the first chunk_size entries are 0,
            the next chunk_size entries are 1, and so on (the last chunk may
            be shorter).
    '''

    n_chunks = int(np.ceil(total_length / chunk_size))

    # Emit every id chunk_size times, already in ascending order.
    ids = [chunk for chunk in range(n_chunks) for _ in range(chunk_size)]

    # Trim the overshoot from the final, possibly partial, chunk.
    return ids[:total_length]

In [42]:
## Implement

In [53]:
# Load the GPS/uid crosswalk and keep only rows flagged most_recent_survey.
survey_df = pd.read_csv(os.path.join(cf.SECURE_DATA_DIRECTORY, 'Data', 'DHS', 'FinalData - PII', 'GPS_uid_crosswalk.csv'))
# .copy() makes the filtered frame independent of the one read from disk, so
# adding columns to it later does not raise pandas' SettingWithCopyWarning.
survey_df = survey_df[survey_df.most_recent_survey == True].copy()
# Uncomment to try the pipeline on a small subset:
#survey_df = survey_df.head(50)
# Number of survey locations sent to Earth Engine per request
CHUNK_SIZE = 10

In [54]:
# Label every row with a chunk id so the survey can be processed CHUNK_SIZE
# rows at a time.
survey_df['chunk_id'] = chunk_ids(survey_df.shape[0], CHUNK_SIZE)

In [48]:
# Accumulators for the per-chunk arrays returned by prep_cnn_np.
brgb_all = []
b1_all = []
b5_all = []
b6_all = []
b7_all = []
b10_all = []
b11_all = []
bndvi_all = []
survey_all = []

# One prep_cnn_np call (and therefore one Earth Engine request) per chunk id.
for chunk_i in list(np.unique(survey_df.chunk_id)):
  print(chunk_i)

  survey_df_i = survey_df[survey_df['chunk_id'] == chunk_i]

  l8_result_i = prep_cnn_np(survey_df_i,
                  satellite = 'l8',
                  begin_date = '2017-01-01',
                  end_date = '2020-01-01',
                  kernel_size = KERNEL_SIZE)

  brgb, b1, b5, b6, b7, b10, b11, bndvi = l8_result_i

  # NOTE(review): only the RGB and B1 arrays are kept; b5_all, b6_all, b7_all,
  # b10_all, b11_all, bndvi_all and survey_all are never filled - confirm
  # whether that is intentional.
  brgb_all.append(brgb.copy())
  b1_all.append(b1.copy())

0
1
2
3
4


In [49]:
# Join the per-chunk arrays along the first axis into one array per output.
brgb_np = np.concatenate(brgb_all)
b1_np = np.concatenate(b1_all)

In [50]:
# Save the stacked RGB patches to disk.
# NOTE(review): hard-coded absolute path on one user's machine - this cell
# will fail elsewhere; consider a configurable output directory.
np.save('/Users/robmarty/Desktop/test1234_50.npy', brgb_np)

In [57]:
# Largest chunk id; ids start at 0, so the number of chunks is this value + 1.
max(survey_df.chunk_id)

3472

In [38]:
# (rows, columns) of the survey table.
survey_df.shape

(34729, 6)

In [39]:
# Scratch arithmetic: row count divided by 100 - presumably the number of
# chunks at a chunk size of 100; confirm.
34729/100

347.29

In [40]:
# Scratch arithmetic: presumably a total-runtime estimate (chunks x ~120.4 per
# chunk, units not shown) - confirm.
347.29*120.4

41813.71600000001