## Setup

In [242]:
import ee
ee.Authenticate()
ee.Initialize()

Enter verification code:  4/1AX4XfWi4QM7nyi-0RlXPAgqQB9MAMw1lWGkDUNZH4hIRw6IyFIS5VTh_JuI



Successfully saved authorization token.


In [266]:
import numpy as np
import geetools
from geetools import ui, cloud_mask
import os, datetime
import config as cf
import pandas as pd
import eeconvert

cloud_mask_landsatSR = cloud_mask.landsatSR()
cloud_mask_sentinel2 = cloud_mask.sentinel2()

In [267]:
SURVEY_NAME = 'DHS'

## Functions

In [296]:
def survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural):
    '''
    Convert pandas dataframe of survey locations to a feature collection. 
    
    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes 
                   the dataframe contains (1) latitude, (2) longitude and
                   (3) uid variables. Assumes coordinates in WGS84.
    Returns:
        (feature collection)
    '''
    
    survey_fc_list = []
    
    n_rows = survey_df.shape[0]
    for i in range(0, n_rows):
        survey_df_i = survey_df.iloc[[i]]
        
        ur = survey_df_i['urban_rural'].iloc[0]
        if ur == 'U':
            buffer_size = buffer_size_urban
        elif ur == 'R':
            buffer_size = buffer_size_rural

        f_i = ee.Feature(ee.Geometry.Point([survey_df_i['longitude'].iloc[0], 
                                            survey_df_i['latitude'].iloc[0]]), 
                         {'uid': survey_df_i['uid'].iloc[0]})
        
        f_i = f_i.buffer(buffer_size)

        survey_fc_list.append(f_i)
        
    survey_fc = ee.FeatureCollection(survey_fc_list)
    
    return survey_fc

def extract_sat(survey_df, buffer_size_urban, buffer_size_rural, satellite, year):
    '''
    Extract satellite imagery to locations 
    
    Inputs:
        survey_df: pandas dataframe of survey locations. Function assumes 
                   the dataframe contains (1) latitude, (2) longitude and
                   (3) uid variables. Assumes coordinates in WGS84.
    Returns:
        (feature collection)
    '''
    
    #print(survey_df.uid)
    
    # Prep l7 ---------------------------------------------------
    if satellite == 'l7':
        
        # Scale
        SCALE = 100 # ok to upscale
        
        # Year
        year_use = year
        
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LC07/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        ndvi = image.normalizedDifference(['B4', 'B3']).rename('NDVI');
        image = image.addBands(ndvi)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7']
        
    # Prep l8 ---------------------------------------------------
    if satellite == 'l8':
        
        SCALE = 100 # ok to upscale
        #SCALE = 2000
        
        # Year
        # landsat 8 starts in April 2013; if year is less than
        # 2014, use 2014 as year (to ensure have year before and after)
        if year < 2014:
            year_use = 2014
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('LANDSAT/LC08/C01/T1_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_landsatSR)\
            .median()\
            .multiply(0.0001)

        # https://www.linkedin.com/pulse/ndvi-ndbi-ndwi-calculation-using-landsat-7-8-tek-bahadur-kshetri
        ndvi = image.normalizedDifference(['B5', 'B4']).rename('NDVI');
        ndbi = image.normalizedDifference(['B6', 'B5']).rename('NDBI');
        image = image.addBands(ndvi)
        image = image.addBands(ndbi)
        
        bu = image.select('NDBI').subtract(image.select('NDVI')).rename('BU')
        image = image.addBands(bu)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B10', 'B11', 'NDVI', 'NDBI', 'BU']
        #BANDS = ['NDVI']
        
    # Prep s2 ---------------------------------------------------
    if satellite == 's2':
        
        SCALE = 100 # ok to upscale
        
        # Year
        # sentinel starts in March 2017; juse use 2018
        year_use = 2018
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-12-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('COPERNICUS/S2_SR')\
            .filterDate(year_minus_str, year_plus_str)\
            .map(cloud_mask_sentinel2)\
            .median()\
            .multiply(0.0001)
        
        ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI');
        image = image.addBands(ndvi)
        
        BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'NDVI']

        image = image.select(BANDS)    
    
    # Prep viirs ---------------------------------------------------
    if satellite == 'viirs':
        
        SCALE = 500 
        
        # Year
        # VIIRS starts in April 2012; if year is less than
        # 2013, use 2013 as year (to ensure have year before and after)
        if year < 2013:
            year_use = 2013
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/VIIRS/DNB/MONTHLY_V1/VCMCFG')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['avg_rad']
        
    # Prep DMSP ---------------------------------------------------
    if satellite == 'dmsp':
        
        SCALE = 1000 
        
        # Year
        # DMSP-OLS starts in 2013; if year is more than
        # 2012, use 2012 as year (to ensure have year before and after)
        if year > 2012:
            year_use = 2012
        else:
            year_use = year
                    
        year_plus = year_use + 1
        year_minus = year_use - 1
        
        year_minus_str = str(year_minus) + '-01-01'
        year_plus_str = str(year_plus) + '-12-31'
        
        image = ee.ImageCollection('NOAA/DMSP-OLS/NIGHTTIME_LIGHTS')\
            .filterDate(year_minus_str, year_plus_str)\
            .median()
        
        BANDS = ['stable_lights', 'avg_lights_x_pct']
    
    # Prep Survey ---------------------------------------------------
    survey_fc = survey_to_fc_buffer(survey_df, buffer_size_urban, buffer_size_rural)
    
    # Extract Values ---------------------------------------------------
    #print(survey_fc.size().getInfo())
        
    vals = image.reduceRegions(collection = survey_fc,
                               reducer = ee.Reducer.mean(),
                               scale = SCALE,
                               tileScale = 8)

    survey_df = survey_df[['uid']]
    
        
    #print(BANDS)
    for band_i in BANDS:
        #print(band_i)
        #a = vals.aggregate_array(band_i).getInfo()
        #print(len(a))
        survey_df[satellite + '_' + band_i] = vals.aggregate_array(band_i).getInfo()
        
    return survey_df

def extract_satellite_in_chunks(survey_df, buffer_size_urban, buffer_size_rural, satellite, year):
    
    vals_df_list = []
    
    for chunk_i in list(np.unique(survey_df.chunk_id)):
        print(chunk_i)

        survey_df_i = survey_df[survey_df['chunk_id'] == chunk_i]
        print(survey_df_i.shape)
        vals_i_df = extract_sat(survey_df_i, buffer_size_urban, buffer_size_rural, satellite, year)

        vals_df_list.append(vals_i_df)

    vals_df = pd.concat(vals_df_list)
    
    return vals_df

def extract_satellite_by_year(survey_df, buffer_size_urban, buffer_size_rural, satellite):
    
    vals_df_list = []
    
    for year_i in list(np.unique(survey_df.year)):
        print(year_i)

        survey_df_i = survey_df[survey_df['year'] == year_i]
        vals_i_df = extract_satellite_in_chunks(survey_df_i, buffer_size_urban, buffer_size_rural, satellite, year_i)

        vals_df_list.append(vals_i_df)

    vals_df = pd.concat(vals_df_list)
    
    return vals_df

def chunk_ids(total_length, chunk_size):
    n_numbers = np.ceil(total_length / chunk_size)
    n_numbers = int(n_numbers)
    
    chunk_ids = list(range(0,n_numbers)) * chunk_size
    chunk_ids.sort()
    chunk_ids = chunk_ids[:total_length]
    
    return chunk_ids

## Load/Prep Survey Data

In [297]:
survey_df = pd.read_csv(os.path.join(cf.SECURE_DATA_DIRECTORY, 'Data', SURVEY_NAME, 'FinalData - PII', 'GPS_uid_crosswalk.csv'))

survey_df = survey_df[survey_df.uid != 'IA201400180012']

#survey_df = survey_df[survey_df.most_recent_survey == True]
#survey_df = survey_df.head(3000)
CHUNK_SIZE = 1000

survey_years = list(survey_df.year.unique())

In [298]:
survey_df.shape

(33406, 7)

In [299]:
survey_df['chunk_id'] = chunk_ids(survey_df.shape[0], CHUNK_SIZE)

In [300]:
#survey_df = survey_df[(survey_df['chunk_id'] == 11) & (survey_df['year'] == 2015)]
#survey_df['longitude'].value_counts().value_counts()

In [250]:
# DEBUGGING!!!
buffer_size_urban = 2000
buffer_size_rural = 2000

survey_df_i = survey_df[survey_df['chunk_id'] == 1]
survey_df_i.shape
SCALE = 100 # ok to upscale

# Year
# sentinel starts in March 2017; juse use 2018
year_use = 2018

year_plus = year_use + 1
year_minus = year_use - 1

year_minus_str = str(year_minus) + '-01-01'
year_plus_str = str(year_plus) + '-12-31'

image = ee.ImageCollection('COPERNICUS/S2_SR')\
    .filterDate(year_minus_str, year_plus_str)\
    .map(cloud_mask_sentinel2)\
    .median()\
    .multiply(0.0001)

ndvi = image.normalizedDifference(['B8', 'B4']).rename('NDVI');
image = image.addBands(ndvi)

BANDS = ['B1', 'B2', 'B3', 'B4', 'B5', 'B6', 'B7', 'B8', 'B8A', 'B9', 'B11', 'B12', 'AOT', 'NDVI']

image = image.select(BANDS)  

# Prep Survey ---------------------------------------------------
survey_fc = survey_to_fc_buffer(survey_df_i, buffer_size_urban, buffer_size_rural)

# Extract Values ---------------------------------------------------
vals = image.reduceRegions(collection = survey_fc,
                           reducer = ee.Reducer.mean(),
                           scale = SCALE,
                           tileScale = 8)

a = eeconvert.fcToDf(vals)
#band_i = 'B1'
#a = vals.aggregate_array(band_i)
a = ee.Feature(vals.first()).select(['B1'])
print(a.getInfo())

EEException: Feature.select: Parameter 'input' is required.

## Extract Sentinel

In [None]:
val_s2_df = extract_satellite_by_year(survey_df, 2000, 2000, 's2')

val_s2_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_s2.csv'),
              index = False)

val_s2_df.head()

## Extract Landsat 8

In [301]:
#survey_df['chunk_id'] = range(0, survey_df.shape[0])

In [302]:
val_l8_df = extract_satellite_by_year(survey_df, 3000, 3000, 'l8')

val_l8_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_l8.csv'),
              index = False)



2012
29
(314, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


2014
29
(611, 8)
2015
0
(328, 8)
1
(1000, 8)
2
(1000, 8)
3
(1000, 8)
4
(1000, 8)
5
(1000, 8)
6
(1000, 8)
7
(1000, 8)
8
(1000, 8)
9
(1000, 8)
10
(1000, 8)
11
(1000, 8)
12
(1000, 8)
13
(1000, 8)
14
(1000, 8)
15
(1000, 8)
16
(1000, 8)
17
(1000, 8)
18
(1000, 8)
19
(1000, 8)
20
(1000, 8)
21
(1000, 8)
22
(1000, 8)
23
(1000, 8)
24
(1000, 8)
25
(1000, 8)
26
(1000, 8)
27
(1000, 8)
28
(1000, 8)
29
(75, 8)
30
(430, 8)
2016
30
(383, 8)
32
(49, 8)
33
(406, 8)
2017
0
(672, 8)
30
(187, 8)
31
(1000, 8)
32
(951, 8)


## Extract Landsat 7

In [None]:
val_l7_df = extract_satellite_by_year(survey_df, 2000, 5000, 'l7')

val_l7_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_l7.csv'),
              index = False)

val_l7_df.head()

## Extract VIIRS

In [67]:
val_viirs_df = extract_satellite_by_year(survey_df, 2000, 2000, 'viirs')

val_viirs_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_viirs.csv'),
              index = False)

val_viirs_df.head()

(314, 8)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


(611, 8)
(328, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(1000, 8)
(75, 8)
(431, 8)
(383, 8)
(48, 8)
(407, 8)
(672, 8)
(186, 8)
(1000, 8)
(952, 8)


Unnamed: 0,uid,viirs_avg_rad
29676,KY201200000105,0.374646
29677,KY201200000106,0.725092
29678,KY201200000107,0.169814
29679,KY201200000108,0.772588
29680,KY201200000109,0.873666


## Extract DMSP-OLS

In [None]:
val_dmsp_df = extract_satellite_by_year(survey_df, 2000, 5000, 'dmsp')

val_dmsp_df.to_csv(os.path.join(cf.DROPBOX_DIRECTORY, 
                            'Data', 
                            SURVEY_NAME, 
                            'FinalData', 
                            'Individual Datasets',
                           'survey_dmsp.csv'),
              index = False)

val_dmsp_df.head()

In [None]:
#import folium
#from folium import plugins

#mapid = survey_i_fc.getMapId()

#map = folium.Map(location=[80.773137, 7.873592])
#folium.TileLayer(
#    tiles=mapid['tile_fetcher'].url_format,
#    attr='Map Data &copy; <a href="https://earthengine.google.com/">Google Earth Engine</a>',
#    overlay=True,
#    name='border',
#  ).add_to(map)

#map.add_child(folium.LayerControl())


In [None]:
survey_df.head()