*This code calculates the distance of each snail occurrence point to urban areas (defined by the UN) using population data interpolated from 1990-1999*

In [None]:
# this code will create rasters that is distance to urban edges in Brazil
import ee

# Trigger the authentication flow.
ee.Authenticate()

# Initialize the library.
ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=8Op1GJrRjW9erEazA21bXMjO7W8mcOrV2G4o1Iftc8c&tc=0racLep1j7jjJ40rUzpJSdZ472Yj6P4o0Pr0s5I1E4E&cc=OInxss11-bstdimUdeqPyjRApr73PuMDqHeOHR2O5sU

The authorization workflow will generate a code, which you should paste in the box below.
Enter verification code: 4/1AfJohXlxi0GGl2U1O3ocoemeI_0Th_DC0Yj0u5aYsSNMtwXDc0cTvWy5K04

Successfully saved authorization token.


**SET UP DATA**

In [None]:
## set up the variables to include in the function
#Brazil feature
region = ee.FeatureCollection('FAO/GAUL_SIMPLIFIED_500m/2015/level0').filter(ee.Filter.equals('ADM0_NAME', 'Brazil'));


# Read in snail points
all_sdm_points = ee.FeatureCollection('users/cglidden/all_points_schisto_sdm')

def buffer_points(radius, bounds):
    def buffer_feature(pt):
        pt = ee.Feature(pt)
        return ee.Algorithms.If(bounds, pt.buffer(radius).bounds(), pt.buffer(radius))
    return buffer_feature

# Asset of regions for which you want to calculate statistics - radius of 1km2
bufferedPoints = all_sdm_points.map(buffer_points(500, True))


**Load in pre-processed interpolated data**
using code from https://spatialthoughts.com/2021/11/08/temporal-interpolation-gee/


In [None]:
# interpolated population size @ 1km
pop1k = ee.Image('users/cglidden/interpolated_population_90s_v3');

# intermediate urban pixels 300-1500
int_urban = ee.Image('users/cglidden/int_urban_center_int90s_v3'); # Bands = "population_"*year*

# high urban pixels > 1500
high_urban = ee.Image('users/cglidden/high_urban_center_int90s_v3'); # Bands = "population_"*year*

**DEFINE FUNCTION TO GET CONTIGUOUS URBAN PIXELS FILTERING BY TOTAL POPULATION CUTOFF (5k, 150k)**

In [None]:
def mapUrban(year):

    yearString = ee.Number(year).toInt().format()
    band_name = ee.String("population_").cat(yearString)

    # for medium to high pop
    urbanMask1 = int_urban.select([band_name]);
    urbanMask1 = urbanMask1.updateMask(urbanMask1.neq(0)); # non-ones already be masked but leaving this here

    # for high pop
    urbanMask2 = high_urban.select([band_name]);
    urbanMask2 = urbanMask2.updateMask(urbanMask2.neq(0)); # non-ones already be masked but leaving this here

    urban1vect = urbanMask1.select([band_name]).reduceToVectors(
       geometry = region.geometry(),
        crs=pop1k.projection(),
        scale=1000,
      geometryType='polygon',
        eightConnected=True,
        labelProperty='zone',
        bestEffort=False,
        maxPixels=1e13,
        tileScale=16
    )

    urban2vect = urbanMask2.select([band_name]).reduceToVectors(
        geometry= region.geometry(),
        crs=pop1k.projection(),
        scale=1000,
        geometryType='polygon',
        eightConnected=True,
        labelProperty='zone',
        bestEffort=False,
        maxPixels=1e13,
        tileScale=16
    )

    # now get population per feature
    reducer = ee.Reducer.sum();
    pop_sum_features1 = (pop1k.select([band_name]).reduceRegions(
                      collection = urban1vect,
                      reducer = reducer,
                      scale = 1000,
                      crs = 'EPSG:4326',
                      tileScale = 16));


    pop_sum_features2 = (pop1k.select([band_name]).reduceRegions(
                      collection = urban2vect,
                      reducer = reducer,
                      scale = 1000,
                      crs = 'EPSG:4326',
                      tileScale = 16));

    # filter features
    pop_sum_features_final1 = (pop_sum_features1
                              .filter(ee.Filter.gte('sum', 5000)));

    pop_sum_features_final2 = (pop_sum_features2
                              .filter(ee.Filter.gte('sum', 150000)));

    # convert back to binary image
    final_int_urban = (pop_sum_features_final1
                       .filter(ee.Filter.notNull(['sum']))
                       .reduceToImage(
                          properties = ['sum'],
                          reducer = ee.Reducer.first())
                       .rename(ee.String("contig_Iurban_").cat(ee.Number(year).toInt().format()))
                       .unmask(0).clip(region).gt(0));

    final_high_urban = (pop_sum_features_final2
                       .filter(ee.Filter.notNull(['sum']))
                       .reduceToImage(
                          properties = ['sum'],
                          reducer = ee.Reducer.first())
                       .rename(ee.String("contig_Hurban_").cat(ee.Number(year).toInt().format()))
                       .unmask(0).clip(region).gt(0));

    return ee.Image(final_int_urban.addBands(final_high_urban))

# Define a function to remove prefix from band name
def remove_index(band_name):
    return ee.String(band_name).slice(-18)




**Export urban boundaries in chunks bc memory intensive**








* 1990 - 1991

In [None]:
# Define the years as a list of strings - 1990 - 1991
years_1991 = ee.List.sequence(1990, 1991) # Modify this list as needed

region_images1991 = ee.ImageCollection(years_1991.map(mapUrban))
region_image1991 = region_images1991.toBands()

# Get the band names
band_names1991 = region_image1991.bandNames()

# Apply the function to each band name
new_band_names1991 = band_names1991.map(remove_index)

# Rename the bands
renamed_image1991 = region_image1991.select(band_names1991, new_band_names1991)
# print(region_images)

# Define the asset ID where you want to save the collection
asset_id1991 = 'projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/contigUrbanUN/contigUrban1991'

# Save the collection
task = ee.batch.Export.image.toAsset(
    image= renamed_image1991,
    description ='contigUrban1991',
    assetId=asset_id1991,
    region=region.geometry(),
    scale=1000,
    crs='EPSG:4326'
)
task.start()

* 1992 - 1994

In [None]:
####### break up export bc of memory issues
# Define the years as a list of strings - 1990 - 1994
years_1994 = ee.List.sequence(1992, 1994) # Modify this list as needed

region_images1994 = ee.ImageCollection(years_1994.map(mapUrban))
region_image1994 = region_images1994.toBands()

# Get the band names
band_names1994 = region_image1994.bandNames()

# Apply the function to each band name
new_band_names1994 = band_names1994.map(remove_index)

# Rename the bands
renamed_image1994 = region_image1994.select(band_names1994, new_band_names1994)
# print(region_images)

# Define the asset ID where you want to save the collection
asset_id1994 = 'projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/contigUrbanUN/contigUrban1994'

# Save the collection
task = ee.batch.Export.image.toAsset(
    image= renamed_image1994,
    description ='contigUrban1994',
    assetId=asset_id1994,
    region=region.geometry(),
    scale=1000,
    crs='EPSG:4326'
)
task.start()

*   1995 - 1999

In [None]:
# Define the years as a list of strings - 1995 - 1999
years_1999 = ee.List.sequence(1995, 1999) # Modify this list as needed

region_images1999 = ee.ImageCollection(years_1999.map(mapUrban))
region_image1999 = region_images1999.toBands()

# Get the band names
band_names1999 = region_image1999.bandNames()

# Apply the function to each band name
new_band_names1999 = band_names1999.map(remove_index)

# Rename the bands
renamed_image1999 = region_image1999.select(band_names1999, new_band_names1999)
# print(region_images)

# Define the asset ID where you want to save the collection
asset_id1999 = 'projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/contigUrbanUN/contigUrban1999'

# Save the collection
task = ee.batch.Export.image.toAsset(
    image= renamed_image1999,
    description ='contigUrban1999',
    assetId=asset_id1999,
    region=region.geometry(),
    scale=1000,
    crs='EPSG:4326'
)
task.start()

**FINAL CUMULATIVE COST MAPPING GeoTiffs** -- need to consolidate in image collection

In [None]:
# read in images from mapUrban - change years
contigUrban1991 = (ee.Image("projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/contigUrbanUN/contigUrban1991").clip(region))
contigUrban1994 = (ee.Image("projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/contigUrbanUN/contigUrban1994").clip(region))
contigUrban1999 = (ee.Image("projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/contigUrbanUN/contigUrban1999").clip(region))
contigUrban = (contigUrban1991.addBands(contigUrban1994).addBands(contigUrban1999));


In [None]:
##### FINAL COST MAPPING
def mapCost (year):

    yearString = ee.Number(year).toInt().format();
    band_name_int = ee.String("contig_Iurban_").cat(yearString);
    band_name_high = ee.String("contig_Hurban_").cat(yearString);

    final_int_urban_binary = contigUrban.select([band_name_int])
    final_high_urban_binary = contigUrban.select([band_name_high])

    # set masked values to 50km
    overUrban = ee.Image().toByte().paint(region.geometry(), 50000)

    # create even surface
    landscape = ee.Image().toByte().paint(region.geometry(), 1)

    # int urban cross mapping
    final_int_urban_binary_v2 = final_int_urban_binary.gt(0) # maybe unneccesary
    cumulativeCostInt0 = (landscape.cumulativeCost(
                            source=final_int_urban_binary_v2,
                            maxDistance= 50 * 1000,
                            geodeticDistance = False)
                         .rename(ee.String("inte_").cat(ee.Number(year).toInt().format()))
                         .unmask(overUrban))

    # convert anything greater than 50km to 50km
    condition1 = cumulativeCostInt0.lte(50000); #50,000 meters = 50 km
    cumulativeCostInt = cumulativeCostInt0.updateMask(condition1).unmask(50000).clip(region);

    # repeat for high density
    final_high_urban_binary_v2 = final_high_urban_binary.gt(0)
    cumulativeCostHigh0 = (landscape.cumulativeCost(
                            source=final_high_urban_binary_v2,
                            maxDistance= 50 * 1000,
                            geodeticDistance = False)
                         .rename(ee.String("high_").cat(ee.Number(year).toInt().format()))
                         .unmask(overUrban))

    # convert anything greater than 50km to 50km
    condition2 = cumulativeCostHigh0.lte(50000);
    cumulativeCostHigh = cumulativeCostHigh0.updateMask(condition2).unmask(50000).clip(region);

    return ee.Image(cumulativeCostInt.addBands(cumulativeCostHigh))

# Define a function to remove index from band name - updated to specific band names here
def remove_index(band_name):
    return ee.String(band_name).slice(-9)

**TRY TO RUN IN ONE OUTPUT**

In [None]:
years = ee.List.sequence(1990, 1999) # Modify this list as needed

cc_images = ee.ImageCollection(years.map(mapCost))
cc_image = cc_images.toBands()

# updated bands
cc_names = cc_image.bandNames()
new_cc_names = cc_names.map(remove_index)
renamed_cc = cc_image.select(cc_names, new_cc_names)

# asset ID
cc_id = 'projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/urbanCostMapUNinterpolated'

# Save the collection
task = ee.batch.Export.image.toAsset(
    image= renamed_cc,
    description ='urbanCostMapUNinterpolated',
    assetId = cc_id,
    region=region.geometry(),
    scale=1000,
    crs='EPSG:4326'
)
task.start()

**Read in cost mapping data**

In [None]:
# read in images from mapUrban
cost_maps = (ee.Image("projects/gbsc-gcp-lab-emordeca/assets/urban_mapping/urbanCostMapUNinterpolated").clip(region))

**Map over feature collection**

In [None]:
#### export feature collection 1km around snails
# worth it to map it so we are only getting data on the ones per year?

reducer2 = ee.Reducer.mean()

finalFeature = cost_maps.reduceRegions(
      collection = bufferedPoints,
                      reducer = reducer2,
                      scale = 1000,
                      crs = 'EPSG:4326',
                      tileScale = 16);

# Export the image sample feature collection to Drive as a CSV file.
task = ee.batch.Export.table.toDrive(
    collection=finalFeature,
    description='schisto_urbanCC_UN_interpolated_oct162023',
    folder='final_schisto_data',
    fileFormat='CSV',
)
task.start()

**NOW READ IN CSV & CLEAN**

In [1]:
# acess the file & set up file
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
# read in data
import pandas as pd
df = pd.read_csv('/content/drive/MyDrive/GEEexports brazil_schisto_snails/final_schisto_data/schisto_urbanCC_UN_interpolated_oct162023.csv')
# print(df.head())
print(df.columns)

Index(['system:index', 'dataset', 'high_1990', 'high_1991', 'high_1992',
       'high_1993', 'high_1994', 'high_1995', 'high_1996', 'high_1997',
       'high_1998', 'high_1999', 'inte_1990', 'inte_1991', 'inte_1992',
       'inte_1993', 'inte_1994', 'inte_1995', 'inte_1996', 'inte_1997',
       'inte_1998', 'inte_1999', 'origRC', 'row_code', 'source', 'species',
       'year', '.geo'],
      dtype='object')


  df = pd.read_csv('/content/drive/MyDrive/GEEexports brazil_schisto_snails/final_schisto_data/schisto_urbanCC_UN_interpolated_oct162023.csv')


In [3]:
# subset data to make it easier to switch from wide to long
desired_indices_high = list(range(12, 22)) + [23] + [26]
df_high = df.iloc[:, desired_indices_high] # might have to update this
print(df_high.columns)

# swtich high urban from wide to long
desired_indices_int = list(range(2, 12)) + [23] + [26]
df_int = df.iloc[:, desired_indices_int] # might have to update this
print(df_int.columns)

Index(['inte_1990', 'inte_1991', 'inte_1992', 'inte_1993', 'inte_1994',
       'inte_1995', 'inte_1996', 'inte_1997', 'inte_1998', 'inte_1999',
       'row_code', 'year'],
      dtype='object')
Index(['high_1990', 'high_1991', 'high_1992', 'high_1993', 'high_1994',
       'high_1995', 'high_1996', 'high_1997', 'high_1998', 'high_1999',
       'row_code', 'year'],
      dtype='object')


In [4]:
#### now convert wide to long dataset, merge data, and export (print len to make sure merged okay)

df_high_long = pd.melt(df_high, id_vars=['row_code', 'year'], var_name='year2', value_name='dist_high_urban')
df_high_long = df_high_long.dropna()
df_high_long['year2'] = df_high_long['year2'].str.extract(r'(\d+)')
df_high_long['year2'] = pd.to_numeric(df_high_long['year2'])
df_high_long = df_high_long[(df_high_long['year'] == df_high_long['year2'])]
df_high_long = df_high_long.drop(columns=['year2'])
print(df_high_long.head())
print(len(df_high_long)) # no of rows

df_int_long = pd.melt(df_int, id_vars=['row_code', 'year'], var_name='year2', value_name='dist_int_urban')
df_int_long = df_int_long.dropna()
df_int_long['year2'] = df_int_long['year2'].str.extract(r'(\d+)')
df_int_long['year2'] = pd.to_numeric(df_int_long['year2'])
df_int_long = df_int_long[(df_int_long['year'] == df_int_long['year2'])]
df_int_long = df_int_long.drop(columns=['year2'])
print(df_int_long.head())
print(len(df_int_long)) # no of rows


       row_code    year  dist_high_urban
17580    8034.0  1990.0     40563.590338
17581   10273.0  1990.0     40563.590338
17582   41252.0  1990.0     40563.590338
17583    7607.0  1990.0     36620.990619
17584    9818.0  1990.0     36620.990619
10525
       row_code    year  dist_int_urban
17580    8034.0  1990.0         50000.0
17581   10273.0  1990.0         50000.0
17582   41252.0  1990.0         50000.0
17583    7607.0  1990.0         50000.0
17584    9818.0  1990.0         50000.0
10525


In [5]:
## save code
merged_df = pd.merge(df_high_long, df_int_long, on=['row_code','year'], how='inner')
print(len(merged_df))
print(merged_df.head)

# sanity check - check years
unique_values = merged_df['year'].unique()
print("Unique Years :", unique_values)

merged_df.to_csv('/content/drive/MyDrive/GEEexports brazil_schisto_snails/final_schisto_data/cleaned_schisto_urbanCC_UN_interpolated_oct172023.csv', index=False)


10525
<bound method NDFrame.head of        row_code    year  dist_high_urban  dist_int_urban
0        8034.0  1990.0     40563.590338         50000.0
1       10273.0  1990.0     40563.590338         50000.0
2       41252.0  1990.0     40563.590338         50000.0
3        7607.0  1990.0     36620.990619         50000.0
4        9818.0  1990.0     36620.990619         50000.0
...         ...     ...              ...             ...
10520   92205.0  1999.0      3002.710171             0.0
10521   92206.0  1999.0      3002.710171             0.0
10522   92207.0  1999.0      3002.710171             0.0
10523   92208.0  1999.0      3002.710171             0.0
10524   92209.0  1999.0      3002.710171             0.0

[10525 rows x 4 columns]>
Unique Years : [1990. 1991. 1992. 1993. 1994. 1995. 1996. 1997. 1998. 1999.]
