# Width and Basin Attribute Match

In [1]:
#Imports
import ee
#ee.Authenticate()
ee.Initialize() #opt_url='https://earthengine-highvolume.googleapis.com'
import geemap
import geemap.foliumap as geemap
import pandas as pd
import numpy as np
import os
import time
import glob

In [None]:
#Useful functions

#Add long and lat as feature properties
def longLat(feat):
    """Attach the geometry's first and last coordinates as 'long' and 'lat'.

    Args:
        feat: object exposing ``geometry()`` and ``set()``; presumably an
            ee.Feature with a point geometry, so that the coordinate list
            is [longitude, latitude] -- TODO confirm for other geometries.

    Returns:
        The result of ``feat.set`` with the 'long' and 'lat' properties added.
    """
    coords = feat.geometry().coordinates()
    # Index 0 is the first coordinate, index -1 the last one.
    return feat.set('long', coords.get(0), 'lat', coords.get(-1))

def polyProps(polygon):
    """Copy a basin's 'SORT' and 'DIST_MAIN' values onto its joined points.

    Only these two properties are propagated; any other properties present
    on the polygon are left behind.

    Args:
        polygon: feature carrying 'SORT', 'DIST_MAIN' and a 'points'
            property that holds the list of features matched to it.

    Returns:
        ee.FeatureCollection built from the 'points' list, with 'SORT' and
        'DIST_MAIN' set on every point.
    """
    basin_props = {
        'SORT': polygon.get('SORT'),
        'DIST_MAIN': polygon.get('DIST_MAIN'),
    }
    matched_points = ee.FeatureCollection(ee.List(polygon.get('points')))
    return matched_points.map(lambda point: point.set(basin_props))

In [None]:
#Take a look at basins of interest or your sites too

# Interactive map with an initial centre and zoom
Map = geemap.Map(center=(40, -100), zoom=4)

# HydroATLAS basins collection for the chosen level
level = '06'
dataset = ee.FeatureCollection("WWF/HydroATLAS/v1/Basins/level" + level)

visualization = {
    'color': '808080',
    'strokeWidth': 1,
}

# Load the sites here so this cell can run on its own: the layer below needs
# `sites`, which is otherwise only assigned in a later cell.
sites = ee.FeatureCollection('your_filepath')
Map.addLayer(sites, {'color': 'purple'}, 'My sites')

Map.setCenter(-92.140225737902, 37.08486741, 7)
Map.addLayer(dataset, visualization, 'Basins')
Map


In [None]:


# Draw in sites
sites = ee.FeatureCollection('your_filepath')
#Define your sites
table = sites
print(table.size().getInfo())

#Filter geometry and props for basin: keep the listed basin attributes and
#only the basins that fall within the bounding box of the sites
feats = (
    dataset
    .select(['SORT', 'PFAF_ID', 'HYBAS_ID', 'SUB_AREA', 'UP_AREA', 'DIST_MAIN'])
    .filterBounds(table.geometry().bounds())
)

# Join condition: the basin geometry intersects the site geometry
spatialFilter = ee.Filter.intersects(
    leftField='.geo',
    rightField='.geo',
    maxError=10,
)

# Every matching site is stored on its basin under the 'points' property
saveAllJoin = ee.Join.saveAll(matchesKey='points')

# polyProps turns each basin into the collection of its points (tagged with
# basin properties); flatten() merges those collections into one
intersectJoined = (
    saveAllJoin
    .apply(primary=feats, secondary=table, condition=spatialFilter)
    .map(polyProps)
    .flatten()
)

# Add 'long'/'lat' properties with the shared helper defined above
intersectJoined = intersectJoined.map(longLat)

print(intersectJoined.size().getInfo())
print(intersectJoined.limit(10).getInfo(), 'intersectJoined')

##Save
# filepath = '/your_filepath/basinAttributes'+ level+ '_sites.csv'
# geemap.ee_export_vector(ee.FeatureCollection(intersectJoined), filepath)

##OR 
##Export an ee.FeatureCollection as an Earth Engine asset.
# task = ee.batch.Export.table.toAsset(
#     collection = intersectJoined, 
#     description = 'basinAttributesExport', 
#     assetId = 'yourSiteFileName'
# )
# task.start()


# Widths

In [None]:
#Assumes you've loaded your site list+location as gee asset called 'sites'

#Tag every site with 'long'/'lat' properties, then order the collection by
#'lat' (sorting by either coordinate is meant to make the width code below cheaper)
sites = (
    sites
    .map(longLat)
    .sort('lat')
)
print(sites.limit(2).getInfo())

#GRWL widths
#  water vector: more exact
grwl_water_vector = ee.FeatureCollection(
    "projects/sat-io/open-datasets/GRWL/water_vector_v01_01"
)
#  summary stats: computationally cheap
grwl_summary = ee.FeatureCollection(
    "projects/sat-io/open-datasets/GRWL/grwl_SummaryStats_v01_01"
)


In [None]:
#FC as list
# Upper bound on how many sites are pulled out of the collection
max_sites = 8000
sitesList = sites.toList(max_sites) #sort by lat or long for

# Chunk size, define for computational feasibility
chunk_size = 5

# Ask the server only for the element count instead of downloading every
# feature just to measure the list length.
n_sites = sitesList.size().getInfo()

# Split the list into consecutive slices of at most chunk_size sites each
chunks = [
    sitesList.slice(start, start + chunk_size)
    for start in range(0, n_sites, chunk_size)
]

print(len(chunks))
#print((chunks[121]).getInfo())
#print(ee.FeatureCollection(chunks[3]).getInfo())


### maybe you need a special subset that did not work the first time?
# special = ee.FeatureCollection(chunks[1])
# special1 = ee.FeatureCollection(chunks[1]).toList(50).slice(0,15)
# special2 = ee.FeatureCollection(chunks[1]).toList(50).slice(15,30)

# specials = [special1, special2]
# print(specials)

In [None]:
# For every chunk of sites: join the sites to buffered GRWL water-vector
# features, merge the matched width properties onto each site and write one
# CSV per chunk.

#Define an geometry intersection (identical for every chunk, so built once)
interesectsFilter = ee.Filter.intersects(**{
    'leftField': '.geo',
    'rightField': '.geo',
    'maxError': 50
})


def latLong(feat):
    """Set 'long'/'lat' from the first/last geometry coordinate, then drop the geometry."""
    geom = feat.geometry()
    long = geom.coordinates().get(0)
    lat = geom.coordinates().get(-1)
    return feat.set('long', long, 'lat', lat).setGeometry(None)


for i, chunk in enumerate(chunks):

    chunk = ee.FeatureCollection(chunk)
    chunk_bounds = chunk.geometry().bounds()

    #Get the GRWL water vector and trim to area and feature of interest, add buffer if sites are on shore
    # (each feature is buffered by 0.8 x its own 'width_m' value)
    grwl_summary_intersect = (
        grwl_water_vector
        .select('width_m')
        .filterBounds(chunk_bounds)
        .map(lambda feat: feat.buffer(ee.Number(0.8).multiply(feat.get('width_m')), 10))
    )

    #Join the two datasets: the first intersecting GRWL feature is stored on
    #each site under 'widths'
    siteWidths = ee.Join.saveFirst('widths').apply(**{
        'primary': chunk,
        'secondary': grwl_summary_intersect.filterBounds(chunk_bounds),
        'condition': interesectsFilter
    }).filter(ee.Filter.neq('widths', None))

    # Collect the matched GRWL features saved on the joined sites.
    # The joined collection is `siteWidths`; no `hucWidths` exists in this file.
    widthExtract = ee.FeatureCollection(siteWidths.aggregate_array('widths').flatten())

    # First feature collection: the joined sites.
    fc1 = siteWidths

    # Second feature collection: the matched GRWL features.
    fc2 = widthExtract

    # Map over the features in fc1 and combine properties with the fc2 match.
    def widthCombine(feature1):
        """Merge a site's properties with those of the first fc2 feature at its location."""
        # Get the properties of the first feature.
        properties1 = feature1.toDictionary()
        # Find the corresponding feature in the second collection;
        # geometry is assumed to be the basis for association.
        feature2 = fc2.filterBounds(feature1.geometry()).first()
        # Get the properties of the second feature.
        properties2 = feature2.toDictionary()
        # Combine the properties from both features.
        combinedProperties = properties1.combine(properties2)

        # Create a new feature with the combined properties.
        return ee.Feature(feature1.geometry(), combinedProperties)

    # Best effort per chunk: a failing chunk is reported and the loop moves on.
    try:
        combinedFeatures = fc1.map(widthCombine) #\
          #.filter(ee.Filter.gte('width_mean', 90))

        combinedFeatures = combinedFeatures.map(latLong)

        filepath = 'your_filepath/'+ str(i) + '_someOtherIdentifierMaybe'+'.csv'
        geemap.ee_export_vector(ee.FeatureCollection(combinedFeatures), filepath, ['SiteID', 'width_m', 'lat','long'])

    except Exception as error:
        print("An error occurred,", error)

#     task = ee.batch.Export.table.toDrive(
#                         collection = ee.FeatureCollection(combinedFeatures), 
#                         description = str(i + 138) + '_' + 'aquaWidths', 
#                         folder = 'aquaWidths', 
#                         fileFormat = 'csv',
#                         selectors = ['SiteID','SiteID2', 'width_mean', 'lat', 'long'])



    #Start the task
    #task.start()
    #while task.active():
        #print('Polling for task (id: {}).'.format(task.id), i)
        #time.sleep(5)

    print('done', i)
    # Pause between chunks before issuing the next request
    time.sleep(5)

In [None]:
#Once every chunk has been exported: read the per-chunk CSVs, stack them
#into one table, filter on width and save
file_list = glob.glob('/width_files/*.csv')
frames = [pd.read_csv(csv_path, low_memory=False) for csv_path in file_list]
widths = pd.concat(frames, ignore_index=True)
print(len(widths))
print(widths.head())

#Keep only rows whose 'width_m' is at least 90
wide_enough = widths['width_m'] >= 90
widths = widths[wide_enough]
print(len(widths))
print(widths.head())

#SAVE
widths.to_csv('/your_filepath.csv', index=False)