In [1]:
import ee
import geemap
import json
import os
import sys
from geemap import geojson_to_ee
from ipyleaflet import GeoJSON
sys.path.insert(1, 'D:/!!Research/rgee_test/GEE_LPC/python_scripts')
#sys.path.insert(1, 'C:/Users/Justin-Laptop/Documents/Research/GEE_LPC/python_scripts')
from my_gee_functions import file_address_func
from my_gee_functions import ee_list_func
from my_gee_functions import stdrd_func

In [2]:
# Authenticate if needed
# Runs the Earth Engine authentication step; the recorded output of this cell is True.
ee.Authenticate()

True

In [3]:
# Initialize the Earth Engine client against a specific Cloud project.
# The project id is hard-coded; the asset paths used later in this notebook
# ('projects/ee-jdawsey/...') live under the same project.

ee.Initialize(project = 'ee-jdawsey')

In [4]:
# Create the interactive geemap map that the following cells add layers to.
Map = geemap.Map()
# Bare last expression so the notebook renders the map widget as the cell output.
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [5]:
# Load the buffer asset as a FeatureCollection and draw it on the map with the
# default styling and layer name.
# NOTE(review): the asset name suggests a 20 km buffer — confirm what it buffers.
buffer = ee.FeatureCollection('users/jdawsey/assets/20km_buffer')
Map.addLayer(buffer)

In [6]:
# State boundaries from the TIGER 2016 dataset.
tiger = ee.FeatureCollection("TIGER/2016/States")

# Keep only the feature(s) whose NAME property equals 'Texas'.
is_texas = ee.Filter.eq('NAME', 'Texas')
texas = tiger.filter(is_texas)

# Draw the state outline and expose the layer toggle on the map.
texas_style = {'color': 'blue', 'weight': 3}
Map.addLayer(texas, texas_style, 'Texas')
Map.addLayerControl()

# Covering grid over the Texas geometry in EPSG:4326, shown unfiltered first ...
texas_geometry = texas.geometry()
grid = texas_geometry.coveringGrid('EPSG:4326')
Map.addLayer(grid)

# ... then narrowed with filterBounds against the Texas collection.
grid_clipped = grid.filterBounds(texas)
Map.addLayer(grid_clipped)

### 1. Creating a grid around the points

In [17]:
# State whose shrub points are processed, plus its two-letter abbreviation.
given_state = 'texas'
given_state_abbrev = 'TX'

# Shrub points asset for the chosen state.
points_asset = f'projects/ee-jdawsey/assets/states_shrubs/{given_state}_shrubs'
points = ee.FeatureCollection(points_asset)

# Covering grid over the combined geometry of the points (EPSG:4326),
# narrowed with filterBounds against the points and drawn on the map.
points_geometry = points.geometry()
grid = points_geometry.coveringGrid('EPSG:4326')
grid_clipped = grid.filterBounds(points)
Map.addLayer(grid_clipped)

### 2. Saving the grid as feature collection

In [23]:
# Give every cell of the clipped grid a 1-based 'number' property and export
# the numbered grid as an Earth Engine asset.
#
# Fix: cells are now taken from `grid_clipped`. The previous version counted
# the cells of `grid_clipped` but indexed into the unfiltered `grid`, so the
# exported collection did not contain the cells that were counted.
# The numbering also runs server-side now, instead of a client loop that
# called getInfo(), rebuilt toList() on every pass and chained one merge()
# per cell.

# Number of cells in the clipped grid (server-side ee.Number).
num_cells = grid_clipped.size()

# Build the list of clipped cells once so each cell can be addressed by index.
grid_cell_list = grid_clipped.toList(num_cells)


def _number_cell(index):
    """Return the clipped-grid cell at ``index`` with 'number' set to index + 1."""
    index = ee.Number(index)
    cell = ee.Feature(grid_cell_list.get(index))
    # Cast to an integer so 'number' is stored as a whole number.
    return cell.set('number', index.add(1).int())


# Zero-based positions 0 .. num_cells - 1, mapped to numbered features.
cell_indices = ee.List.sequence(0, num_cells.subtract(1))
tx_modified_grid = ee.FeatureCollection(cell_indices.map(_number_cell))

# Export an ee.FeatureCollection as an Earth Engine asset.
grid_export_task = ee.batch.Export.table.toAsset(
    collection = tx_modified_grid,
    description = 'texas_modified_grid',
    assetId='projects/ee-jdawsey/assets/tx_modified_grid',
)
grid_export_task.start()

### 3. Sorting the point data by latitude

In [25]:
# Column whose values determine the output order.
sort_column = 'latitude'

# Re-load the state's shrub points and order them by that column.
points_asset = f'projects/ee-jdawsey/assets/states_shrubs/{given_state}_shrubs'
points = ee.FeatureCollection(points_asset)
sorted_points = points.sort(sort_column)

# Round-trip through an ee.List to build a new collection from the sorted features.
sorted_points_list = sorted_points.toList(sorted_points.size())
sorted_tx_points = ee.FeatureCollection(sorted_points_list)

# Queue a table export of the sorted collection to an Earth Engine asset.
export_options = {
    'collection': sorted_tx_points,
    'description': 'Sorted Points Export',
    'assetId': 'projects/ee-jdawsey/assets/states_shrubs/sorted_tx_points',
}
task = ee.batch.Export.table.toAsset(**export_options)
task.start()

ee.FeatureCollection({
  "functionInvocationValue": {
    "functionName": "Collection",
    "arguments": {
      "features": {
        "functionInvocationValue": {
          "functionName": "Collection.toList",
          "arguments": {
            "collection": {
              "functionInvocationValue": {
                "functionName": "Collection.limit",
                "arguments": {
                  "collection": {
                    "functionInvocationValue": {
                      "functionName": "Collection.loadTable",
                      "arguments": {
                        "tableId": {
                          "constantValue": "projects/ee-jdawsey/assets/states_shrubs/texas_shrubs"
                        }
                      }
                    }
                  },
                  "key": {
                    "constantValue": "latitude"
                  }
                }
              }
            },
            "count": {
              "functionInvocatio

In [None]:
# Column used for ordering in this cell.
sort_column = 'NAME'

# Load the '<state>_shrubs_counties' asset and order it by that column.
county_points_asset = f'projects/ee-jdawsey/assets/states_shrubs/{given_state}_shrubs_counties'
points = ee.FeatureCollection(county_points_asset)
sorted_points = points.sort(sort_column)

# Round-trip through an ee.List to build a new collection from the sorted features.
sorted_points_list = sorted_points.toList(sorted_points.size())
sorted_tx_points = ee.FeatureCollection(sorted_points_list)

### 4. Exporting the data
This works if you don't already have the county data.

In [15]:
# Function to split a collection into chunks
def splitCollection(fc, chunk_size):
    """Split ``fc`` into consecutive lists of at most ``chunk_size`` elements.

    Args:
        fc: Collection-like object exposing ``size().getInfo()`` and
            ``toList(count, offset)``.
        chunk_size: Maximum number of elements requested per chunk.

    Returns:
        A Python list holding one ``fc.toList(chunk_size, offset)`` result per
        offset ``0, chunk_size, 2 * chunk_size, ...`` below the collection size.
    """
    # The element count is fetched client-side to drive the Python loop.
    total = fc.size().getInfo()

    chunks = []
    for offset in range(0, total, chunk_size):
        chunks.append(fc.toList(chunk_size, offset))
    return chunks

# Extract the distance-to-drainage / distance-to-road bands at every point,
# working through the points in chunks and writing one CSV per (chunk, county).
#
# Changes from the previous version of this cell:
#   * the NHD / roads distance images are built once instead of once per county
#     per chunk (they do not depend on the loop variables);
#   * the output directory is created up front so the CSV writes cannot fail on
#     a missing folder;
#   * the batch number comes from enumerate() instead of a manual counter.

# Set your given state and abbreviation
given_state = "texas"
given_state_abbrev = "TX"

# Load your points FeatureCollection
points = ee.FeatureCollection(f'projects/ee-jdawsey/assets/states_shrubs/{given_state}_shrubs_counties')
# Define the column you want to sort by
sort_column = 'NAME'
# Sort the FeatureCollection by the specified column
sorted_points = points.sort(sort_column)
# Convert the sorted FeatureCollection to a list
sorted_points_list = sorted_points.toList(sorted_points.size())
# Create a new FeatureCollection from the sorted list
sorted_tx_points = ee.FeatureCollection(sorted_points_list)
# NOTE(review): sorted_tx_points is built here but the chunking below still
# uses the unsorted `points` — confirm which one is intended.

# Split points into chunks of 2500 features
chunk_size = 2500
points_chunks = splitCollection(points, chunk_size)

# --- Loop-invariant inputs -------------------------------------------------
# Distance to the nearest NHD area/flowline feature.
nhd_area = ee.FeatureCollection(f'projects/sat-io/open-datasets/NHD/NHD_{given_state_abbrev}/NHDArea')
nhd_flowline = ee.FeatureCollection(f'projects/sat-io/open-datasets/NHD/NHD_{given_state_abbrev}/NHDFlowline')
nhd = nhd_area.merge(nhd_flowline)
dist_nhd = nhd.distance(searchRadius=50000)
dist_nhd = dist_nhd.select(['distance'])
dist_nhd = dist_nhd.rename('dist_drain')
# NOTE(review): reproject at scale=1 forces a very fine grid for a 50000-unit
# search radius and may be expensive — confirm this resolution is required.
dist_nhd = dist_nhd.reproject('EPSG:4326', scale = 1)

# Distance to the nearest TIGER 2016 road.
roads = ee.FeatureCollection('TIGER/2016/Roads')
dist_roads = roads.distance(searchRadius=50000)
dist_roads = dist_roads.select(['distance'])
dist_roads = dist_roads.rename('dist_road')
dist_roads = dist_roads.reproject('EPSG:4326', scale = 1)

# Two-band image sampled at the points.
dist_from_bands = ee.Image([dist_nhd, dist_roads])

bands_to_pull = dist_from_bands
bands_name = 'dist_from_bands'

# Output folder for the per-county CSVs; create it if it does not exist yet.
work_dir = f'D:/tifs/broad_xgb_classification/obs_batches/{given_state_abbrev.lower()}_batches/{bands_name}'
os.makedirs(work_dir, exist_ok=True)

# Property that holds the grouping value on the mapped features.
county_column = 'county'

# Process each chunk separately and export
for batch_num, chunk in enumerate(points_chunks, start=1):
    chunk_fc = ee.FeatureCollection(chunk)
    # NOTE(review): addLocationInfo is not defined in the visible part of this
    # notebook; it must already exist in the kernel before this cell runs.
    points_with_location_chunk = chunk_fc.map(addLocationInfo)

    # Distinct grouping values present in this chunk (fetched client-side so
    # they can drive the Python loop and the file names).
    county_values = points_with_location_chunk.aggregate_array(county_column)
    county_unique = county_values.distinct()
    county_names = county_unique.getInfo()

    for county_name in county_names:
        points_to_download = points_with_location_chunk.filter(ee.Filter.eq(county_column, county_name))

        out_csv = os.path.join(work_dir, f'{given_state}_cell{county_name}_{bands_name}_batch_{batch_num}_env_data.csv')

        geemap.extract_values_to_points(points_to_download, bands_to_pull, out_csv)

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-jdawsey/tables/3cb1382a7a78b3fa7bae97ca1b318635-57bb601914d733d9ad02377c52aaf7bb:getFeatures
Please wait ...
Data downloaded to D:\tifs\broad_xgb_classification\obs_batches\tx_batches\dist_from_bands\texas_cell16_dist_from_bands_batch_1_env_data.csv
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-jdawsey/tables/1c8043f408fe4bba893c27daeb80ea1b-f6deae6d9b41986a714d8f69d2c3ecd1:getFeatures
Please wait ...
Data downloaded to D:\tifs\broad_xgb_classification\obs_batches\tx_batches\dist_from_bands\texas_cell21_dist_from_bands_batch_1_env_data.csv
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-jdawsey/tables/0c22cf403f2b2342d18527438cfd8dc2-f2511fc849f184c762f95112ff16e8b1:getFeatures
Please wait ...
Data downloaded to D:\tifs\broad_xgb_classification\obs_batches\tx_batches\dist_from_bands\texas_cell20_dist_from_band

KeyboardInterrupt: 

### Run this if you already have county data

In [28]:
# Extract the distance-to-drainage / distance-to-road bands for every point,
# one county at a time, writing one CSV per batch of at most `batch_size`
# points within each county.
#
# Changes from the previous version of this cell:
#   * the distance images, batch size and output folder are set up once instead
#     of on every county iteration;
#   * process_batch is defined once instead of being re-created per county, and
#     its unused end_index computation is gone;
#   * the county filter uses ee.Filter.eq instead of a string-built expression,
#     so county names containing quotes cannot break the filter;
#   * batch start offsets come from range(0, n, batch_size) instead of manual
#     batch-count arithmetic.

# Set your given state and abbreviation
given_state = "texas"
given_state_abbrev = "TX"

# Define the batch size
batch_size = 2500

# Load your points FeatureCollection
points = ee.FeatureCollection(f'projects/ee-jdawsey/assets/states_shrubs/{given_state}_shrubs_counties')
# Define the column you want to sort by
sort_column = 'NAME'
# Sort the FeatureCollection by the specified column
sorted_points = points.sort(sort_column)
# Convert the sorted FeatureCollection to a list
sorted_points_list = sorted_points.toList(sorted_points.size())
# Create a new FeatureCollection from the sorted list
sorted_points = ee.FeatureCollection(sorted_points_list)

# Distinct county names, fetched client-side to drive the Python loop.
# NOTE(review): the recorded output includes a file named
# 'texas_shrubs__dist_from_bands...', so at least one point appears to have an
# empty NAME — confirm whether those points should be exported.
county_column = 'NAME'
county_values = sorted_points.aggregate_array(county_column)
county_unique = county_values.distinct()
county_names = county_unique.getInfo()

# --- Loop-invariant inputs -------------------------------------------------
# Distance to the nearest NHD area/flowline feature.
nhd_area = ee.FeatureCollection(f'projects/sat-io/open-datasets/NHD/NHD_{given_state_abbrev}/NHDArea')
nhd_flowline = ee.FeatureCollection(f'projects/sat-io/open-datasets/NHD/NHD_{given_state_abbrev}/NHDFlowline')
nhd = nhd_area.merge(nhd_flowline)
dist_nhd = nhd.distance(searchRadius=50000)
dist_nhd = dist_nhd.select(['distance'])
dist_nhd = dist_nhd.rename('dist_drain')
# NOTE(review): the recorded run of this cell ended with
# "EEException: Computation timed out."; forcing scale=1 over a 50000-unit
# search radius is a likely contributor — confirm this resolution is required.
dist_nhd = dist_nhd.reproject('EPSG:4326', scale = 1)

# Distance to the nearest TIGER 2016 road.
roads = ee.FeatureCollection('TIGER/2016/Roads')
dist_roads = roads.distance(searchRadius=50000)
dist_roads = dist_roads.select(['distance'])
dist_roads = dist_roads.rename('dist_road')
dist_roads = dist_roads.reproject('EPSG:4326', scale = 1)

# Two-band image sampled at the points.
dist_from_bands = ee.Image([dist_nhd, dist_roads])

bands_to_pull = dist_from_bands
bands_name = 'dist_from_bands'

# Output folder for the CSVs; create it if it does not exist yet.
work_dir = f'D:/tifs/broad_xgb_classification/obs_batches/{given_state_abbrev.lower()}_batches/{bands_name}'
#work_dir = f'C:/Users/Justin-Laptop/Documents/Research/GEE_LPC/point_data/inat_mesq_obsv/inat_points_env_data/env_batch_csv'
os.makedirs(work_dir, exist_ok=True)


def process_batch(start_index, batch_num):
    """Export one batch of the current county's points to a CSV file.

    Reads the module-level ``points_datas`` (points of the county being
    processed) and ``countyy`` (its name), which the loop below sets before
    each call.

    Args:
        start_index: Offset of the first point of the batch within
            ``points_datas``.
        batch_num: 1-based batch number, used only in the output file name.
    """
    # Get the features for the current batch; toList returns at most
    # batch_size elements starting at start_index.
    batch_features = points_datas.toList(batch_size, start_index)

    # Create a feature collection for the batch
    batch_fc = ee.FeatureCollection(batch_features)

    # then exporting the data as a csv so that can be used in the xgb algorithm
    out_csv = os.path.join(work_dir, f'{given_state}_shrubs_{countyy}_{bands_name}_batch_{batch_num}_env_data.csv')

    #change depending on the bands being pulled
    geemap.extract_values_to_points(batch_fc, bands_to_pull, out_csv)


for countyy in county_names:
    # Points belonging to this county.
    points_datas = points.filter(ee.Filter.eq(county_column, countyy))

    # Get the total number of features (client-side, to size the batches).
    num_features = points_datas.size().getInfo()

    # Iterate over batch start offsets and process each batch.
    for batch_num, start_index in enumerate(range(0, num_features, batch_size), start=1):
        process_batch(start_index, batch_num)

Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-jdawsey/tables/045ab42f2a6d4748559383e5dad2cc2e-8578f9d264f1a211146c267d93b74f12:getFeatures
Please wait ...
Data downloaded to D:\tifs\broad_xgb_classification\obs_batches\tx_batches\dist_from_bands\texas_shrubs__dist_from_bands_batch_1_env_data.csv
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-jdawsey/tables/237b354c1ea9da83019d2becc7c630d3-08ba19875c09003a16e2f214677a9554:getFeatures
Please wait ...
Data downloaded to D:\tifs\broad_xgb_classification\obs_batches\tx_batches\dist_from_bands\texas_shrubs_Anderson County_dist_from_bands_batch_1_env_data.csv
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1/projects/ee-jdawsey/tables/6a653e213c03f024cc0cb84d38d77195-c87e9d8e05adfc4c8625ec772a5aab55:getFeatures
Please wait ...
Data downloaded to D:\tifs\broad_xgb_classification\obs_batches\tx_batches\dist_from_bands\texas_shru

EEException: Computation timed out.