In [None]:
import datetime

import numpy as np
import pandas as pd
import geojson
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import geopandas as gpd
from shapely import geometry, ops
import rasterio
from rasterio import features
import geemap
import ee

In [None]:
# Authenticate this session with Earth Engine, then initialize the client
# library. Every `ee.*` object built in the cells below needs this to have run.
# NOTE(review): Authenticate() presumably starts an interactive sign-in when no
# cached credentials exist — confirm before running this notebook unattended.
ee.Authenticate()
ee.Initialize()

In [None]:
def maskWater(image):
    """Mask out of `image` every pixel that the water mask does not mark as < 1.

    Reads the module-level `waterMask` image (its 'water_mask' band) at call
    time, so `waterMask` must be defined before this function is mapped over a
    collection.

    Args:
        image: object exposing `updateMask` (an ee.Image when used with `.map`).

    Returns:
        Whatever `image.updateMask(...)` returns for the "water_mask < 1" mask.
    """
    not_water = waterMask.select('water_mask').lt(1)
    return image.updateMask(not_water)

def computeRelief(feature, band='elev'):
    """Attach a 'relief' property (band maximum minus band minimum) to a feature.

    The feature is expected to carry '<band>_max' and '<band>_min' properties,
    which is what the mean/minMax reduceRegions calls in this notebook produce.

    Args:
        feature: feature exposing `get` and `set` (an ee.Feature when mapped).
        band: prefix of the properties to read. Defaults to 'elev' so the
            function can still be passed directly to `.map(computeRelief)`.

    Returns:
        The result of `feature.set('relief', max - min)`.
    """
    # Local names deliberately avoid shadowing the built-in max()/min().
    highest = ee.Number(feature.get(band + '_max'))
    lowest = ee.Number(feature.get(band + '_min'))

    return feature.set('relief', highest.subtract(lowest))


def computeArcticRelief(feature):
    """Same as `computeRelief`, reading the 'aelev_max' / 'aelev_min' properties."""
    return computeRelief(feature, band='aelev')

def getCentroid(poly):
    """Return `poly` as an ee.Feature carrying its centroid coordinates.

    Args:
        poly: object exposing `geometry()` (a feature when used with `.map`).

    Returns:
        `ee.Feature(poly)` with a 'centroid' property set to the coordinates of
        the centroid of the feature's geometry.
    """
    center_xy = poly.geometry().centroid().coordinates()
    tagged = ee.Feature(poly)
    return tagged.set('centroid', center_xy)

# MODIS MOD44W water mask: the first image whose date falls in the
# 2015-01-01 .. 2015-01-02 window, restricted to its 'water_mask' band.
# maskWater() reads this module-level image every time it is called.
waterMask = (
    ee.ImageCollection('MODIS/006/MOD44W')
    .filterDate('2015-01-01', '2015-01-02')
    .select('water_mask')
    .first()
)

In [None]:
# Hydrography90m base-network layers. Both assets are loaded here as
# ImageCollections; a later cell masks each one and mosaics it into a single
# Image before it is added to the data stack.
segment = ee.ImageCollection("projects/sat-io/open-datasets/HYDROGRAPHY90/base-network-layers/segment")

flow_accumulation = ee.ImageCollection("projects/sat-io/open-datasets/HYDROGRAPHY90/base-network-layers/flow_accumulation")


# Get relevant climate and elevation variables

In [None]:
# Climate covariates from the WorldClim V1 bioclim image.
# bio01 and bio07 are stored scaled by a factor of 10, hence the 0.1 multiplier;
# bio12 is used as stored.
WORLDCLIM_TEMP_SCALE = 0.1

bio = ee.Image('WORLDCLIM/V1/BIO')
MeanAP = bio.select('bio12')
MeanAT = bio.select('bio01').multiply(WORLDCLIM_TEMP_SCALE)
T_range = bio.select('bio07').multiply(WORLDCLIM_TEMP_SCALE)

# Elevation comes from two sources: SRTM for the mid-latitudes (band renamed
# 'elev') and ArcticDEM for the high latitudes (band renamed 'aelev').
elevation = ee.Image("USGS/SRTMGL1_003").select('elevation').rename('elev')

arctic = ee.Image("UMN/PGC/ArcticDEM/V3/2m_mosaic").select('elevation').rename('aelev')
# The ArcticDEM mosaic is used without an explicit reproject('EPSG:4326'); per
# the original author's note this turned out not to be needed in the Python API.

## Mask river and NDVI data with water mask

In [None]:
# Water-masked NDVI: mask every MOD13Q1 image with maskWater, then take the
# per-pixel maximum over images dated 2020-01-01 up to 2021-01-01 and name the
# resulting band 'ndvi'.
ndvi = ee.ImageCollection('MODIS/006/MOD13Q1').select('NDVI')
ndviMasked = ndvi.map(maskWater)
ndvi_raw = (
    ndviMasked
    .filter(ee.Filter.date('2020-01-01', '2021-01-01'))
    .reduce(ee.Reducer.max())
    .rename('ndvi')
)

In [None]:
# Mask watery rivers, then flatten each ImageCollection into a single Image.
#
# The collections are rebuilt from their asset IDs here instead of being read
# from the `segment` / `flow_accumulation` variables: this cell rebinds those two
# names to mosaicked Images, so re-running it against its own output would fail
# (an ee.Image has no .map()). Built this way the cell can be run repeatedly.
HYDROGRAPHY90_LAYERS = "projects/sat-io/open-datasets/HYDROGRAPHY90/base-network-layers/"

segment = (
    ee.ImageCollection(HYDROGRAPHY90_LAYERS + "segment")
    .map(maskWater)
    .mosaic()  # ImageCollection -> Image
    .rename('segment')
)
flow_accumulation = (
    ee.ImageCollection(HYDROGRAPHY90_LAYERS + "flow_accumulation")
    .map(maskWater)
    .mosaic()  # ImageCollection -> Image
    .rename('flow_acc')
)

## Build data stack

In [None]:
# Layers appended to both elevation sources, in the order their bands are added.
_shared_layers = [MeanAP, MeanAT, T_range, ndvi_raw, segment, flow_accumulation]


def _appendLayers(base):
    """Return `base` with every shared layer added as extra bands, in list order."""
    stacked = base
    for layer in _shared_layers:
        stacked = stacked.addBands(layer)
    return stacked


# Mid-latitude stack starts from the SRTM band; the Arctic stack from ArcticDEM.
stack = _appendLayers(elevation)

arctic_stack = _appendLayers(arctic)



# Collect mid-latitude data from EE

In [None]:
# Collect per-basin statistics for the mid-latitudes (23.4N-60N), one 1-degree
# longitude strip at a time, and assemble them into `midlat_points`.

midlat_frames = []  # one DataFrame per strip that was fetched successfully
midlat_failed = []  # (xMin, error text) for every strip that raised

# Western edge of each strip. np.arange excludes its stop value, so the last
# strip is [179, 180]; the previous stop of 190 requested ten strips beyond the
# +180 degree longitude limit.
xMin_vals = np.arange(-180.0, 180.0, 1)
# xMin_vals = [0.0]

for xMin in xMin_vals:
    now = datetime.datetime.now()
    print("Time= %s:%s:%s" % (now.hour, now.minute, now.second), 'xMin=', xMin)
    try:
        ## Extract data
        # Level-10 HydroBASINS watersheds intersecting this strip, keeping only
        # those whose SUB_AREA equals UP_AREA.
        # NOTE(review): filterBounds keeps any basin that intersects the
        # rectangle, so a basin straddling a strip edge presumably shows up in
        # both neighbouring strips — confirm and de-duplicate downstream if so.
        midlat_sheds = (
            ee.FeatureCollection("WWF/HydroSHEDS/v1/Basins/hybas_10")
            .filterBounds(ee.Geometry.Rectangle([xMin, 23.4, xMin + 1.0, 60.0]))
            .filter(ee.Filter.equals(leftField='SUB_AREA', rightField='UP_AREA'))
        )

        # Mean, min and max of every stack band per watershed, at 1000 m scale.
        midlat_reduced = stack.reduceRegions(
            collection=midlat_sheds,
            reducer=ee.Reducer.mean().combine(
                reducer2=ee.Reducer.minMax(),
                sharedInputs=True,
            ),
            scale=1000,
        )

        # Drop watersheds without elevation statistics, then add relief and centroid.
        midlat_computed = (
            midlat_reduced
            .filter(ee.Filter.notNull(ee.List(['elev_max'])))
            .map(computeRelief)
            .map(getCentroid)
        )

        # Keep watersheds whose mean NDVI exceeds 3900 (in the band's stored units).
        # Further thresholds that were tried and are currently disabled:
        #   .filter(ee.Filter.lt('bio12_mean', 800))
        #   .filter(ee.Filter.gt('relief', 150))
        midlat_threshold = midlat_computed.filter(ee.Filter.gt('ndvi_mean', 3900))

        # Count the unmasked pixels of every stack band per watershed rather than
        # building a second, smaller stack; unwanted columns can be dropped later.
        midlat_channel_counts = stack.reduceRegions(
            reducer=ee.Reducer.count(),
            collection=midlat_threshold,
        )

        new = geemap.ee_to_pandas(midlat_channel_counts)

        print('Number of sheds: ', len(new.index))
        midlat_frames.append(new)
    except KeyboardInterrupt:
        print('Interrupted')
        break
    except Exception as err:
        # Still best-effort (one bad strip must not abort a multi-hour run), but
        # the failure is reported and recorded instead of vanishing silently.
        midlat_failed.append((float(xMin), repr(err)))
        print('Strip failed and was skipped: xMin=', xMin, 'error=', repr(err))

# Concatenate once after the loop: growing a DataFrame with pd.concat on every
# iteration copies all accumulated rows each time.
midlat_points = (
    pd.concat(midlat_frames, ignore_index=True) if midlat_frames else pd.DataFrame()
)

if midlat_failed:
    print('Strips to retry (xMin):', [strip for strip, _ in midlat_failed])


In [None]:
# midlat_points.to_csv("midlat_points.csv")

# Split each centroid pair into separate longitude / latitude columns
# (element 0 is used as x / longitude, element 1 as y / latitude).
midlat_points['long'] = midlat_points['centroid'].map(lambda xy: xy[0])
midlat_points['lat'] = midlat_points['centroid'].map(lambda xy: xy[1])

# Declare the CRS explicitly: without it the shapefile is written with no .prj
# and GIS software has to guess the coordinate system. The centroids come from
# Earth Engine's Geometry.centroid() with no projection argument, which returns
# EPSG:4326 coordinates.
gdf = gpd.GeoDataFrame(
    midlat_points,
    geometry=gpd.points_from_xy(midlat_points['long'], midlat_points['lat']),
    crs='EPSG:4326',
)

# The list-valued 'centroid' column is dropped before writing; its content is
# already captured by 'long', 'lat' and the point geometry.
# NOTE(review): the shapefile format limits field names to 10 characters, so
# longer column names are truncated on write — confirm the output columns remain
# distinguishable.
gdf.drop(columns='centroid').to_file("midlat_h90.shp")

## Collect Arctic data from EE

In [None]:
arctic_points = pd.DataFrame()

xMin_vals = np.arange(-180.0, 190.0, 1)
# xMin_vals = [-152]

for i, xMin in enumerate(xMin_vals): 
    e = datetime.datetime.now()
    try:
      print ("Time= %s:%s:%s" % (e.hour, e.minute, e.second), 'xMin=',xMin)

      ## Extract data
      # Load watersheds from a data table.
      sheds_a = ee.FeatureCollection("WWF/HydroSHEDS/v1/Basins/hybas_10")\
      .filterBounds(ee.Geometry.Rectangle([xMin, 60.0, xMin+1.0, 90.0]))\
      .filter(ee.Filter.equals(leftField = 'SUB_AREA', rightField = 'UP_AREA'))


      arctic_reduced = arctic_stack.reduceRegions(
        collection =  sheds_a,
        reducer = (ee.Reducer.mean().combine(
                reducer2 = ee.Reducer.minMax(),
                sharedInputs=True)),
                scale = 1000,
      );          

      arctic_computed = arctic_reduced.filter(ee.Filter.notNull(ee.List(['aelev_max']))).map(computeArcticRelief).map(getCentroid)

      arctic_threshold = arctic_computed.filter(ee.Filter.gt('ndvi_mean', 3900))\
      #.filter(ee.Filter.lt('bio12_mean', 800))
      #.filter(ee.Filter.gt('relief', 150))

      # Here I'll just count all the pixels in each layer of the stack instead of building two different stacks. Can always drop columns later
      arctic_channel_counts = arctic_stack.select(['segment', 'flow_acc']).reduceRegions(
        reducer =  ee.Reducer.count(),
        scale = 30,
        collection =  arctic_threshold
        )
      new = geemap.ee_to_pandas(arctic_channel_counts)
      print('Number of sheds: ', len(new.index))
      arctic_points = pd.concat([arctic_points,new])
      arctic_points = arctic_points.reset_index()
    except KeyboardInterrupt:
        print('Interrupted')
        break
    except Exception:
        pass

In [None]:
# arctic_points.to_csv("arctic_points_new.csv")

arctic_points = arctic_points.drop('level_0', axis=1).reset_index()

arctic_points.loc[:, 'long'] = arctic_points.centroid.map(lambda x: x[0])
arctic_points.loc[:, 'lat'] = arctic_points.centroid.map(lambda x: x[1])
gdf = gpd.GeoDataFrame(
    arctic_points, geometry=gpd.points_from_xy(arctic_points.long, arctic_points.lat))

gdf.drop('centroid', axis=1).to_file("arctic_h90.shp")