In [None]:
import numpy as np
import pandas as pd
import geojson
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import seaborn as sns
import geopandas as gpd
from shapely import geometry, ops
import rasterio
from rasterio import features
import geemap
import ee

from pathlib import Path

import csv

import requests

In [None]:
# Authenticate against Google Earth Engine, then initialise the client library.
# Both calls must complete before any `ee.*` object below is used.
# NOTE(review): newer earthengine-api releases may require an explicit
# `project=` argument to ee.Initialize() — confirm against the installed version.
ee.Authenticate()
ee.Initialize()

In [None]:
# Table of basins to process, one row per HYBAS_ID.
# The integer width is pinned to 64 bits: the bare 'int' dtype is
# platform-dependent (32-bit on some Windows/NumPy combinations), and
# HYBAS_ID values are presumably too large for 32 bits — TODO confirm.
shed_list = pd.read_csv(
    "shed_list_for_dd_new.csv",
    dtype={'HYBAS_ID': 'int64', 'DD': 'float64'},
)

In [None]:
# Download one clipped, UTM-projected 10 m DEM GeoTIFF per basin in `shed_list`
# to ./lsdtt/<HYBAS_ID>/<HYBAS_ID>.tif. Basins that already have a .tif are
# skipped, so the cell can simply be re-run after a crash. Every basin whose
# download fails is recorded in big_sheds.csv.

BASINS_ASSET = "WWF/HydroSHEDS/v1/Basins/hybas_10"
DEM_3DEP = 'USGS/3DEP/10m'
DEM_ARCTIC = 'UMN/PGC/ArcticDEM/V3/2m_mosaic'
ARCTIC_LAT_CUTOFF = 59.0   # basins with centroid at/above this latitude use ArcticDEM
DOWNLOAD_TIMEOUT_S = 600   # requests timeout (connect / gap between bytes), not total time


def _utm_epsg(lon, lat):
  """Return the WGS 84 / UTM EPSG code for a lon/lat point in degrees.

  Northern-hemisphere zones are 32601-32660, southern ones 32701-32760.
  Floor division is used instead of rounding so that points lying exactly on
  a zone boundary are not pushed into the wrong zone by round-half-to-even.
  """
  zone = int((lon + 180.0) // 6) + 1
  zone = max(1, min(zone, 60))  # lon == 180 would otherwise give zone 61
  return (32600 if lat >= 0 else 32700) + zone


big_file_list = []

for HYBAS_ID in shed_list['HYBAS_ID'].tolist():

  HYBAS_ID = int(HYBAS_ID) # Needs to be int here

  hybas_path = Path(f'./lsdtt/{HYBAS_ID}/')
  tif_path = hybas_path / f'{HYBAS_ID}.tif'

  if tif_path.exists(): # In case the script breaks while running
    continue

  try:
    shed = ee.FeatureCollection(BASINS_ASSET).filter(ee.Filter.eq('HYBAS_ID', HYBAS_ID))

    # One server round-trip for both coordinates. This sits inside the try so
    # that a single bad basin (e.g. an ID with no matching feature) is recorded
    # and skipped instead of aborting the whole batch.
    long, lat = shed.geometry().centroid().coordinates().getInfo()
    epsg = _utm_epsg(long, lat)

    dem_id = DEM_3DEP if lat < ARCTIC_LAT_CUTOFF else DEM_ARCTIC
    # It is not clear why the reprojection has to be done again here, but the
    # sheds weren't clipping otherwise.
    elevation = (
        ee.Image(dem_id)
        .select('elevation')
        .clip(shed)
        .resample('bilinear')
        .reproject(crs=f'EPSG:{epsg}', scale=10)
    )

    url = elevation.getDownloadUrl({
        'region': shed.geometry(),
        'scale': 10,
        'crs': f'EPSG:{epsg}',
        'maxPixels': 10063164208,
        'format': 'GEO_TIFF'
    })
    print(f'Downloading {HYBAS_ID}.tif')
    response = requests.get(url, timeout=DOWNLOAD_TIMEOUT_S)
    # Without this an HTTP error body would be saved as <id>.tif and then be
    # skipped forever by the "already exists" check above.
    response.raise_for_status()

    hybas_path.mkdir(parents=True, exist_ok=True)
    # Write to a temporary name and rename afterwards, so an interrupted write
    # never leaves a truncated file under the final .tif name.
    part_path = tif_path.with_name(tif_path.name + '.part')
    with open(part_path, 'wb') as fd:
      fd.write(response.content)
    part_path.replace(tif_path)
  except KeyboardInterrupt:
    print('Interrupted')
    break
  except Exception as err:
    # Deliberately broad (best effort): Earth Engine request errors
    # (presumably ee.EEException — confirm), network errors and bad responses
    # all land here. The reason is printed so that a real bug is not silently
    # filed as "too big".
    print(f'{HYBAS_ID} failed ({type(err).__name__}: {err})')
    big_file_list.append([f'{HYBAS_ID}'])

# And now record the sheds that could not be downloaded

fields = ['HYBAS_ID']

# newline='' is what the csv module expects; without it some platforms write
# a blank line between rows.
with open('big_sheds.csv', 'w', newline='') as f:
  write = csv.writer(f)

  write.writerow(fields)
  write.writerows(big_file_list)

In [None]:
# Build `merged`: the rows of the basin list whose DEM has been downloaded.
# A basin counts as downloaded only if ./lsdtt/<id>/<id>.tif exists. Checking
# for the directory alone would also count downloads that failed after the
# directory was created, and non-numeric folders (e.g. .ipynb_checkpoints)
# would make the integer conversion raise.
p = Path('./lsdtt/')
downloaded_ids = [
    int(d.name)
    for d in (p.iterdir() if p.is_dir() else [])  # tolerate a missing output folder
    if d.is_dir() and d.name.isdecimal() and (d / f'{d.name}.tif').exists()
]
# Explicit int64 keeps the merge key the same width on every platform.
downloaded = pd.DataFrame({'HYBAS_ID': pd.Series(downloaded_ids, dtype='int64')})
shed_list = pd.read_csv(
    "shed_list_for_dd_new.csv",
    dtype={'HYBAS_ID': 'int64', 'DD': 'float64'},
)
# Inner join: keeps only basins present in both the list and the download folder.
merged = shed_list.merge(downloaded, on="HYBAS_ID")

In [None]:
# Count the downloaded basins per EXTENT value. `downloaded` only contributes
# HYBAS_ID, so EXTENT must be a column of shed_list_for_dd_new.csv.
merged['EXTENT'].value_counts()