<a href="https://colab.research.google.com/github/ericslevenson/arctic-surface-water/blob/main/mask2seed.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
'''
author: @ericslevenson
date: 11/28/2022
description: Convert lake occurrence raster to clean buffered lake shapefile to
seed the time series in GEE
'''

In [None]:
# Authenticate private account (only required for exporting to drive/gee/gcp)
from google.colab import auth
auth.authenticate_user()
# Google Drive setup
from google.colab import drive
drive.mount('/content/drive')
# Complete the environment
!pip install rasterio
!pip install rioxarray
! pip install geopandas

Mounted at /content/drive
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rasterio
  Downloading rasterio-1.3.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.9 MB)
[K     |████████████████████████████████| 20.9 MB 4.3 MB/s 
[?25hCollecting affine
  Downloading affine-2.3.1-py2.py3-none-any.whl (16 kB)
Collecting snuggs>=1.4.1
  Downloading snuggs-1.4.7-py3-none-any.whl (5.4 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Collecting click-plugins
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Installing collected packages: snuggs, cligj, click-plugins, affine, rasterio
Successfully installed affine-2.3.1 click-plugins-1.1.1 cligj-0.7.2 rasterio-1.3.4 snuggs-1.4.7
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rioxarray
  Downloading rioxarray-0.13.1-py3-none-any.whl (53 kB)
[K     |██████████████████

In [None]:
import os
import rasterio
import geopandas
import scipy
import pandas as pd
import numpy as np
from rasterio.plot import show
from geopandas import GeoDataFrame
from scipy import ndimage
from pandas import DataFrame
from shapely.geometry import shape
from rasterio.features import shapes
from rasterio.merge import merge
import geopandas as gpd

In [None]:
# Input directory holding the lake occurrence rasters, and the output directory
# that the exported shapefiles are written to
LOdirectory = '/content/drive/MyDrive/lakeTransectResults/'
outDirectory = '/content/drive/MyDrive/AGU/Lakes75/'

# Keep only GeoTIFFs. Matching on the real extension avoids an IndexError for
# names without a dot and skips sidecar files such as 'x.tif.aux.xml'.
# Sorting makes the order in which raster parts are later merged deterministic
# (os.listdir returns names in arbitrary order).
files = sorted(f for f in os.listdir(LOdirectory) if os.path.splitext(f)[1] == '.tif')

# The ID is the part of the file name before the first underscore. Sort the
# unique IDs so they are processed in the same order on every run.
IDs = sorted({f.split('_')[0] for f in files})

In [None]:
# Display the unique ID prefixes collected from the .tif file names
IDs

['04WEA',
 '04WEB',
 '04WES',
 '04WEU',
 '04WED',
 '04WEV',
 '04VEN',
 '04WEE',
 '04VEP',
 '04VEM',
 '04WEC',
 '04VEQ',
 '04VER',
 '04WET']

In [None]:
## BINARIZE | DILATE | VECTORIZE | EXPORT |
# For each ID: threshold the lake occurrence raster into a binary mask, dilate
# the mask, polygonize it and write the class-1 polygons to '<ID>_seed.shp' in
# outDirectory.

# Define Kernel: 3x3 block of ones used as the dilation structuring element
kernel = np.ones((3,3), np.uint8)

for i in IDs:
  print('id of interest is ' + i)
  # lake occurrence of interest: every raster whose name starts with this ID
  LOoi = [f for f in files if f.split('_')[0] == i]
  if not LOoi:
    # IDs and files are out of sync; report it instead of failing in merge()
    print('no rasters found for ' + i)
    continue
  if len(LOoi) == 1:
    print('processing solo ' + i)
    # the context manager closes the dataset once the band has been read
    with rasterio.open(LOdirectory + LOoi[0]) as src:
      data = src.read(1)
      trans = src.transform
      crs = src.crs
  else:
    # For split up rasters: mosaic all parts (two or more) into one array
    print('processing as mosaic ' + i)
    parts = []
    try:
      for part_name in LOoi:
        parts.append(rasterio.open(LOdirectory + part_name))
      LO, trans = merge(parts)
      data = LO[0] # first band of the merged array
      crs = parts[0].crs
    finally:
      # always release the file handles, even if opening or merging fails
      for part in parts:
        part.close()
  # binary at 0.05
  data = np.where(data > 0.05, 1, 0)
  # dilate (5 passes with the 3x3 kernel); int32 is a dtype that shapes() accepts
  dilated = scipy.ndimage.binary_dilation(data, kernel, iterations = 5).astype(np.int32)
  # generate shapes: (geometry, pixel value) pairs in the raster's coordinates
  shape_gen = ((shape(s), v) for s, v in shapes(dilated, transform=trans))
  # build a pd.DataFrame of shapes and convert to geodataframe
  df = DataFrame(shape_gen, columns=['geometry', 'class'])
  gdf = GeoDataFrame(df["class"], geometry=df.geometry, crs=crs)
  # filter out non-lake polygons
  gdf = gdf[gdf['class'] == 1]
  # write to file
  gdf.to_file(outDirectory + i + '_seed.shp')

In [None]:
# BINARIZE | VECTORIZE | EXPORT
# For each ID: threshold the lake occurrence raster into a binary mask (no
# dilation in this pass), polygonize it and write the class-1 polygons to
# '<ID>_lake75.shp' in outDirectory.

for i in IDs:
  print('id of interest is ' + i)
  # lake occurrence of interest: every raster whose name starts with this ID
  LOoi = [f for f in files if f.split('_')[0] == i]
  if not LOoi:
    # IDs and files are out of sync; report it instead of failing in merge()
    print('no rasters found for ' + i)
    continue
  if len(LOoi) == 1:
    print('processing solo ' + i)
    # the context manager closes the dataset once the band has been read
    with rasterio.open(LOdirectory + LOoi[0]) as src:
      data = src.read(1)
      trans = src.transform
      crs = src.crs
  else:
    # For split up rasters: mosaic all parts (two or more) into one array
    print('processing as mosaic ' + i)
    parts = []
    try:
      for part_name in LOoi:
        parts.append(rasterio.open(LOdirectory + part_name))
      LO, trans = merge(parts)
      data = LO[0] # first band of the merged array
      crs = parts[0].crs
    finally:
      # always release the file handles, even if opening or merging fails
      for part in parts:
        part.close()
  # binary at 0.25; int32 is a dtype that shapes() accepts
  # NOTE(review): outputs are named 'lake75' while the threshold is 0.25 - confirm this is intended
  data = np.where(data > 0.25, 1, 0).astype('int32')
  # generate shapes: (geometry, pixel value) pairs in the raster's coordinates
  shape_gen = ((shape(s), v) for s, v in shapes(data, transform=trans))
  # build a pd.DataFrame of shapes and convert to geodataframe
  df = DataFrame(shape_gen, columns=['geometry', 'class'])
  gdf = GeoDataFrame(df["class"], geometry=df.geometry, crs=crs)
  # filter out non-lake polygons
  gdf = gdf[gdf['class'] == 1]
  # write to file
  gdf.to_file(outDirectory + i + '_lake75.shp')

id of interest is 04WEA
processing as mosaic 04WEA
id of interest is 04WEB
processing as mosaic 04WEB
id of interest is 04WES
processing as mosaic 04WES
id of interest is 04WEU
processing as mosaic 04WEU
id of interest is 04WED
processing as mosaic 04WED
id of interest is 04WEV
processing as mosaic 04WEV
id of interest is 04VEN
processing solo 04VEN
id of interest is 04WEE
processing as mosaic 04WEE
id of interest is 04VEP
processing as mosaic 04VEP
id of interest is 04VEM
processing solo 04VEM
id of interest is 04WEC
processing as mosaic 04WEC
id of interest is 04VEQ
processing as mosaic 04VEQ
id of interest is 04VER
processing as mosaic 04VER
id of interest is 04WET
processing as mosaic 04WET


In [None]:
# Grid
# Read the 10 km density grid and make sure it is in EPSG:32604 before
# measuring cell areas.
grid = gpd.read_file('/content/drive/MyDrive/AGU/Lakes75/densityGrid_10km.shp') # read in grid
if grid.crs is None:
  # No CRS stored with the file: label the coordinates as EPSG:32604.
  # NOTE(review): this assumes the coordinates already are UTM - confirm.
  grid = grid.set_crs('EPSG:32604')
else:
  # set_crs only labels the data and raises when a different CRS is already
  # present; to_crs performs the actual reprojection.
  grid = grid.to_crs('EPSG:32604') # project to utm
grid['full_area'] = grid['geometry'].area # add area field (in squared CRS units)

In [None]:
# Lakes: read the lake polygon shapefile from Drive
lakes_path = '/content/drive/MyDrive/AGU/Lakes75/transect_lake75_utm.shp'
lakes = gpd.read_file(lakes_path)

In [None]:
# Clip the grid cells to the lake polygons (lakes act as the clipping mask)
grid_clipped = gpd.clip(gdf=grid, mask=lakes)