Objectives:
   1. Open catchments dataframe (polygons over which landscape data will be collected)
   2. Collect catchment statistics from Earth Engine datasets
   3. Export to a table that can be used in RandomForest

## 1. Import packages / set working directory

In [1]:
import os
import io

import earthpy as et
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import geopandas as gpd
import geemap
from shapely.geometry import shape
import ee
#import elevation
import math

# Connect to Google Earth Engine. If Initialize() fails for any reason
# (typically because no credentials are stored yet), run the interactive
# authentication flow once and try again.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()

%matplotlib inline

In [2]:
# Point the notebook at the project data folder, creating it on first use.
data_dir = os.path.join(
    et.io.HOME,
    'Dropbox',
    'cu_earthdata_certificate_2021',
    'earthlab_project',
    'data',
)
data_dir_found = os.path.exists(data_dir)
if not data_dir_found:
    print("path does not exist, making new path")
    os.makedirs(data_dir)

# Later cells refer to their input/output files by bare file name,
# so everything resolves relative to this directory.
os.chdir(data_dir)
if data_dir_found:
    print("path exists")

path exists


## 2. Defining the bounds
Import the boundaries of the study area.

In [3]:
# States that make up the study area, taken from the TIGER 2018 state boundaries.
lis = ['Colorado', 'Utah', 'Wyoming', 'New Mexico', 'Arizona']
states = ee.FeatureCollection("TIGER/2018/States").filter(ee.Filter.inList('NAME', lis))

# Download the filtered states as a GeoDataFrame, keeping only the NAME attribute.
states_gdf = geemap.ee_to_geopandas(states, selectors=['NAME'])
print("The crs of your area of interest df is", states_gdf.crs)
# A bare expression in the middle of a cell is never rendered, so the preview
# has to be displayed explicitly.
display(states_gdf.head())

# One rectangular envelope per state (not a single box around all of them).
bounding_box = states_gdf.envelope
states_bb = gpd.GeoDataFrame(gpd.GeoSeries(bounding_box), columns=['geometry'])
states_bb

An error occurred while downloading. 
 Retrying ...
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/7bdabb9600c21f0b21d04f4b1e71010c-de024b585609b84b12edb4c53d9ba138:getFeatures
Please wait ...
The crs of your area of interest df is epsg:4326


Unnamed: 0,geometry
0,"POLYGON ((-109.05041 31.33222, -103.00203 31.3..."
1,"POLYGON ((-114.05288 36.99765, -109.04157 36.9..."
2,"POLYGON ((-109.06020 36.99245, -102.04153 36.9..."
3,"POLYGON ((-111.05454 40.99478, -104.05226 40.9..."
4,"POLYGON ((-114.81657 31.33217, -109.04516 31.3..."


In [4]:
# Create the interactive geemap map; the NLCD layers are added to it in later cells.
Map = geemap.Map()
Map

Map(center=[40, -100], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(T…

In [5]:
# Full catchment polygons for each burned lake, read from the working
# directory (generated in 01_download_clip_merge.ipynb).
file_path = 'land_only_whole_ctchmt_aqst.shp'

# Local GeoDataFrame copy of the catchments.
land_catchmts = gpd.read_file(file_path)

# Earth Engine version of the same polygons, used for the Earth Engine
# clip and per-catchment statistics.
land_ctchmts_ee = geemap.geopandas_to_ee(land_catchmts)

## 3. Import Earth Engine images/dataset

In [6]:
# 'landcover' classification band of the NLCD 2016 image.
landcover = ee.Image("USGS/NLCD/NLCD2016").select('landcover')

# Same band, clipped to the catchment polygons.
landcover_roi = landcover.clip(land_ctchmts_ee)

# Note that the unclipped image is the one drawn on the map here.
Map.addLayer(landcover, {}, 'NLCD 2016')

In [7]:
# Class codes stored as metadata on the NLCD image.
raw_class_values = landcover_roi.get('landcover_class_values').getInfo()
print(raw_class_values)

# Consecutive replacement codes 0..n-1, one per original class code.
n_classes = len(raw_class_values)
new_class_values = list(range(n_classes))

# Display palette that goes with the class codes.
class_palette = landcover_roi.get('landcover_class_palette').getInfo()
print(class_palette)

# Recode the classes, rename the resulting band back to 'landcover', and
# re-attach the class metadata using the new codes.
nlcd = (
    landcover_roi
    .remap(raw_class_values, new_class_values)
    .select(['remapped'], ['landcover'])
    .set('landcover_class_values', new_class_values)
    .set('landcover_class_palette', class_palette)
)

Map.addLayer(nlcd, {}, 'NLCD')
Map


[11, 12, 21, 22, 23, 24, 31, 41, 42, 43, 51, 52, 71, 72, 73, 74, 81, 82, 90, 95]
['476ba1', 'd1defa', 'decaca', 'd99482', 'ee0000', 'ab0000', 'b3aea3', '68ab63', '1c6330', 'b5ca8f', 'a68c30', 'ccba7d', 'e3e3c2', 'caca78', '99c247', '78ae94', 'dcd93d', 'ab7028', 'bad9eb', '70a3ba']


Map(center=[40, -100], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=HBox(children=(T…

In [8]:
# Append the original (un-remapped) NLCD band to the remapped image.
# NOTE(review): `nlcd` is reassigned from itself, so re-running this cell calls
# addBands on the already-extended image; run it once per kernel session.
nlcd = nlcd.addBands(landcover)

## 4. Collect statistics for each catchment polygon

In [10]:
# Mean of each `nlcd` band within every catchment polygon, at scale=30.
# NOTE(review): the bands hold categorical class codes, so confirm that a mean
# is the intended statistic.
reduced_fc = nlcd.reduceRegions(
    collection=land_ctchmts_ee,
    reducer=ee.Reducer.mean(),
    scale=30,
)

In [11]:
# Output table, written relative to the working directory.
nlcd_stats = 'nlcd_stats.csv'

# Per-catchment land-cover statistics, grouped by NLCD class.
# statistics_type can be either 'SUM' or 'PERCENTAGE'.
# denominator can be used to convert square meters to other areal units,
# such as square kilometers.
geemap.zonal_statistics_by_group(
    landcover,
    land_ctchmts_ee,
    nlcd_stats,
    statistics_type='PERCENTAGE',
    denominator=1000000,
    decimal_places=2,
)


Computing ... 
Generating URL ...
Downloading data from https://earthengine.googleapis.com/v1alpha/projects/earthengine-legacy/tables/13f13e93a45500fc50438e9720bbae84-45c3e1fcf7927a4ef13c49644abdf6d2:getFeatures
Please wait ...
Data downloaded to /Users/laurenkremer/Dropbox/cu_earthdata_certificate_2021/earthlab_project/data/data/nlcd_stats.csv


In [12]:
# Read back the land-cover statistics table exported by
# geemap.zonal_statistics_by_group.
file_path = 'nlcd_stats.csv'

# Plain pandas DataFrame (no geometry column is needed from here on).
landcover_stats = pd.read_csv(file_path)

In [13]:
# Inspect the column names and dtypes of the downloaded statistics table.
landcover_stats.dtypes

Class_71        float64
Class_82        float64
Class_52        float64
Class_31        float64
Class_42        float64
Class_21        float64
Class_11        float64
Class_sum       float64
Class_90        float64
system:index      int64
date             object
qa_sd           float64
TZID             object
nir_sd          float64
swir1           float64
swir2           float64
clouds          float64
source           object
type             object
qa                int64
red             float64
path              int64
nir             float64
blue_sd         float64
date_unity       object
system_ind        int64
id               object
landsat_id       object
row               int64
swir1_sd        float64
SiteID           object
green           float64
sat               int64
timediff        float64
endtime          object
chl_a           float64
pixelCount        int64
Hylak_id          int64
date_utc         object
swir2_sd        float64
blue            float64
date_only       

In [14]:
# NLCD class-code columns -> descriptive land-cover names.
nlcd_class_names = {
    "Class_71": "Grassland/Herbaceous",
    "Class_82": "Cultivated_Crops",
    "Class_52": "Shrub/Scrub",
    "Class_31": "BarrenLand_Rock/Sand/Clay",
    "Class_42": "Evergreen Forest",
    "Class_21": "Developed_OpenSpace",
    "Class_11": "Open_water",
    "Class_90": "Woody_Wetlands",
}

# errors="raise" makes a missing Class_* column fail loudly instead of being
# skipped, which also means this cell raises if it is run a second time.
landcover_stats = landcover_stats.rename(columns=nlcd_class_names, errors="raise")

In [15]:
# Check the renamed columns and their non-null counts.
landcover_stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 547 entries, 0 to 546
Data columns (total 46 columns):
 #   Column                     Non-Null Count  Dtype  
---  ------                     --------------  -----  
 0   Grassland/Herbaceous       547 non-null    float64
 1   Cultivated_Crops           547 non-null    float64
 2   Shrub/Scrub                547 non-null    float64
 3   BarrenLand_Rock/Sand/Clay  547 non-null    float64
 4   Evergreen Forest           547 non-null    float64
 5   Developed_OpenSpace        547 non-null    float64
 6   Open_water                 547 non-null    float64
 7   Class_sum                  547 non-null    float64
 8   Woody_Wetlands             547 non-null    float64
 9   system:index               547 non-null    int64  
 10  date                       547 non-null    object 
 11  qa_sd                      543 non-null    float64
 12  TZID                       547 non-null    object 
 13  nir_sd                     543 non-null    float64

In [16]:
# Export file to local drive
out_path = os.path.join("lake_landcover.csv")
landcover_stats.to_csv(out_path)