# MODIS snow metrics

MODIS snow metrics are downloaded as multi-band rasters. Each band is one
metric, and there is one raster per year for 2001-2019.

Steps:
1. Create new raster by year filtered on band 10 cells equal to 32.
2. Use zonal statistics to calculate average lcld by watershed.


In [1]:
# 1.1 create dictionary with path and year for all modis rasters

import arcpy
import os
modis_folder = r"W:\GIS\MODIS_Snow_Metrics"
# Maps the four-character year prefix of each raster name -> full raster path
modis_rasters = {}

arcpy.env.workspace = modis_folder
rasters = arcpy.ListRasters()

# Remove the extra 2011 raster ("_c.tif", not sure what it is) and anything
# whose name contains "byyear.tif".
rasters = [x for x in rasters if "_c.tif" not in x and "byyear.tif" not in x]
print(rasters)

for raster in rasters:
    # Store the full path: the dictionary value is later extended with
    # "/Band_N", and leaving this assignment out makes the next line fail
    # with a NameError on `path`.
    path = os.path.join(modis_folder, raster)
    year = raster[0:4]
    modis_rasters[year] = path

print(len(modis_rasters))
print(modis_rasters)

In [None]:
# 1.2 create new set of rasters after filtering on band 10 value
# for land and continuous snow cover

import arcpy, os
from arcpy.sa import *

# Let re-runs replace rasters that were already written
arcpy.env.overwriteOutput = True

for year, raster_path in modis_rasters.items():
    print("Creating band_5 mask for: " + year)
    # Mask: cells where band 10 equals 32 get the value 1
    # (no false-branch value is passed to Con).
    qa_band = Raster(raster_path + "/Band_10")
    keep_mask = Con(qa_band == 32, 1)
    # Pull band 5 through the mask and write one GeoTIFF per year
    out_tif = "W://GIS//MODIS_Snow_Metrics//LCLD_rasters//" + year + "_lcld_32.tif"
    masked_band5 = ExtractByMask(raster_path + "/Band_5", keep_mask)
    masked_band5.save(out_tif)

In [2]:
# 1.3 create dictionary with path and year for all lcld rasters

import arcpy
import os

lcld_folder = r'D:\\GIS_temp\\LCLD_rasters'
# Declared here; nothing in this cell fills it.
lcld_rasters = {}

arcpy.env.workspace = lcld_folder
raster_list = arcpy.ListRasters()

# Print a short property report for every raster in the workspace.
for raster in raster_list:
    desc = arcpy.Describe(raster)
    ext1 = desc.extent
    # Extent corners (not used in the report below)
    xm, ym, xM, yM = ext1.XMin, ext1.YMin, ext1.XMax, ext1.YMax

    # NOTE(review): the header says "Flow Direction" although the workspace is
    # the LCLD raster folder - presumably copied from another script; confirm.
    report_lines = [
        "Flow Direction Raster Information",
        "",
        "Raster name:      %s" % desc.name,
        "Projection:      %s" % desc.SpatialReference.name,
        "Compression Type: %s" % desc.compressionType,
        "Raster Format:    %s" % desc.format,
        "Height: %d" % desc.height,
        "Width:  %d" % desc.width,
        "Cellsize:  %f" % desc.meanCellHeight,
        "Integer Raster: %s" % desc.isInteger,
        "----------",
    ]
    for line in report_lines:
        print(line)

Flow Direction Raster Information

Raster name:      2001_lcld_32.tif
Projection:      Albers_Conic_Equal_Area
Compression Type: LZW
Raster Format:    TIFF
Height: 4391
Width:  7036
Cellsize:  500.000000
Integer Raster: True
----------
Flow Direction Raster Information

Raster name:      2002_lcld_32.tif
Projection:      Albers_Conic_Equal_Area
Compression Type: LZW
Raster Format:    TIFF
Height: 4391
Width:  7036
Cellsize:  500.000000
Integer Raster: True
----------
Flow Direction Raster Information

Raster name:      2003_lcld_32.tif
Projection:      Albers_Conic_Equal_Area
Compression Type: LZW
Raster Format:    TIFF
Height: 4391
Width:  7036
Cellsize:  500.000000
Integer Raster: True
----------
Flow Direction Raster Information

Raster name:      2004_lcld_32.tif
Projection:      Albers_Conic_Equal_Area
Compression Type: LZW
Raster Format:    TIFF
Height: 4391
Width:  7036
Cellsize:  500.000000
Integer Raster: True
----------
Flow Direction Raster Information

Raster name:      200

Step 2: Get average lcld by watershed using Zonal Statistics as Table.
Note: there is an error for very small watersheds that don't intersect a cell center.
Used try/except to skip the error, then buffer by 150 meters and try again.
MODIS cell size is 500 m, so that should work.
Adding fields for watershed number and year of MODIS snow metric,
appending mean, watershed, and year to a table,
deleting the zonal stats table,
converting everything to a pandas data frame and exporting as .csv.

## DM - Pick up on 08112021 -
Merge all AKSSF watersheds together (all regions) and loop over the input lcld raster datasets to calculate the mean. Identify
small watersheds that did not intersect a cell center and, as a result, were not processed for a given dataset, and store
them in a separate df with the watershed identifier and the missing lcld raster path. Buffer the missing watersheds by 150 meters (less?)
and run again.

In [3]:
import pandas as pd

# One row of descriptive properties per raster in raster_list
data = []

for r in raster_list:
    rdesc = arcpy.Describe(r)
    ext = rdesc.extent

    # Value statistics: each result's first output is converted to float
    rmin, rmax, rmean = (
        float(arcpy.GetRasterProperties_management(r, prop).getOutput(0))
        for prop in ("MINIMUM", "MAXIMUM", "MEAN")
    )
    # Column / row counts are stored exactly as getOutput returns them
    col, row = (
        arcpy.GetRasterProperties_management(r, prop).getOutput(0)
        for prop in ("COLUMNCOUNT", "ROWCOUNT")
    )

    data.append([
        rdesc.name,
        ext.XMin, ext.YMin, ext.XMax, ext.YMax,
        rmin, rmax, rmean,
        col, row,
        rdesc.meanCellHeight,
        rdesc.isInteger,
        rdesc.SpatialReference.name,
    ])

# Show floats with thousands separators and two decimals
pd.options.display.float_format = '{:,.2f}'.format

summary_columns = [
    "raster", 'XMin', 'YMin', 'XMax', 'YMax',
    'Min Value', 'Max Value', 'Mean Value',
    'Columns', 'Rows', 'Cell Size', 'Integer', 'Spatial Reference',
]
df = pd.DataFrame(data=data, columns=summary_columns)
# Displayed as the cell result only; the result is not assigned back to df
df.set_index('raster')

Unnamed: 0_level_0,XMin,YMin,XMax,YMax,Min Value,Max Value,Mean Value,Columns,Rows,Cell Size,Integer,Spatial Reference
raster,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2001_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,305.0,578.0,509.34,7036,4391,500.0,True,Albers_Conic_Equal_Area
2002_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,232.0,577.0,505.1,7036,4391,500.0,True,Albers_Conic_Equal_Area
2003_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,261.0,577.0,497.88,7036,4391,500.0,True,Albers_Conic_Equal_Area
2004_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,270.0,578.0,501.34,7036,4391,500.0,True,Albers_Conic_Equal_Area
2005_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,261.0,578.0,500.98,7036,4391,500.0,True,Albers_Conic_Equal_Area
2006_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,289.0,577.0,504.93,7036,4391,500.0,True,Albers_Conic_Equal_Area
2007_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,302.0,577.0,501.58,7036,4391,500.0,True,Albers_Conic_Equal_Area
2008_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,285.0,578.0,505.48,7036,4391,500.0,True,Albers_Conic_Equal_Area
2009_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,256.0,578.0,504.58,7036,4391,500.0,True,Albers_Conic_Equal_Area
2010_lcld_32.tif,-1805447.82,374524.16,1712552.18,2570024.16,252.0,577.0,496.76,7036,4391,500.0,True,Albers_Conic_Equal_Area


In [4]:
# Function to add key, value pairs to dictionary
def append_value(dict_obj, key, value):
    """Add ``value`` under ``key`` in ``dict_obj``, modifying it in place.

    A key that is not present yet is set to ``value`` directly. When the key
    already exists, its current entry is wrapped in a list (unless it is
    already a list) and ``value`` is appended to that list.

    Returns None.
    """
    # New key: store the value as-is
    if key not in dict_obj:
        dict_obj[key] = value
        return

    existing = dict_obj[key]
    if not isinstance(existing, list):
        # Promote the single stored value to a list before appending
        existing = [existing]
        dict_obj[key] = existing
    existing.append(value)

In [10]:
import arcpy, os, sys, time, datetime
from arcpy.sa import *
# Start timing function
processStart = time.time()
processStartdt = datetime.datetime.now()
# List to store zonal stats tables
lcld_Ztables = []
# Dictionary to store watersheds missed in zonal stats and the associated raster dataset
miss_stats = {}
# Merged watersheds created from spatial join script
akssf_wtds = r"D:\\GIS_temp\\AKSSF_land_met\\AKSSF_land_met.gdb\\all_akssf_wtds"
# Remove 2 smallest sites that are missed and buffer by 150 meters, merge back with the rest of the sites and use as input.
smallwtd_lyr = arcpy.management.SelectLayerByAttribute(akssf_wtds, "NEW_SELECTION", "cat_ID_con = 'Copper_River_75003900055694' Or cat_ID_con = 'Bristol_Bay_4000885'", None)
# NOTE(review): akssf_wtds is a dataset path, not smallwtd_lyr, so this call may
# not be switching the selection made on the line above - confirm that wtds
# really excludes the two small watersheds.
wtds = arcpy.management.SelectLayerByAttribute(akssf_wtds, "SWITCH_SELECTION")
small_buff = arcpy.Buffer_analysis(smallwtd_lyr,r'memory\small_buff',150)
modis_wtds = arcpy.Merge_management([wtds, small_buff],r'memory\modis_wtds')
outgdb = r"D:\\GIS_temp\\AKSSF_land_met\\AKSSF_land_met.gdb\\"

arcpy.env.overwriteOutput = True
walk = arcpy.da.Walk(lcld_folder, datatype='RasterDataset')
for dirpath, dirnames, filenames in walk:
    for filename in filenames:
        raspath = os.path.join(dirpath, filename)
        # Raster names start with the four-digit year
        year = filename[0:4]
        lcld_outname = 'lcld_'+str(year)+'_zStats'
        lcld_outpath = os.path.join(outgdb, lcld_outname)
        print(f'Year: {year} - raster path {raspath}')
        colname = 'wtd_lcld_mn_' + str(year)
        # lcld zonal statistics as table for all akssf watersheds
        print(f'Calculating {filename} zonal stats for all AKSSF watersheds...')
        try:
            # Begin Zonal Stats
            zstat_start = time.time()
            print(f'Begin zonal stats for {filename}')
            lcld_table = ZonalStatisticsAsTable(in_zone_data = modis_wtds,
                                                zone_field = 'cat_ID_con',
                                                in_value_raster = raspath,
                                                out_table = lcld_outpath,
                                                statistics_type='MEAN'
                                                )
            # Append zTable to table list
            lcld_Ztables.append(lcld_outpath)
            # Rename MEAN so every year's table carries a distinct column name
            arcpy.AlterField_management(lcld_table,'MEAN', colname,colname)
            # Zone IDs that received a statistic; a set keeps the membership
            # test below cheap, and the with-block closes the cursor.
            with arcpy.da.SearchCursor(lcld_table, 'cat_ID_con') as zcur:
                processed_ids = {zrow[0] for zrow in zcur}
            zstat_stop = time.time()
            zstat_time = int (zstat_stop - zstat_start)
            print(f'Zonal Stats for {filename} Elapsed time: ({datetime.timedelta(seconds=zstat_time)})')
            # Code to identify missing watersheds: compare every source
            # watershed ID against the IDs present in the zonal table.
            with arcpy.da.SearchCursor(akssf_wtds,'cat_ID_con') as cur:
                for row in cur:
                    if row[0] not in processed_ids:
                        print (f' Watershed {row[0]} not processed for {filename}')
                        append_value(miss_stats,filename,row[0])
            print('----------')

        except Exception as e:
            # Report the failure and continue with the next raster.
            # KeyboardInterrupt is deliberately not caught, so the run can be
            # stopped; str(e) also works for exceptions that carry no args.
            msg = str(e)
            print(msg)
            arcpy.AddError(msg)

# End timing
processEnd = time.time()
processElapsed = int(processEnd - processStart)
processSuccess_time = datetime.datetime.now()

# Report success
print(f'Process completed at {processSuccess_time.strftime("%Y-%m-%d %H:%M")} '
      f'(Elapsed time: {datetime.timedelta(seconds=processElapsed)})')
print('----------')

Year: 2001 - raster path D:\\GIS_temp\\LCLD_rasters\2001_lcld_32.tif
Calculating 2001_lcld_32.tif zonal stats for all AKSSF watersheds...
Begin zonal stats for 2001_lcld_32.tif
Zonal Stats for 2001_lcld_32.tif Elapsed time: (0:00:03)
----------
Year: 2002 - raster path D:\\GIS_temp\\LCLD_rasters\2002_lcld_32.tif
Calculating 2002_lcld_32.tif zonal stats for all AKSSF watersheds...
Begin zonal stats for 2002_lcld_32.tif
Zonal Stats for 2002_lcld_32.tif Elapsed time: (0:00:03)
----------
Year: 2003 - raster path D:\\GIS_temp\\LCLD_rasters\2003_lcld_32.tif
Calculating 2003_lcld_32.tif zonal stats for all AKSSF watersheds...
Begin zonal stats for 2003_lcld_32.tif
Zonal Stats for 2003_lcld_32.tif Elapsed time: (0:00:03)
----------
Year: 2004 - raster path D:\\GIS_temp\\LCLD_rasters\2004_lcld_32.tif
Calculating 2004_lcld_32.tif zonal stats for all AKSSF watersheds...
Begin zonal stats for 2004_lcld_32.tif
Zonal Stats for 2004_lcld_32.tif Elapsed time: (0:00:02)
----------
Year: 2005 - raster 

# Convert to Pandas df and merge tables/drop unnecessary columns.

In [None]:
# create df for all tables in table list

In [19]:
# Check that no watersheds were missed after buffer:
# load every zonal-stats table into its own DataFrame (one per table).
dfs = []
for table in lcld_Ztables:
    # The last 16 characters of the path are the table name
    tblname = table[-16:]
    print(tblname)

    # Collect all field names first ...
    z_field_list = [field.name for field in arcpy.ListFields(table)]
    # ... then convert once per table. Doing the conversion inside the field
    # loop appended one partial DataFrame per field instead of one per table.
    z_arr = arcpy.da.TableToNumPyArray(table, z_field_list)
    z_df = pd.DataFrame(z_arr)
    z_df = z_df.drop('OBJECTID',axis=1)
    # z_df = z_df.set_index('cat_ID_con')
    dfs.append(z_df)

lcld_2001_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2001
lcld_2002_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2002
lcld_2003_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2003
lcld_2004_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2004
lcld_2005_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2005
lcld_2006_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2006
lcld_2007_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2007
lcld_2008_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2008
lcld_2009_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2009
lcld_2010_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2010
lcld_2011_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2011
lcld_2012_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2012
lcld_2013_zStats
OBJECTID
cat_ID_con
ZONE_CODE
COUNT
AREA
wtd_lcld_mn_2013
lcld_2014_zStats
OBJECTID

In [31]:
# One DataFrame per zonal-stats table, indexed by watershed ID
dfs = []
for table in lcld_Ztables:
    tblname = table[-16:]
    print(tblname)
    # Read every field of the table into a structured array
    lcld_field_list = [fld.name for fld in arcpy.ListFields(table)]
    lcld_arr = arcpy.da.TableToNumPyArray(table, lcld_field_list)
    # Drop the bookkeeping columns and index by 'cat_ID_con'
    dfname = (
        pd.DataFrame(lcld_arr)
        .drop(['OBJECTID','ZONE_CODE', 'AREA', 'COUNT'], axis=1)
        .set_index('cat_ID_con')
    )
    dfs.append(dfname)

# Merge all data frames together
import numpy as np
from functools import reduce

# Fold the list pairwise with an outer merge on 'cat_ID_con'
lcld_df = reduce(
    lambda left, right: pd.merge(left, right, on='cat_ID_con', how="outer"),
    dfs,
)
lcld_df

lcld_2001_zStats
lcld_2002_zStats
lcld_2003_zStats
lcld_2004_zStats
lcld_2005_zStats
lcld_2006_zStats
lcld_2007_zStats
lcld_2008_zStats
lcld_2009_zStats
lcld_2010_zStats
lcld_2011_zStats
lcld_2012_zStats
lcld_2013_zStats
lcld_2014_zStats
lcld_2015_zStats
lcld_2016_zStats
lcld_2017_zStats
lcld_2018_zStats
lcld_2019_zStats


Unnamed: 0_level_0,wtd_lcld_mn_2001,wtd_lcld_mn_2002,wtd_lcld_mn_2003,wtd_lcld_mn_2004,wtd_lcld_mn_2005,wtd_lcld_mn_2006,wtd_lcld_mn_2007,wtd_lcld_mn_2008,wtd_lcld_mn_2009,wtd_lcld_mn_2010,wtd_lcld_mn_2011,wtd_lcld_mn_2012,wtd_lcld_mn_2013,wtd_lcld_mn_2014,wtd_lcld_mn_2015,wtd_lcld_mn_2016,wtd_lcld_mn_2017,wtd_lcld_mn_2018,wtd_lcld_mn_2019
cat_ID_con,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
Cook_Inlet_75004200007057,517.18,505.81,499.56,510.20,498.50,509.47,502.24,533.69,501.80,519.13,556.59,528.00,517.67,489.62,501.38,511.06,509.69,504.67,504.44
Cook_Inlet_75004300006312,489.49,488.91,428.90,475.63,474.72,486.84,467.09,489.58,469.48,486.15,730.00,488.76,494.26,462.85,418.14,456.46,471.71,467.68,467.13
Cook_Inlet_75004300001906,505.05,496.20,457.75,489.20,471.84,494.89,485.62,492.69,474.01,495.96,597.32,496.03,500.62,458.37,441.12,469.47,479.20,468.60,477.02
Cook_Inlet_75004300000100,478.16,485.19,417.08,473.74,464.30,481.86,464.53,484.58,467.03,481.16,730.00,490.23,489.97,457.87,407.09,435.60,465.68,459.29,458.41
Cook_Inlet_75004300004983,516.52,504.37,472.77,502.55,492.26,506.11,507.52,515.32,491.74,518.45,575.39,515.79,508.53,483.56,471.16,485.77,499.03,489.81,492.50
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Prince_William_Sound_43933,535.88,518.67,516.48,522.22,513.47,530.91,518.16,535.95,520.98,523.09,463.38,544.10,535.58,507.51,508.89,508.61,512.41,527.41,503.06
Prince_William_Sound_43973,516.43,508.38,488.68,508.22,494.92,516.78,502.38,514.60,506.17,511.72,548.59,530.82,525.48,488.29,487.51,479.77,493.83,503.86,482.63
Prince_William_Sound_44553,566.03,556.64,549.08,549.82,548.11,560.26,556.17,564.60,551.56,559.05,439.07,568.74,558.21,541.25,541.09,545.18,547.73,546.73,543.40
Prince_William_Sound_44623,521.31,517.54,500.27,510.78,505.13,512.96,514.97,525.94,506.22,516.12,481.00,544.99,528.42,499.57,495.92,494.14,509.45,512.58,499.17


### Export merged lcld df to csv

In [34]:
# Write the merged lcld table to a UTF-8 CSV in the output folder
outdir = r"C:\Users\dwmerrigan\Documents\GitHub\AKSSF\data_preparation\sensitivity_drivers"
lcld_csv_out = os.path.join(outdir, 'AKSSF_wtd_lcld_mn.csv')
lcld_df.to_csv(path_or_buf=lcld_csv_out, encoding='utf-8')
print('Export dataframe to csv complete')

Export dataframe to csv complete
