# AKSSF
## Purpose:  Identify RCAs for AKSSF Study Area that have already been generated via ACCS or NHD+ Beta
<br>
<a href="https://prd-tnm.s3.amazonaws.com/index.html?prefix=StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/">NHD+ BETA data are currently available here</a>
HUCs to download if available:

* 1902 - Cook Inlet currently available

1. Download and extract all <a href="https://www.usgs.gov/core-science-systems/ngp/national-hydrography">NHDPlusCatchment polygons (catchments - see 3rd paragraph)</a> from the NHDPlus feature dataset in the HRNHDPlus geodatabases.

2. Download all NHDPlus raster data for covariate calculations
    * Flow Accumulation
    * DEM
        * Aspect
        * Slope
        * Gradient
        * Elevation
    * Stream Grid

3. Collect and Merge catchments
    * There are a number of self intersections and data gaps along the HUC borders but this should not be a problem
    for our purposes

4. Collect and Merge flowlines

5. Collect and Merge waterbodies


# Overview map of Study Area
## Link to current <a href="https://arcg.is/arS8j">AKSSF Map</a> with study area divided by region and temperature data
![AKSSF_2021](https://user-images.githubusercontent.com/36055691/106055635-1a746d80-609a-11eb-927d-8771f1afa150.JPG)

# Create workspaces and begin Data Download
### Set Environments and import modules

In [3]:
# Import modules
import arcpy
import zipfile
import requests
import time

# Geoprocessing environment applied to every arcpy tool call that follows:
# outputs are written in the Alaska Albers projection, and rerunning a cell
# is allowed to replace outputs that already exist.
arcpy.env.outputCoordinateSystem = arcpy.SpatialReference("Alaska Albers Equal Area Conic")
arcpy.env.overwriteOutput = True

In [1]:
### Make working directories and set variables
import os

# All outputs are created beneath the directory the notebook was launched from.
path = os.getcwd()
print (path)

# Build the relative location with os.path.join instead of a hard-coded separator.
dirname = os.path.join('hydrography', 'AKSSF_NHDPlus')
temp_dir = os.path.join(path,dirname)
ziploc = os.path.join(temp_dir, 'zips')          # downloaded archives
extractloc = os.path.join(temp_dir, 'extracts')  # extracted archive contents
dl_readpath = os.path.join(temp_dir,"data_download_readme.txt")

if not os.path.exists(temp_dir):
    print("Creating output folders")
else:
    print('Output locations already exist')
    print('\t------------')

# Create every folder independently so a partially built tree (temp_dir present
# but zips/extracts missing) is repaired instead of being silently accepted.
for folder in (temp_dir, ziploc, extractloc):
    os.makedirs(folder, exist_ok=True)

# Start the readme only once; the header ends with a newline so that lines
# appended later begin on their own line.
if not os.path.exists(dl_readpath):
    with open(dl_readpath, "w", encoding='utf-8') as dl_read:
        dl_read.write("Readme for NHDPLUS data dowload information\n")

print ("1:",temp_dir)
print("2:",ziploc)
print("3:",extractloc)

C:\Users\dwmerrigan\Documents\GitHub\AKSSF
Creating output folders
1: C:\Users\dwmerrigan\Documents\GitHub\AKSSF\hydrography\AKSSF_NHDPlus
2: C:\Users\dwmerrigan\Documents\GitHub\AKSSF\hydrography\AKSSF_NHDPlus\zips
3: C:\Users\dwmerrigan\Documents\GitHub\AKSSF\hydrography\AKSSF_NHDPlus\extracts


### Create output gdb
**or change path and specify location to out gdb**

In [4]:
print('Creating output GDB')
gdb_name = "AKSSF_temp.gdb"
outcheck = os.path.join(temp_dir, gdb_name)

if os.path.exists(outcheck):
    # Reuse the geodatabase left by an earlier run.
    print ('Output location already exists')
    outgdb = outcheck
else:
    # First run: create the file geodatabase and keep the path arcpy reports.
    tempgdb = arcpy.CreateFileGDB_management(temp_dir, gdb_name)
    print ('Output geodatabase created at',outcheck)
    outgdb = tempgdb.getOutput(0)

Creating output GDB
Output geodatabase created at C:\Users\dwmerrigan\Documents\GitHub\AKSSF\hydrography\AKSSF_NHDPlus\AKSSF_temp.gdb


# Download NHD+ Data
### URLs are current as of 01/27/2021 and may need to be updated before running again

In [5]:
# Create list of Alaska-specific NHD+ gdbs that are currently available

# Import the module itself rather than `from datetime import datetime`:
# other cells call datetime.datetime.now() and would fail with AttributeError
# if the name `datetime` were rebound to the class here.
import datetime

today = datetime.datetime.now()
# Make the time stamp (YYYYMMDD) that is recorded in the download readme.
time_stamp = '{:%Y%m%d}'.format(today)
print(time_stamp)
# AKSSF NHDPlus Data
# NOTE(review): the last four entries carry nine-digit codes (e.g. 190200101)
# although the archive names say HU8 - confirm these are not typos.
urls = ['https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020202_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020301_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020302_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020401_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020402_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020501_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020502_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020503_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020504_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020505_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020601_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020602_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_19020800_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_190200101_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_190200102_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_190200103_HU8_GDB.zip',
       'https://prd-tnm.s3.amazonaws.com/StagedProducts/Hydrography/NHDPlusHR/Beta/GDB/NHDPLUS_H_190200104_HU8_GDB.zip']

# Derive each raster archive URL by swapping the trailing 'GDB.zip' for
# 'RASTER.7z'. Trimming the suffix (instead of slicing at a fixed offset of
# 102 characters) keeps the '_HU8_' separator intact whatever the length of
# the HUC code in the file name.
gdb_suffix = 'GDB.zip'
r_urls = [url[:-len(gdb_suffix)] + 'RASTER.7z' for url in urls]

# Append the provenance of this run to the readme; the `with` block closes
# the file, so no explicit close() is needed.
with open(dl_readpath, mode = "a", encoding='utf-8') as dl_read:
    dl_read.write('NHDPLUS data current as of ' + str(time_stamp)+ '\n')
    dl_read.write('NHDPLUS GDB links \n')
    for url in urls:
        dl_read.write(str(url))
        dl_read.write('\n')
    dl_read.write("NHDPlus Raster Data links\n")
    for r_url in r_urls:
        dl_read.write(str(r_url))
        dl_read.write('\n')

20210127


### Raster Data Downloads
 * Downloads will take some time
  * **Need to call 7 zip using subprocess to extract these files as zipfile will not read them**

In [6]:
import datetime

# Start timing
iteration_start = time.time()
print ('Begin raster file downloads')
print("\t---------")

# URLs whose request did not return a success status
failed_downloads = []

# Iterate through the raster URLs and download each archive that is missing
for rurl in r_urls:
    # Local file name: the archive name without its 'NHDPLUS_H_' prefix
    name = rurl.rsplit('/', 1)[-1].replace('NHDPLUS_H_', '', 1)
    print (name)
    zippath = os.path.join(ziploc, name) # path to save download to plus name of download

    # Check the disk BEFORE issuing the request so existing archives are not
    # downloaded again just to be thrown away.
    if os.path.exists(zippath):
        print("Raster Data Already Downloaded")
        continue

    # Stream to a '.part' file and rename on success, so an interrupted run
    # never leaves a truncated archive that looks complete.
    partpath = zippath + '.part'
    with requests.get(rurl, stream=True, timeout=60) as r:
        # Retrieve HTTP meta-data
        print(r.status_code)
        print(r.headers.get('content-type'))
        print(r.encoding)
        if not r.ok:
            # Do not save an error page under the archive's name.
            failed_downloads.append(rurl)
            continue
        with open(partpath,'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    os.replace(partpath, zippath)

# End timing
iteration_end = time.time()
iteration_elapsed = int(iteration_end - iteration_start)
iteration_success_time = datetime.datetime.now()
# Report success
print(f'Raster all data downloaded at {iteration_success_time.strftime("%Y-%m-%d %H:%M")} (Elapsed time: {datetime.timedelta(seconds=iteration_elapsed)})')
print('----------')
if failed_downloads:
    print('Raster downloads that failed:')
    for failed_url in failed_downloads:
        print('\t', failed_url)
else:
    print("Raster Data Available")

Begin raster file downloads
	---------
19020202_HU8_RASTER.7z


KeyboardInterrupt: 

### Begin downloading NHD+ data
***Copied to `T:\Aquatic\AKSSF\NHDPlus_20201216`***

In [7]:
import time
import datetime
print ('Begin file downloads')
start = datetime.datetime.now()

# URLs whose request did not return a success status
failed_downloads = []

# Iterate through the gdb URLs and download each archive that is missing
for url in urls:
    # Local file name: the archive name without its 'NHDPLUS_H_' prefix
    name = url.rsplit('/', 1)[-1].replace('NHDPLUS_H_', '', 1)
    print (name)
    zippath = os.path.join(ziploc, name) # path to save download to plus name of download

    # Check the disk BEFORE issuing the request so existing archives are not
    # downloaded again just to be thrown away.
    if os.path.exists(zippath):
        continue

    # Stream to a '.part' file and rename on success, so an interrupted run
    # never leaves a truncated archive that looks complete.
    partpath = zippath + '.part'
    with requests.get(url, stream=True, timeout=60) as r:
        # Retrieve HTTP meta-data
        print(r.status_code)
        print(r.headers.get('content-type'))
        print(r.encoding)
        if not r.ok:
            # Do not save an error page under the archive's name.
            failed_downloads.append(url)
            continue
        with open(partpath,'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)
    os.replace(partpath, zippath)

print('')
if failed_downloads:
    print('Downloads that failed:')
    for failed_url in failed_downloads:
        print('\t', failed_url)
print ('ALL DOWNLOADS COMPLETE')
stop = datetime.datetime.now()
elapsed = stop - start
print ('Time to complete = ',elapsed)

Begin file downloads
19020202_HU8_GDB.zip


KeyboardInterrupt: 

**Unzip all the gdbs to a new dir; to delete each zip after extraction, uncomment the `os.remove(file_name)` line in the cell below**

In [None]:
# Imported here so the cell does not depend on a download cell having run.
import datetime

# NOTE: the working-directory change is kept from the original cell because
# code further down the notebook resolves archive names relative to it.
os.chdir(ziploc)
ext = ".zip"
start = datetime.datetime.now()
for item in os.listdir(ziploc): # loop through items in dir
    if item.endswith(ext): # check for ".zip" extension
        file_name = os.path.join(ziploc, item) # full path of the archive
        print ('Unzipping..', file_name)
        try:
            # The context manager closes the archive even if extraction fails.
            with zipfile.ZipFile(file_name) as zip_ref:
                zip_ref.extractall(extractloc) # extract file to dir
        except zipfile.BadZipFile:
            # A failed download can leave a non-zip file behind; report it and
            # carry on with the remaining archives instead of aborting.
            print ('Skipping, not a valid zip archive:', file_name)
            continue

        #os.remove(file_name) # delete zipped file if required

print('')
print ('Unzipping complete')
stop = datetime.datetime.now()
elapsed = stop - start
print ('Time to complete = ',elapsed)

### Call 7 zip using subprocess to extract 7z raster data as zipfile will not read them


In [None]:
# Imported here so the cell does not depend on a download cell having run.
import datetime
import subprocess

start = datetime.datetime.now()
# Location of the 7-Zip command line executable on this machine.
seven_zip = r"C:\Program Files\7-Zip\7z.exe"
zext = '.7z'
for item in os.listdir(ziploc): # loop through items in dir
    if item.endswith(zext):
        # Join with ziploc explicitly: os.path.abspath(item) is only correct
        # when the current working directory happens to be ziploc.
        file_name = os.path.join(ziploc, item)
        print ('Unzipping ' ,file_name)
        # Pass the arguments as a list so paths containing spaces are quoted
        # correctly instead of being split by the command line parser.
        returncode = subprocess.call([seven_zip, 'x', file_name, '-o' + extractloc])
        if returncode != 0:
            print ('7-Zip exited with code', returncode, 'for', file_name)
print ('Finished extracting 7z file')
print('')
print ('Unzipping complete')
stop = datetime.datetime.now()
elapsed = stop - start
print ('Time to complete = ',elapsed)

# Begin GIS portion

**Walk through folder and gdbs to extract catchments and add/calc HUC8 identifier field**

**The HUC8 id is read from the end of the catchment path because the NHD+ gdb naming convention is the same for every download, whereas the leading `temp_dir` portion of the path varies between machines**

In [8]:
# Imported here so the cell does not depend on a download cell having run.
import datetime
import re

arcpy.env.workspace = extractloc
# The arcpy List* calls are guarded with `or []` in case they return None.
gdbs = arcpy.ListWorkspaces() or []
catchments = []
start = datetime.datetime.now()
for gdb in gdbs:
    arcpy.env.workspace = gdb
    datasets = arcpy.ListDatasets(feature_type='feature') or []
    for ds in datasets:
        for fc in arcpy.ListFeatureClasses(feature_dataset=ds) or []:
            if fc != "NHDPlusCatchment":
                continue
            catchpath = os.path.join(gdb, ds, fc)
            catchments.append(catchpath)

            # Read the HUC code from the gdb name (..._<code>_HU8_...). This
            # works for any code length; the fixed-position slice is kept
            # only as a fallback for names that do not match the pattern.
            huc_match = re.search(r'_(\d+)_HU8', os.path.basename(gdb))
            if huc_match:
                hucid = 'NHDPlusHUC8_' + huc_match.group(1)
            else:
                hucid = 'NHDPlusHUC8' + str(catchpath[-46:-37])

            # Add the HUC8 identifier field only when it is missing so the
            # cell can be rerun on already-processed geodatabases.
            existing_fields = [field.name for field in arcpy.ListFields(fc)]
            if 'HUC8_ID' not in existing_fields:
                arcpy.AddField_management(fc,'HUC8_ID', 'TEXT','','',255,'HUC8_ID', 'NULLABLE')

            fields = ['HUC8_ID']
            print ('Updating Rows of ' , hucid)
            # The with-block releases the cursor; no explicit `del` is needed
            # (deleting `row` raised NameError when the table had no rows).
            with arcpy.da.UpdateCursor(fc,fields) as cur:
                for row in cur:
                    row[0] = hucid
                    cur.updateRow(row)
print ('')
print("UPDATES COMPLETE")
stop = datetime.datetime.now()
elapsed = stop - start
print ('Time to complete = ',elapsed)

Updating Rows of  NHDPlusHUC8_19020202
Updating Rows of  NHDPlusHUC8_19020301
Updating Rows of  NHDPlusHUC8_19020302
Updating Rows of  NHDPlusHUC8_19020401
Updating Rows of  NHDPlusHUC8_19020402
Updating Rows of  NHDPlusHUC8_19020501
Updating Rows of  NHDPlusHUC8_19020502
Updating Rows of  NHDPlusHUC8_19020503
Updating Rows of  NHDPlusHUC8_19020504
Updating Rows of  NHDPlusHUC8_19020505
Updating Rows of  NHDPlusHUC8_19020601
Updating Rows of  NHDPlusHUC8_19020602
Updating Rows of  NHDPlusHUC8_19020800

UPDATES COMPLETE
Time to complete =  0:00:39.833777


### Collect NHDPlus Data and Merge together

In [10]:
import arcpy
import datetime  # imported here so the cell does not depend on a download cell
import os

arcpy.env.workspace = extractloc
# The arcpy List* calls are guarded with `or []` in case they return None.
gdbs = arcpy.ListWorkspaces() or []

# Create empty feature class containers
catchments = []
flowlines = []
waterbodies = []

# Feature class name -> list that collects its path from every geodatabase
collectors = {
    "NHDFlowline": flowlines,
    "NHDWaterbody": waterbodies,
    "NHDPlusCatchment": catchments,
}

for gdb in gdbs:
    arcpy.env.workspace = gdb
    datasets = arcpy.ListDatasets(feature_type='feature') or []
    for ds in datasets:
        for fc in arcpy.ListFeatureClasses(feature_dataset=ds) or []:
            if fc in collectors:
                collectors[fc].append(os.path.join(gdb, ds, fc))

# Fail early with a clear message instead of letting Merge run on an empty list.
for label, paths in (('flowline', flowlines),
                     ('waterbody', waterbodies),
                     ('catchment', catchments)):
    if not paths:
        raise RuntimeError(f'No {label} feature classes found under {extractloc}')

arcpy.env.workspace = outgdb
# Timing covers the merges only (the collection loop above is not included).
start = datetime.datetime.now()
print("Merging flowlines")
print('\t----------')
# Merge flowlines
nhdflow_merge = arcpy.Merge_management(flowlines,'NHDPlusFlowlines_Merge')
print("Merging waterbodies")
print('\t----------')
# Merge Waterbodies
waterbody_merge = arcpy.Merge_management(waterbodies,'NHDPlusWaterbodies_Merge')
print("Merging catchments")
print('\t----------')
# Merge Catchments
catchments_merge = arcpy.Merge_management(catchments,'NHDPlusCatchments_Merge')
print ('')
print("ALL MERGES COMPLETE")
stop = datetime.datetime.now()
elapsed = stop - start
print ('Time to complete = ',elapsed)



Merging flowlines
	----------
Merging waterbodies
	----------
Merging waterbodies
	----------

MERGE COMPLETE
Time to complete =  0:01:15.329667


### Calculate Covariates
* Copy code from Deshka and add code for any new covariates
