In [1]:
!pip install arcgis --quiet
!pip install ipython --quiet
!pip install ipywidgets --quiet!pip install openpyxl --quiet

## Set up supporting packages

In [2]:
from IPython.display import display, HTML;

from arcgis.gis import GIS;
from arcgis.features import FeatureLayer,FeatureSet;
from arcgis.geocoding import geocode;

import requests,json;

## Load all HUC12s from 1408

In [3]:
# HUC4 code whose HUC12 units are pulled below (Upper San Juan).
huc4 = '1408'

# Feature layer 6 of the EPA HydrologicUnits map service.
wbd_fl = FeatureLayer(
    'https://watersgeo.epa.gov/arcgis/rest/services/Support/HydrologicUnits/MapServer/6'
)

# Attribute-only query: every record whose HUC12 code starts with the HUC4 code.
wbd_rez = wbd_fl.query(
    where=f"SUBSTRING(HUC12,1,4) = '{huc4}'",
    out_fields='HUC12,NAME',
    return_geometry=False,
    orderByFields='HUC12',
)

wbd_rez.sdf.head()

Unnamed: 0,HUC12,NAME,OBJECTID
0,140801010101,Headwaters East Fork of the San Juan River,37600
1,140801010102,Quartz Creek,37669
2,140801010103,Sand Creek,37670
3,140801010104,The Clam Shell-East Fork San Juan River,37599
4,140801010201,Beaver Creek,36640


## Query ATTAINS by HUC12

In [4]:
# Drop the OBJECTID column and relabel NAME as "Name" in a single chained step.
wbd_df1 = (
    wbd_rez.sdf
    .drop(columns="OBJECTID")
    .rename(columns={"NAME": "Name"})
)

def pull_attains(huc12, timeout=60):
    """Fetch the ATTAINS huc12summary record for a single HUC12 code.

    Parameters
    ----------
    huc12 : str
        HUC12 code sent as the ``huc`` query parameter.
    timeout : float, optional
        Seconds to wait for the service before giving up, so that one
        unresponsive request cannot stall the whole run.

    Returns
    -------
    dict
        The first element of the ``items`` list in the JSON response.

    Raises
    ------
    requests.HTTPError
        If the service answers with an HTTP error status.
    IndexError
        If the response carries an empty ``items`` list.
    """
    response = requests.get(
        'https://attains.epa.gov/attains-public/api/huc12summary',
        params={'huc': huc12},  # let requests build and encode the query string
        timeout=timeout,
    )
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()

    items = response.json()['items']
    if not items:
        raise IndexError(
            'ATTAINS returned no huc12summary items for HUC12 ' + str(huc12)
        )

    return items[0]

# One ATTAINS request per HUC12 row; each returned summary dict is stored as-is.
wbd_df1['ATTAINS_RESULTS'] = [pull_attains(code) for code in wbd_df1['HUC12']]

wbd_df1.head()

Unnamed: 0,HUC12,Name,ATTAINS_RESULTS
0,140801010101,Headwaters East Fork of the San Juan River,"{'huc12': '140801010101', 'assessmentUnitCount..."
1,140801010102,Quartz Creek,"{'huc12': '140801010102', 'assessmentUnitCount..."
2,140801010103,Sand Creek,"{'huc12': '140801010103', 'assessmentUnitCount..."
3,140801010104,The Clam Shell-East Fork San Juan River,"{'huc12': '140801010104', 'assessmentUnitCount..."
4,140801010201,Beaver Creek,"{'huc12': '140801010201', 'assessmentUnitCount..."


## Skim off the HUC12 header data

In [5]:
# Work on a copy: a plain assignment would only alias wbd_df1, so the columns
# added below would also appear on the frame displayed by the previous cell.
wbd_df2 = wbd_df1.copy()

# Top-level fields of each ATTAINS summary dict, promoted to columns in this order.
header_fields = [
    'assessmentUnitCount',
    'totalCatchmentAreaSqMi',
    'assessedCatchmentAreaSqMi',
    'assessedCatchmentAreaPercent',
    'assessedGoodCatchmentAreaSqMi',
    'assessedGoodCatchmentAreaPercent',
    'assessedUnknownCatchmentAreaSqMi',
    'assessedUnknownCatchmentAreaPercent',
    'containImpairedWatersCatchmentAreaSqMi',
    'containImpairedWatersCatchmentAreaPercent',
    'containRestorationCatchmentAreaSqMi',
    'containRestorationCatchmentAreaPercent',
]

for field in header_fields:
    # key=field pins the current field name for this lambda.
    wbd_df2[field] = wbd_df2['ATTAINS_RESULTS'].apply(lambda rec, key=field: rec[key])

wbd_df2.head()

Unnamed: 0,HUC12,Name,ATTAINS_RESULTS,assessmentUnitCount,totalCatchmentAreaSqMi,assessedCatchmentAreaSqMi,assessedCatchmentAreaPercent,assessedGoodCatchmentAreaSqMi,assessedGoodCatchmentAreaPercent,assessedUnknownCatchmentAreaSqMi,assessedUnknownCatchmentAreaPercent,containImpairedWatersCatchmentAreaSqMi,containImpairedWatersCatchmentAreaPercent,containRestorationCatchmentAreaSqMi,containRestorationCatchmentAreaPercent
0,140801010101,Headwaters East Fork of the San Juan River,"{'huc12': '140801010101', 'assessmentUnitCount...",3,17.088,17.086,99.987,15.957,93.38,1.129,6.607,0.0,0.0,0.0,0.0
1,140801010102,Quartz Creek,"{'huc12': '140801010102', 'assessmentUnitCount...",3,19.675,18.866,95.887,18.648,94.779,0.735,3.736,0.0,0.0,0.0,0.0
2,140801010103,Sand Creek,"{'huc12': '140801010103', 'assessmentUnitCount...",1,18.214,18.216,100.008,18.216,100.008,0.0,0.0,0.0,0.0,0.0,0.0
3,140801010104,The Clam Shell-East Fork San Juan River,"{'huc12': '140801010104', 'assessmentUnitCount...",3,35.945,35.945,99.999,35.63,99.123,0.315,0.876,0.0,0.0,0.0,0.0
4,140801010201,Beaver Creek,"{'huc12': '140801010201', 'assessmentUnitCount...",1,16.113,16.113,99.999,16.113,99.999,0.0,0.0,0.0,0.0,0.0,0.0


## Collect Category Values

In [15]:
# Gather every distinct label that occurs anywhere in the ATTAINS results.
# Sets handle de-duplication; the sorted lists are produced at the end.
ircat_seen = set()
use_seen = set()
usegroup_seen = set()
attainment_seen = set()

for row in wbd_df2.itertuples():
    summary = row.ATTAINS_RESULTS

    if 'summaryByIRCategory' in summary:
        ircat_seen.update(
            entry['epaIRCategoryName'] for entry in summary['summaryByIRCategory']
        )

    if 'summaryByUseGroup' in summary:
        usegroup_seen.update(
            entry['useGroupName'] for entry in summary['summaryByUseGroup']
        )

    if 'summaryByUse' in summary:
        for entry in summary['summaryByUse']:
            use_seen.add(entry['useName'])

            # Attainment labels are only read from entries that carry a summary list.
            if 'useAttainmentSummary' in entry:
                attainment_seen.update(
                    smy['useAttainment'] for smy in entry['useAttainmentSummary']
                )

ircats = sorted(ircat_seen)
uses = sorted(use_seen)
usegroups = sorted(usegroup_seen)
attainments = sorted(attainment_seen)


## Generate IRCategory Data Frame

In [7]:
import pandas as pd, numpy as np

# NOTE(review): this binds a second name to the same DataFrame object as wbd_df2
# (no copy is made); wbd_df3 is not referenced by any other cell visible here.
wbd_df3 = wbd_df2;

def fetch_ircat(inp, val):
    """Look up one IR category in an ATTAINS summary dict.

    Scans ``inp['summaryByIRCategory']`` for the first entry whose
    ``epaIRCategoryName`` equals ``val`` and returns the tuple
    ``(catchmentSizeSqMi, catchmentSizePercent, assessmentUnitCount)``.
    Returns ``(None, None, None)`` when the summary list is missing or no
    entry matches.
    """
    not_found = (None, None, None)

    if 'summaryByIRCategory' not in inp:
        return not_found

    # Lazy generator: only the first matching entry has its size fields read.
    matches = (
        (
            entry['catchmentSizeSqMi'],
            entry['catchmentSizePercent'],
            entry['assessmentUnitCount'],
        )
        for entry in inp['summaryByIRCategory']
        if entry['epaIRCategoryName'] == val
    )
    return next(matches, not_found)

# Long-format table: one row per (HUC12, IR category) pair found in the results.
ircat_columns = ['HUC12','IRCat','catchmentSizeSqMi','catchmentSizePercent','assessmentUnitCount']
ircat_records = []

for huc12_code, attains_result in zip(wbd_df2['HUC12'], wbd_df2['ATTAINS_RESULTS']):
    for ircat in ircats:
        s = fetch_ircat(attains_result, ircat)

        # fetch_ircat returns a tuple of Nones when the category is absent.
        if s[0] is not None:
            ircat_records.append((huc12_code, ircat, s[0], s[1], s[2]))

# Build the frame once from the collected rows rather than enlarging it one
# .loc assignment at a time (which re-allocates on every insert).
irCatdf = pd.DataFrame(ircat_records, columns=ircat_columns)

irCatdf.head()


Unnamed: 0,HUC12,IRCat,catchmentSizeSqMi,catchmentSizePercent,assessmentUnitCount
0,140801010101,1,15.957,93.38,1
1,140801010101,2,2.873,16.813,1
2,140801010101,3,1.129,6.607,1
3,140801010102,1,12.645,64.268,1
4,140801010102,2,7.956,40.436,1


## Generate Use Group Data Frame

In [17]:
def fetch_usegroup(inp, usegroup, attainment):
    """Look up one (use group, attainment) pair in an ATTAINS summary dict.

    Walks ``inp['summaryByUseGroup']``; within every entry whose
    ``useGroupName`` equals ``usegroup`` it scans ``useAttainmentSummary``
    for the first record whose ``useAttainment`` equals ``attainment`` and
    returns ``(catchmentSizeSqMi, catchmentSizePercent, assessmentUnitCount)``.
    Returns ``(None, None, None)`` when nothing matches.
    """
    if 'summaryByUseGroup' not in inp:
        return (None, None, None)

    for group in inp['summaryByUseGroup']:
        if group['useGroupName'] != usegroup:
            continue

        for record in group['useAttainmentSummary']:
            if record['useAttainment'] != attainment:
                continue

            return (
                record['catchmentSizeSqMi'],
                record['catchmentSizePercent'],
                record['assessmentUnitCount'],
            )

    return (None, None, None)

# Long-format table: one row per (HUC12, use group, attainment) combination found.
usegroup_columns = ['HUC12','UseGroup','Attainment','catchmentSizeSqMi','catchmentSizePercent','assessmentUnitCount']
usegroup_records = []

for huc12_code, attains_result in zip(wbd_df2['HUC12'], wbd_df2['ATTAINS_RESULTS']):
    for usegroup in usegroups:
        for attainment in attainments:
            s = fetch_usegroup(attains_result, usegroup, attainment)

            # fetch_usegroup returns a tuple of Nones when the pair is absent.
            if s[0] is not None:
                usegroup_records.append((huc12_code, usegroup, attainment, s[0], s[1], s[2]))

# Build the frame once from the collected rows rather than enlarging it one
# .loc assignment at a time (which re-allocates on every insert).
useGroupdf = pd.DataFrame(usegroup_records, columns=usegroup_columns)

useGroupdf.head()

Unnamed: 0,HUC12,UseGroup,Attainment,catchmentSizeSqMi,catchmentSizePercent,assessmentUnitCount
0,140801010101,DRINKINGWATER_USE,Fully Supporting,15.957,93.38,2
1,140801010101,DRINKINGWATER_USE,Not Assessed,1.129,6.607,1
2,140801010101,ECOLOGICAL_USE,Fully Supporting,15.957,93.38,1
3,140801010101,ECOLOGICAL_USE,Insufficient Information,2.873,16.813,1
4,140801010101,ECOLOGICAL_USE,Not Assessed,1.129,6.607,1


In [20]:
def fetch_use(inp, use, attainment):
    """Look up one (use, attainment) pair in an ATTAINS summary dict.

    Walks ``inp['summaryByUse']``; within every entry whose ``useName``
    equals ``use`` it scans the entry's ``useAttainmentSummary`` list for the
    first record whose ``useAttainment`` equals ``attainment``.

    Returns
    -------
    tuple
        ``(useGroupName, catchmentSizeSqMi, catchmentSizePercent,
        assessmentUnitCount)`` for the first match, or
        ``(None, None, None, None)`` when there is no match.
    """
    for item in inp.get('summaryByUse', ()):
        if item['useName'] != use:
            continue

        ug = item['useGroupName']
        # The label-collection cell treats 'useAttainmentSummary' as optional
        # on these entries, so a missing list is handled as "no match" here
        # instead of raising KeyError.
        for att in item.get('useAttainmentSummary', ()):
            if att['useAttainment'] == attainment:
                return (
                    ug,
                    att['catchmentSizeSqMi'],
                    att['catchmentSizePercent'],
                    att['assessmentUnitCount'],
                )

    return (None, None, None, None)

# Long-format table: one row per (HUC12, use, attainment) combination found.
use_columns = ['HUC12','UseGroup','Use','Attainment','catchmentSizeSqMi','catchmentSizePercent','assessmentUnitCount']
use_records = []

for huc12_code, attains_result in zip(wbd_df2['HUC12'], wbd_df2['ATTAINS_RESULTS']):
    for use in uses:
        for attainment in attainments:
            s = fetch_use(attains_result, use, attainment)

            # s[0] is the use group name; fetch_use returns None there when
            # the (use, attainment) pair is absent.
            if s[0] is not None:
                use_records.append((huc12_code, s[0], use, attainment, s[1], s[2], s[3]))

# Build the frame once from the collected rows rather than enlarging it one
# .loc assignment at a time (which re-allocates on every insert).
usedf = pd.DataFrame(use_records, columns=use_columns)

usedf.head()

Unnamed: 0,HUC12,UseGroup,Use,Attainment,catchmentSizeSqMi,catchmentSizePercent,assessmentUnitCount
0,140801010101,OTHER_USE,Agriculture,Fully Supporting,15.957,93.38,2
1,140801010101,OTHER_USE,Agriculture,Not Assessed,1.129,6.607,1
2,140801010101,ECOLOGICAL_USE,Aquatic Life Cold Water-Class 1,Fully Supporting,15.957,93.38,1
3,140801010101,ECOLOGICAL_USE,Aquatic Life Cold Water-Class 1,Insufficient Information,2.873,16.813,1
4,140801010101,ECOLOGICAL_USE,Aquatic Life Cold Water-Class 1,Not Assessed,1.129,6.607,1


## Tidy up the Data Frame and Export Product

In [27]:
# Drop the raw ATTAINS dict column before export.
wbd_df5 = wbd_df2.drop(columns="ATTAINS_RESULTS")

# Write one sheet per table. The context manager saves and closes the workbook
# on exit, including when a to_excel call fails; it replaces the explicit
# writer.save() call, which was removed in pandas 2.0.
with pd.ExcelWriter("use_" + huc4 + ".xlsx") as writer:
    wbd_df5.to_excel(writer, sheet_name="HUCs", index=False)
    irCatdf.to_excel(writer, sheet_name="IRCat", index=False)
    useGroupdf.to_excel(writer, sheet_name="Use Groups", index=False)
    usedf.to_excel(writer, sheet_name="Uses", index=False)
