# This python code builds ScienceBase items that house and describe specific versions of data files from the NHDPlusV2.1 that are being used in the Biogeographic Information System.  Data were extracted from ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/ and stored within ScienceBase as attachments.  Although reorganized, the files stored in the ScienceBase Items were not altered. In future iterations of this code we would like to avoid using local disk space and operations that may be dependent on a local operating system.  

In [1]:
import pysb
import urllib
import os
import getpass
import time
import subprocess
from zipfile import ZipFile
import zipfile

In [2]:
#Downloads Files of Interest, The next few steps should be done within memory when we get a chance but didn't find a complete workflow of methods that would get us where we needed to be in memory
import urllib.request as ur
ur.urlretrieve('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusMS/NHDPlus08/NHDPlusV21_MS_08_NHDPLusAttributes_07.7z', 'NHDPlusV21_MS_08_NHDPLusAttributes_07.7z')
ur.urlretrieve('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusMS/NHDPlus08/NHDPlusV21_MS_08_NHDSnapshot_07.7z', 'NHDPlusV21_MS_08_NHDSnapshot_07.7z')
ur.urlretrieve('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusMS/NHDPlus08/NHDPlusV21_MS_08_NHDPlusCatchment_01.7z', 'NHDPlusV21_MS_08_NHDPlusCatchment_01.7z')

('NHDPlusV21_MS_08_NHDPlusCatchment_01.7z',
 <email.message.Message at 0x1601ba05198>)

In [3]:
#This code isn't currently doing anything in the SB item creation, but eventually something like this could be used to track the "last update" of the NHD file being harvested.
import urllib.request
with urllib.request.urlopen('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusMS/NHDPlus08/') as response:
    html = response.read()
    print (html)

b'dr--r----- 1 anonymous anonymous          512 Apr 07 16:03 .\r\ndr--r----- 1 anonymous anonymous          512 Apr 07 16:03 ..\r\n-r--r----- 1 anonymous anonymous       234446 Dec 19  2016 0release_notes_VPU08.pdf\r\n-r--r----- 1 anonymous anonymous      1287621 May 01  2015 NHDPlusV21_MS_08_03g_CatSeed_01.7z\r\n-r--r----- 1 anonymous anonymous     22724784 May 01  2015 NHDPlusV21_MS_08_03g_FdrFac_01.7z\r\n-r--r----- 1 anonymous anonymous      7197622 May 01  2015 NHDPlusV21_MS_08_03g_FdrNull_01.7z\r\n-r--r----- 1 anonymous anonymous       509111 May 01  2015 NHDPlusV21_MS_08_03g_FilledAreas_01.7z\r\n-r--r----- 1 anonymous anonymous     76553388 Jun 22  2012 NHDPlusV21_MS_08_03g_Hydrodem_01.7z\r\n-r--r----- 1 anonymous anonymous     95485514 Jun 22  2012 NHDPlusV21_MS_08_03g_NEDSnapshot_01.7z\r\n-r--r----- 1 anonymous anonymous      6500841 Jun 22  2012 NHDPlusV21_MS_08_08a_Catseed_01.7z\r\n-r--r----- 1 anonymous anonymous    158654575 Jun 22  2012 NHDPlusV21_MS_08_08a_FdrFac_01.7z\r\

In [4]:
#Unzips the 7z files.  This may only run on windows?
subprocess.call(r'"C:\Program Files\7-Zip\7z.exe" x ' + 'NHDPlusV21_MS_08_NHDPLusAttributes_07.7z' )
subprocess.call(r'"C:\Program Files\7-Zip\7z.exe" x ' + 'NHDPlusV21_MS_08_NHDSnapshot_07.7z' )
subprocess.call(r'"C:\Program Files\7-Zip\7z.exe" x ' + 'NHDPlusV21_MS_08_NHDPlusCatchment_01.7z' )

0

In [5]:
#Selects only the files we are using and zips them into 3 directories (using .zip).  The three folders include Hydrography, NHDPlusAttributes, and Catchment
dataTypes = ['Hydrography', 'NHDPlusAttributes', 'Catchment']
for fileType in dataTypes:
    z = ZipFile((fileType + '.zip'), 'w')
    if fileType == 'Hydrography':
        ZipFileList = ['NHDWaterbody.dbf','NHDWaterbody.prj','NHDWaterbody.shp','NHDWaterbody.shx','NHDFlowline.dbf','NHDFlowline.prj','NHDFlowline.shp','NHDFlowline.shx' ]
        for file in ZipFileList:
            procFile = ('NHDPlusMS/NHDPlus08/NHDSnapshot/Hydrography/' + file)
            z.write(procFile, file)
    elif fileType == 'NHDPlusAttributes':
        ZipFileList = ['elevslope.dbf','PlusFlow.dbf','PlusFlowLineVAA.dbf']
        for file in ZipFileList:
            procFile = ('NHDPlusMS/NHDPlus08/NHDPlusAttributes/' + file)
            z.write(procFile, file)
    elif fileType == 'Catchment':
        target_dir = r'NHDPlusMS\NHDPlus08\NHDPlusCatchment'
        CatZip = ZipFile('Catchment.zip', 'w', zipfile.ZIP_DEFLATED)
        rootlen = len(target_dir) + 1
        for base, dirs, files in os.walk(target_dir):
            for file in files:
                fn = os.path.join(base, file)
                CatZip.write(fn, fn[rootlen:])

In [6]:
#Create ScienceBase Item

loginu=input("Username: ")  #asks user for username
sb = pysb.SbSession()
sb.loginc(str(loginu))
time.sleep(2)

ret = sb.upload_files_and_create_item(sb.get_my_items_id(), ['Catchment.zip', 'Hydrography.zip', 'NHDPlusAttributes.zip'])
SbItem = ret['id']


Username: dwieferich@usgs.gov
········


In [7]:
print (SbItem)

5983ab56e4b0e2f5d4651f1a


In [8]:
#Variables to populate the metadata in the SB Item

#Acquisition Date
import datetime
dNow = datetime.datetime.now()
AcqDate = dNow.strftime("%Y-%m-%d")
#AcqDate = dNow.isoformat()



In [9]:
UpdateItem = {'id': SbItem,
              'title': 'NHDPlusV2.1 Processing Region 8; Files Used in the Biogeographic Information System',
              'body': 'A subset of files from within processing region 8 of the NHDPlus Version 2.1.  Although reorganized, the files within the attachments are unaltered from the NHDPlus Version 2.1 as they were acquired (see acquisition date listed within this metadata). This item links to python code used to generate the item.',
              'purpose': 'This item is intended to preseve specific versions of files being used in the Biogeographic Information System.',
              'dates': [{'type': 'Acquisition', 'dateString': AcqDate, 'label': 'Acquisition'}],
              'webLinks': [{"type":"sourceCode","typeLabel":"Source Code","uri":"https://github.com/dwief-usgs/BCB_Ipython_Notebooks/blob/master/NHDPlus21_Into_SB_For_BIS/Reg8_NHDPlusV21_IntoSB_BIS.ipynb","rel":"related","title":"Python Code Used to Develop and Populate This SB Item","hidden":False},{"type":"webLink","typeLabel":"Web Link","uri":"http://www.horizon-systems.com/NHDPlus/NHDPlusV2_home.php","rel":"related","title":"Additional Information About the NHDPlusV2","hidden":False}],
              'contacts': [{"name":"Horizon Systems","type":"Data Owner","contactType":"organization","onlineResource":"http://www.horizon-systems.com","organization":{},"primaryLocation":{"streetAddress":{},"mailAddress":{}}},{"name":"Daniel J Wieferich","oldPartyId":66431,"type":"Contact","contactType":"person","email":"dwieferich@usgs.gov","active":True,"jobTitle":"Physical Scientist","firstName":"Daniel","middleName":"J","lastName":"Wieferich","organization":{"displayText":"Biogeographic Characterization"},"primaryLocation":{"name":"CN=Daniel J Wieferich,OU=CSS,OU=Users,OU=OITS,OU=DI,DC=gs,DC=doi,DC=net - Primary Location","building":"DFC Bldg 810","buildingCode":"KBT","officePhone":"3032024594","faxPhone":"3032024710","streetAddress":{"line1":"W 6th Ave Kipling St","city":"Lakewood","state":"CO","zip":"80225"},"mailAddress":{}},"orcId":"0000-0003-1554-7992"}],
              'tags': [{"type":"Theme","scheme":"BIS","name":"NHDPlusV2.1"},{"type":"Theme","scheme":"BIS","name":"Reg8"}]
             }

updateItem = sb.updateSbItem(UpdateItem)

In [1]:
#Remove unneeded local copies of files
import shutil
import os

os.remove('Catchment.zip')
os.remove('Hydrography.zip')
os.remove('NHDPlusAttributes.zip')
os.remove('NHDPlusV21_MS_08_NHDPLusAttributes_07.7z')
os.remove('NHDPlusV21_MS_08_NHDPlusCatchment_01.7z')
os.remove('NHDPlusV21_MS_08_NHDSnapshot_07.7z')
shutil.rmtree('NHDPlusMS')