# This python code builds ScienceBase items that house and describe specific versions of data files from the NHDPlusV2.1 that are being used in the Biogeographic Information System.  Data were extracted from ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/ and stored within ScienceBase as attachments.  Although reorganized, the files stored in the ScienceBase Items were not altered. In future iterations of this code we would like to avoid using local disk space and operations that may be dependent on a local operating system.  

In [1]:
import pysb
import urllib
import os
import getpass
import time
import subprocess
from zipfile import ZipFile
import zipfile

In [2]:
#Downloads Files of Interest, The next few steps should be done within memory when we get a chance but didn't find a complete workflow of methods that would get us where we needed to be in memory
import urllib.request as ur
ur.urlretrieve('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusSA/NHDPlus03S/NHDPlusV21_SA_03S_NHDPLusAttributes_06.7z', 'NHDPlusV21_SA_03S_NHDPLusAttributes_06.7z')
ur.urlretrieve('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusSA/NHDPlus03S/NHDPlusV21_SA_03S_NHDSnapshot_06.7z', 'NHDPlusV21_SA_03S_NHDSnapshot_06.7z')
ur.urlretrieve('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusSA/NHDPlus03S/NHDPlusV21_SA_03S_NHDPlusCatchment_01.7z', 'NHDPlusV21_SA_03S_NHDPlusCatchment_01.7z')

('NHDPlusV21_SA_03S_NHDPlusCatchment_01.7z',
 <email.message.Message at 0x246988c24e0>)

In [None]:
#This code isn't currently doing anything in the SB item creation, but eventually something like this could be used to track the "last update" of the NHD file being harvested.
import urllib.request
with urllib.request.urlopen('ftp://ftp.horizon-systems.com/NHDplus/NHDPlusV21/Data/NHDPlusSA') as response:
    html = response.read()
    print (html)

In [3]:
#Unzips the 7z files.  This may only run on windows?
subprocess.call(r'"C:\Program Files\7-Zip\7z.exe" x ' + 'NHDPlusV21_SA_03S_NHDPLusAttributes_06.7z' )
subprocess.call(r'"C:\Program Files\7-Zip\7z.exe" x ' + 'NHDPlusV21_SA_03S_NHDSnapshot_06.7z' )
subprocess.call(r'"C:\Program Files\7-Zip\7z.exe" x ' + 'NHDPlusV21_SA_03S_NHDPlusCatchment_01.7z' )

0

In [4]:
#Selects only the files we are using and zips them into 3 directories (using .zip).  The three folders include Hydrography, NHDPlusAttributes, and Catchment
dataTypes = ['Hydrography', 'NHDPlusAttributes', 'Catchment']
for fileType in dataTypes:
        if fileType == 'Hydrography':
            z = ZipFile((fileType + '.zip'), 'w')
            ZipFileList = ['NHDWaterbody.dbf','NHDWaterbody.prj','NHDWaterbody.shp','NHDWaterbody.shx','NHDFlowline.dbf','NHDFlowline.prj','NHDFlowline.shp','NHDFlowline.shx' ]
            for file in ZipFileList:
                procFile = ('NHDPlusSA/NHDPlus03S/NHDSnapshot/Hydrography/' + file)
                z.write(procFile, file)
        elif fileType == 'NHDPlusAttributes':
            z = ZipFile((fileType + '.zip'), 'w')
            ZipFileList = ['elevslope.dbf','PlusFlow.dbf','PlusFlowLineVAA.dbf']
            for file in ZipFileList:
                procFile = ('NHDPlusSA/NHDPlus03S/NHDPlusAttributes/' + file)
                z.write(procFile, file)
        elif fileType == 'Catchment':
            target_dir = r'NHDPlusSA\NHDPlus03S\NHDPlusCatchment'
            CatZip = ZipFile('Catchment.zip', 'w', zipfile.ZIP_DEFLATED)
            rootlen = len(target_dir) + 1
            for base, dirs, files in os.walk(target_dir):
                for file in files:
                    fn = os.path.join(base, file)
                    CatZip.write(fn, fn[rootlen:])
            CatZip.close()
            #os.rename('Catchment.zip', 'Catchment03S.zip')


In [5]:
#Create ScienceBase Item

loginu=input("Username: ")  #asks user for username
sb = pysb.SbSession()
sb.loginc(str(loginu))
time.sleep(2)

ret = sb.upload_files_and_create_item(sb.get_my_items_id(), ['Catchment.zip', 'Hydrography.zip', 'NHDPlusAttributes.zip'])
SbItem = ret['id']


Username: dwieferich@usgs.gov
········


In [6]:
print (SbItem)

599c6251e4b0b589267ed6ab


In [7]:
#Variables to populate the metadata in the SB Item

#Acquisition Date
import datetime
dNow = datetime.datetime.now()
AcqDate = dNow.strftime("%Y-%m-%d")
#AcqDate = dNow.isoformat()



In [8]:
UpdateItem = {'id': SbItem,
              'title': 'NHDPlusV2.1 Processing Region 3S; Files Used in the Biogeographic Information System',
              'body': 'A subset of files from within processing region 3S of the NHDPlus Version 2.1.  Although reorganized, the files within the attachments are unaltered from the NHDPlus Version 2.1 as they were acquired (see acquisition date listed within this metadata). This item links to python code used to generate the item.',
              'purpose': 'This item is intended to preseve specific versions of files being used in the Biogeographic Information System.',
              'dates': [{'type': 'Acquisition', 'dateString': AcqDate, 'label': 'Acquisition'}],
              'webLinks': [{"type":"sourceCode","typeLabel":"Source Code","uri":"https://github.com/dwief-usgs/BCB_Ipython_Notebooks/blob/master/Reg3S_NHDPlusV21_IntoSB_BIS.ipynb","rel":"related","title":"Python Code Used to Develop and Populate This SB Item","hidden":False},{"type":"webLink","typeLabel":"Web Link","uri":"http://www.horizon-systems.com/NHDPlus/NHDPlusV2_home.php","rel":"related","title":"Additional Information About the NHDPlusV2","hidden":False}],
              'contacts': [{"name":"Horizon Systems","type":"Data Owner","contactType":"organization","onlineResource":"http://www.horizon-systems.com","organization":{},"primaryLocation":{"streetAddress":{},"mailAddress":{}}},{"name":"Daniel J Wieferich","oldPartyId":66431,"type":"Contact","contactType":"person","email":"dwieferich@usgs.gov","active":True,"jobTitle":"Physical Scientist","firstName":"Daniel","middleName":"J","lastName":"Wieferich","organization":{"displayText":"Biogeographic Characterization"},"primaryLocation":{"name":"CN=Daniel J Wieferich,OU=CSS,OU=Users,OU=OITS,OU=DI,DC=gs,DC=doi,DC=net - Primary Location","building":"DFC Bldg 810","buildingCode":"KBT","officePhone":"3032024594","faxPhone":"3032024710","streetAddress":{"line1":"W 6th Ave Kipling St","city":"Lakewood","state":"CO","zip":"80225"},"mailAddress":{}},"orcId":"0000-0003-1554-7992"}],
              "tags":[{"type":"Theme","scheme":"BIS","name":"NHDPlusV2.1"},{"type":"Theme","scheme":"BIS","name":"Reg3S"}]
             }

updateItem = sb.updateSbItem(UpdateItem)

In [None]:
#Remove unneeded local copies of files
import shutil
import os

os.remove('Catchment.zip')
os.remove('Hydrography.zip')
os.remove('NHDPlusAttributes.zip')

os.remove('NHDPlusV21_SA_03S_NHDPLusAttributes_06.7z')
os.remove('NHDPlusV21_SA_03S_NHDSnapshot_06.7z')
os.remove('NHDPlusV21_SA_03S_NHDPlusCatchment_01.7z')

shutil.rmtree('NHDPlusSA')