# A script to remotely download NAIP imagery  

In [None]:
import glob
import os
import re
import shutil
import time
from zipfile import ZipFile

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException, WebDriverException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

### Create a look-up dataframe pairing a county with its NAIP identifier

In [None]:
# 's' is an excerpt from 'NAIP File Name.pdf' at https://nrcs.app.box.com/v/naip/file/168958604571
s = "NM001BERNALILLONM005CHAVESNM007COLFAXNM009CURRYNM011DE BACANM013DONA ANANM015EDDYNM017GRANTNM019GUADALUPENM021HARDINGNM023HIDALGONM025LEANM027LINCOLNNM029LUNANM033MORANM035OTERONM037QUAYNM039RIO ARRIBANM041ROOSEVELTNM043SANDOVALNM045SAN JUANNM047SAN MIGUELNM049SANTA FENM051SIERRANM053SOCORRONM055TAOSNM057TORRANCENM059UNIONNM061VALENCIA"

# Parse the string into a pandas DataFrame that will serve as a look-up table for downloading the imagery.
# Splitting on the 'NM' prefix yields fragments such as '001BERNALILLO', plus one
# empty fragment that sits in front of the very first 'NM'.
nm_counties = s.split('NM')
cty_list = []

for county in nm_counties:
    # The capturing group keeps the digits in the result, so '001BERNALILLO'
    # becomes ['', '001', 'BERNALILLO']. The pattern is a raw string so that
    # '\d' reaches the regex engine instead of being read as an (invalid)
    # string escape sequence.
    counties = re.split(r'(\d+)', county)
    cty_list.append(counties)

# McKinley County is missing from the NAIP File Name document, and so needs to be
# added to the dataframe separately:
mckinley = ['', '031', 'MCKINLEY']
cty_list.append(mckinley)

# Every real row starts with '', so sorting orders the rows by NAIP ID and puts
# the one-element row produced by the empty leading fragment first. df keeps
# that placeholder row; df2 drops it for display.
df = pd.DataFrame(sorted(cty_list), columns=["Index", "NAIP_ID", "County"])
df2 = df.iloc[1:]
df2


In [None]:
# Counties of interest, split up by page (page 1 and page 2 are handled as
# separate lists).
counties_of_interest_page1 = [
    "QUAY",
    "LINCOLN",
    "ROOSEVELT",
    "LEA",
    "SAN JUAN",
    "RIO ARRIBA",
    "SANDOVAL",
    "MCKINLEY",
]
counties_of_interest_page2 = [
    "EDDY",
    "CHAVES",
    "CURRY",
    "DE BACA",
]

In [None]:
# Select counties of interest out of the look-up dataframe and store in a new dataframe.
sel_cty1 = df[df['County'].isin(counties_of_interest_page1)]
sel_cty2 = df[df['County'].isin(counties_of_interest_page2)]

# DataFrame.append was removed in pandas 2.0. pd.concat stacks the same rows in
# the same order (page 2 first, then page 1) and returns a new frame.
sel_cty = pd.concat([sel_cty2, sel_cty1])

# Swap spaces for underscores in the county names. The result is assigned back
# to the column rather than using replace(inplace=True) on the column
# selection, which is a chained in-place edit that pandas does not guarantee
# will write through to sel_cty.
sel_cty['County'] = sel_cty['County'].replace(' ', '_', regex=True)

### Download the imagery

In [None]:
# A function that waits until a download is finished.
def downloads_done(directory=None, poll_seconds=300):
    """Block until no partial-download file remains in the download folder.

    A file whose name contains '.crdownload' is treated as a download that is
    still in progress. The folder is re-listed on every pass, so the wait ends
    as soon as the last such file disappears. (The previous recursive version
    kept iterating over a stale listing and could sleep again for files that
    were already finished.)

    Args:
        directory: Folder to watch. Defaults to the module-level ``dl_loc``.
        poll_seconds: Seconds to sleep between checks.

    Returns:
        None, once no '.crdownload' file is present.
    """
    folder = dl_loc if directory is None else directory
    while any(".crdownload" in name for name in os.listdir(folder)):
        print("Downloading...")
        time.sleep(poll_seconds)

In [None]:
# A function that downloads imagery based on lists.

def dl_naip(county_list):
    """Download the 2016 NAIP ortho zip for each county ID on the current page.

    For every ID the function clicks the link named
    'ortho_1-1_1n_s_nm<ID>_2016_1.zip', clicks the download button on the page
    that opens, navigates back and waits for the download to finish. It uses
    the module-level ``driver`` and ``downloads_done``.

    Args:
        county_list: Iterable of NAIP county ID strings (e.g. '031').

    Raises:
        NoSuchElementException: If the county link or the download button is
            not present on the page.
        WebDriverException: If the link still cannot be clicked after both
            scrolling fallbacks.
    """
    for county_id in county_list:
        print(county_id)
        link_text = 'ortho_1-1_1n_s_nm{}_2016_1.zip'.format(county_id)

        # Look the link up outside the retry block: if it is missing there is
        # no element to scroll to, so the error should surface immediately.
        imagery = driver.find_element(By.LINK_TEXT, link_text)

        try:
            imagery.click()
        except WebDriverException:
            # The click failed. Send END to the element, pause, and retry.
            try:
                imagery.send_keys(Keys.END)
                time.sleep(5)
                imagery.click()
            except WebDriverException:
                # Last resort: ask the browser to scroll the element into view.
                driver.execute_script("return arguments[0].scrollIntoView(true);", imagery)
                time.sleep(5)
                imagery.click()

        # Only reached when one of the clicks above succeeded, so the download
        # button is looked for on the page the link opened.
        time.sleep(10)

        download = driver.find_element(By.XPATH, '/html/body/div[1]/span/div/span/div/div[1]/div[2]/button/span/span')
        download.click()

        time.sleep(5)

        driver.back()
        downloads_done()

In [None]:
# Initialize webdriver
site = "https://nrcs.app.box.com/v/naip/folder/18143730559"
dl_loc = r'Y:\NAIP\NM_2016'
options = Options()

# 'prefs' is for Chrome profile preferences only; here it points downloads at dl_loc.
prefs = {'download.default_directory': dl_loc}
options.add_experimental_option('prefs', prefs)

# Command-line switches have to be registered with add_argument. They were
# previously nested under prefs['args'], where they were never passed to Chrome.
for switch in ('--disable-infobars', '--disable-extensions'):
    options.add_argument(switch)

# NOTE(review): the old prefs dict also carried 'useAutomationExtension': False
# and 'forceDevToolsScreenshot': True. Those are driver-level options, not
# profile preferences, so they had no effect there and have been dropped. If
# they are wanted, confirm the installed chromedriver still accepts them and
# set them with options.add_experimental_option(...).

# 'options=' replaces the deprecated 'chrome_options=' keyword.
driver = webdriver.Chrome(options=options)
driver.get(site)

In [None]:
# Download the imagery from page 1, then page 2 using selenium

dl_naip(sel_cty1['NAIP_ID'])

# find_element(By.XPATH, ...) replaces find_element_by_xpath, which newer
# Selenium releases no longer provide.
next_page = driver.find_element(By.XPATH, '/html/body/div[1]/span/div/main/div/div/div[2]/div[2]/div/a')
# Send END (presumably to bring the pagination link into view; confirm against
# the live page) and pause briefly before clicking it.
next_page.send_keys(Keys.END)
time.sleep(2)
next_page.click()

dl_naip(sel_cty2['NAIP_ID'])

### Organize imagery on disk

In [None]:
# Create folders named after county
cty_list = sel_cty['County'].tolist()

for cty in cty_list:
    # Folder names use title case, e.g. 'SAN_JUAN' -> 'San_Juan'.
    cap = cty.title()
    # exist_ok=True lets this cell be re-run without failing on folders that
    # were already created by an earlier run.
    os.makedirs(os.path.join(dl_loc, cap), exist_ok=True)

In [None]:
# Move downloaded imagery to the appropriate county folder
# Only finished archives are candidates; this skips partial downloads and any
# other files sitting in the download folder.
zip_files = [
    z for z in os.listdir(dl_loc)
    if z.endswith('.zip') and os.path.isfile(os.path.join(dl_loc, z))
]

for _, row in sel_cty.iterrows():
    # Columns are read by label; positional access on a labelled row is deprecated.
    naip_id = row['NAIP_ID']
    county = row['County']

    # Match the 'nm<ID>_' token used in the archive names rather than the bare
    # ID, so the digits cannot match some other part of the file name.
    tag = 'nm{}_'.format(naip_id)
    matches = [f for f in zip_files if tag in f.lower()]

    if not matches:
        # Report and move on instead of failing on an empty match list.
        print('No downloaded zip found for {} ({})'.format(county, naip_id))
        continue

    dest_path = os.path.join(dl_loc, county.title())
    # Move every match (e.g. a duplicate download) instead of joining several
    # names into one invalid path.
    for zip_name in matches:
        os.rename(os.path.join(dl_loc, zip_name), os.path.join(dest_path, zip_name))

In [None]:
# Unzip all imagery in the appropriate county folder

for path, dir_list, file_list in os.walk(dl_loc):
    for file_name in file_list:
        if not file_name.endswith(".zip"):
            continue

        abs_file_path = os.path.join(path, file_name)
        # Each archive is extracted next to itself, i.e. into the folder
        # currently being walked.
        print(path)

        # The context manager closes the archive even if extraction raises.
        with ZipFile(abs_file_path, 'r') as zip_obj:
            zip_obj.extractall(path)

In [None]:
# After confirming success, delete zip file.

# Collect the full path of every '.zip' file under dl_loc (sub-folders
# included), then remove them one by one.
zip_paths = [
    os.path.join(folder, name)
    for folder, _subdirs, names in os.walk(dl_loc)
    for name in names
    if name.endswith(".zip")
]

for zip_path in zip_paths:
    os.remove(zip_path)