This notebook uses the Microsoft Planetary Computer STAC API to access the NAIP dataset and collect the tile names and URLs for the file pathways identified using the EIA, HIFLD, and other data sources.

## Load Packages

In [1]:
# Standard packages
import tempfile
import warnings
import urllib
import shutil
import os
# Less standard, but still pip- or conda-installable
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import rasterio
import re
import rtree
import shapely
import pickle

import progressbar # pip install progressbar2, not progressbar
from geopy.geocoders import Nominatim
from rasterio.windows import Window 
from tqdm import tqdm

import data_eng.az_proc as ap
# some_file.py
import sys
# insert at 1, 0 is the script path (or '' in REPL)
sys.path.insert(1, '/path/to/application/app/folder')

import file
from application.app.folder.file import func_name

ModuleNotFoundError: No module named 'rasterio'

## Define Microsoft Azure Blob Root

In [None]:
# The (preferred) copy of NAIP in the West Europe Azure region.
# NOTE(review): no blob-root variable is actually assigned in this cell even though the
# section header announces one — confirm whether it is defined elsewhere or was lost.

# Suppress all Python warnings from this point on.
warnings.filterwarnings("ignore")
# Render matplotlib figures inline in the notebook output.
%matplotlib inline

## Load the spatial index of NAIP tiles

In [None]:
# Spatial index that maps lat/lon to NAIP tiles.
# The index is built right here, every time this cell runs (the former
# "index = None / if index is None" guard always took the load branch).
index = ap.NAIPTileIndex()

## EIA and HIFLD Petroleum Data Sources

## Define file paths for saving data

In [17]:
# Base output folder: lives under /shared_space because that volume has ample storage capacity.
natech_dir = os.path.join('/shared_space', 'natech')

# Sub-folders of the base folder.
naip_dir = os.path.join(natech_dir, 'naip')             # NAIP data
naip_reshape_dir = os.path.join(natech_dir, 'reshape')  # reshaped NAIP images
naip_chips_dir = os.path.join(natech_dir, 'chips')      # presumably image chips — TODO confirm

# Create each folder in turn (base first); folders that already exist are left as they are.
for _out_dir in (natech_dir, naip_dir, naip_reshape_dir, naip_chips_dir):
    os.makedirs(_out_dir, exist_ok=True)

# EIA and HIFLD

### Load Data

Homeland Infrastructure Foundation-Level Data (HIFLD) - Petroleum Terminals

https://hifld-geoplatform.opendata.arcgis.com/datasets/7841aba67178425cbf33995fc914e2fe_0/data

In [6]:
# Load the HIFLD petroleum terminals table (path is relative to the working directory).
hfid_petroleum_terminals = pd.read_csv("Petroleum_Terminals_HFID.csv")
# Column "X" is used as the longitudes and column "Y" as the latitudes.
hfid_lons, hfid_lats = (hfid_petroleum_terminals[axis].tolist() for axis in ("X", "Y"))

EIA - Strategic Petroleum Reserves

https://atlas.eia.gov/datasets/strategic-petroleum-reserves?geometry=-159.521%2C0.792%2C-28.212%2C52.750

In [7]:
# Load the EIA strategic petroleum reserves table (path is relative to the working directory).
eia_strategic_petroleum_reserves = pd.read_csv("Strategic_Petroleum_Reserves.csv")
# Column "X" is used as the longitudes and column "Y" as the latitudes.
eia_spr_lons, eia_spr_lats = (eia_strategic_petroleum_reserves[axis].tolist() for axis in ("X", "Y"))

EIA - Petroleum Product Terminals

https://atlas.eia.gov/datasets/petroleum-product-terminals 

In [8]:
# Load the EIA petroleum product terminals table (path is relative to the working directory).
eia_petroleum_product_terminals = pd.read_csv("Petroleum_Product_Terminals.csv")
# Column "X" is used as the longitudes and column "Y" as the latitudes.
eia_ppt_lons, eia_ppt_lats = (eia_petroleum_product_terminals[axis].tolist() for axis in ("X", "Y"))

EIA - Northeast Petroleum Reserves

https://atlas.eia.gov/datasets/northeast-petroleum-reserves 

In [9]:
# Load the EIA northeast petroleum reserves table (path is relative to the working directory).
eia_northeast_petroleum_reserves = pd.read_csv("Northeast_Petroleum_Reserves.csv")
# Column "X" is used as the longitudes and column "Y" as the latitudes.
eia_npr_lons, eia_npr_lats = (eia_northeast_petroleum_reserves[axis].tolist() for axis in ("X", "Y"))

EIA - Petroleum Refineries

https://atlas.eia.gov/datasets/petroleum-refineries?geometry=-13.914%2C-56.555%2C151.320%2C84.803

In [10]:
# Load the EIA petroleum refineries table (path is relative to the working directory).
eia_petroleum_refineries = pd.read_csv("Petroleum_Refineries.csv")
# Column "X" is used as the longitudes and column "Y" as the latitudes.
eia_pr_lons, eia_pr_lats = (eia_petroleum_refineries[axis].tolist() for axis in ("X", "Y"))

EIA - Natural Gas Processing Plants

https://atlas.eia.gov/datasets/natural-gas-processing-plants

In [11]:
# Load the EIA natural gas processing plants table (path is relative to the working directory).
eia_natural_gas_processing_plants = pd.read_csv("Natural_Gas_Processing_Plants.csv")
# Column "X" is used as the longitudes and column "Y" as the latitudes.
eia_ngpp_lons, eia_ngpp_lats = (eia_natural_gas_processing_plants[axis].tolist() for axis in ("X", "Y"))

### Get the file pathways, tile names, and tile URLs for the EIA and HIFLD data

In [14]:
# NOTE(review): collected_quads_to_tile_name_tile_url and filepaths_to_tile_name_tile_url are
# neither defined nor imported in the visible notebook — presumably they belong to
# data_eng.az_proc (imported as ap); confirm. The first helper is called here with
# (lons, lats) and a single return value, but with one argument and two return values in the
# "Thirty Ports" cell — verify which signature is correct.

# One pathway lookup per data source, from that source's longitude/latitude lists.
hfid_file_pathways = collected_quads_to_tile_name_tile_url(hfid_lons, hfid_lats)
eia_spr_file_pathways = collected_quads_to_tile_name_tile_url(eia_spr_lons, eia_spr_lats)
eia_ppt_file_pathways = collected_quads_to_tile_name_tile_url(eia_ppt_lons, eia_ppt_lats)
eia_npr_file_pathways = collected_quads_to_tile_name_tile_url(eia_npr_lons, eia_npr_lats)
eia_pr_file_pathways = collected_quads_to_tile_name_tile_url(eia_pr_lons, eia_pr_lats)
eia_ngpp_file_pathways = collected_quads_to_tile_name_tile_url(eia_ngpp_lons, eia_ngpp_lats)

# Stack the pathways of all sources row-wise, then keep only the unique rows.
petrol_file_pathways = np.unique(
    np.vstack([
        hfid_file_pathways,
        eia_spr_file_pathways,
        eia_ppt_file_pathways,
        eia_npr_file_pathways,
        eia_pr_file_pathways,
        eia_ngpp_file_pathways,
    ]),
    axis=0,
)

# Derive tile names and tile URLs from the pathways and pair them column-wise.
tile_name_eia_hfid, tile_url_eia_hfid = filepaths_to_tile_name_tile_url(petrol_file_pathways)
tile_name_tile_url_eia_hfid = np.column_stack([tile_name_eia_hfid, tile_url_eia_hfid])

  9%|▊         | 199/2338 [00:00<00:02, 767.33it/s] 

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile in

 24%|██▍       | 570/2338 [00:02<00:11, 149.46it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


100%|██████████| 2338/2338 [00:42<00:00, 55.27it/s]
100%|██████████| 4/4 [00:00<00:00, 1214.60it/s]
  8%|▊         | 112/1476 [00:00<00:01, 1105.08it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 16%|█▌        | 229/1476 [00:00<00:01, 746.44it/s] 

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 27%|██▋       | 399/1476 [00:00<00:03, 352.25it/s]

No tile intersections


 32%|███▏      | 466/1476 [00:01<00:03, 302.37it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 38%|███▊      | 554/1476 [00:01<00:03, 257.54it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 48%|████▊     | 703/1476 [00:02<00:03, 211.35it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 52%|█████▏    | 766/1476 [00:02<00:03, 187.01it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 59%|█████▊    | 866/1476 [00:03<00:04, 143.64it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 71%|███████   | 1044/1476 [00:05<00:04, 92.66it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections


 74%|███████▍  | 1091/1476 [00:05<00:05, 73.34it/s]

No tile intersections


 80%|███████▉  | 1177/1476 [00:07<00:04, 71.51it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 84%|████████▍ | 1244/1476 [00:07<00:03, 71.90it/s]

No tile intersections
No tile intersections


 87%|████████▋ | 1284/1476 [00:08<00:02, 70.69it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


 89%|████████▉ | 1313/1476 [00:09<00:02, 56.82it/s]

No tile intersections
No tile intersections


 90%|████████▉ | 1326/1476 [00:09<00:02, 50.99it/s]

No tile intersections


 91%|█████████ | 1345/1476 [00:09<00:02, 53.77it/s]

No tile intersections


 92%|█████████▏| 1359/1476 [00:09<00:01, 59.09it/s]

No tile intersections
No tile intersections


100%|██████████| 1476/1476 [00:12<00:00, 115.88it/s]
100%|██████████| 6/6 [00:00<00:00, 1394.00it/s]
100%|██████████| 135/135 [00:00<00:00, 1204.73it/s]
  0%|          | 0/478 [00:00<?, ?it/s]

No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections
No tile intersections


100%|██████████| 478/478 [00:00<00:00, 507.98it/s] 


# Group Identified ASTs

## Thirty Ports

In [16]:
# Sheet of quadrangles of interest (path is relative to the working directory).
thirty_port_quads = pd.read_csv("Quadrangles_of_interest.csv")

# Tile names and tile URLs for the quadrangles of interest.
# NOTE(review): this helper is not defined or imported in the visible notebook — confirm its origin.
tile_name_thirty_ports, tile_url_thirty_ports = collected_quads_to_tile_name_tile_url(thirty_port_quads)

# Pair each tile name with its URL, one pair per row.
tile_name_tile_url_thirty_ports = np.column_stack([tile_name_thirty_ports, tile_url_thirty_ports])

# Combine Filepaths from each source

In [17]:
# Append the thirty-ports rows below the EIA/HIFLD rows and report the row count before de-duplication.
_stacked_rows = np.vstack([tile_name_tile_url_eia_hfid, tile_name_tile_url_thirty_ports])
print(_stacked_rows.shape)

# Keep one copy of each (tile name, tile URL) row and report the shape again.
tile_name_tile_url_eia_hfid_thirty_ports = np.unique(_stacked_rows, axis=0)
print(tile_name_tile_url_eia_hfid_thirty_ports.shape)

(1459, 2)
(1389, 2)


### Save tile name and url

In [18]:
# Persist the combined table in the working directory; np.save appends ".npy" to the name.
np.save(
    file="tile_name_tile_url_eia_hfid_thirty_ports",
    arr=tile_name_tile_url_eia_hfid_thirty_ports,
)

In [14]:
# Display the kernel's current working directory; the relative CSV and .npy paths used in
# this notebook are resolved against it.
os.getcwd()

'C:\\Users\\Student\\AST\\data_download_and_preprocessing\\label_work'

In [10]:
# Load the additional identified sites (path is relative to the working directory).
expanded_sites = pd.read_csv("identified_sites.csv")
expanded_sites_lat = expanded_sites["Lat"].tolist()
# Fixed: the longitudes were previously read from eia_petroleum_refineries["Lon"], a different
# table, so they would not have been paired row-for-row with the latitudes above.
# NOTE(review): assumes identified_sites.csv has a "Lon" column alongside "Lat" — confirm.
expanded_sites_lon = expanded_sites["Lon"].tolist()

FileNotFoundError: [Errno 2] No such file or directory: 'identified_sites.csv'