In [None]:
####################

In [1]:
import os
import geopandas as gpd

In [2]:
# Shapefile holding the AOP flight-box polygons for all NEON sites
fn = "/data/shared/src/arojas/NEON/data/raw/spatial/AOP_flightBoxes/AOP_flightboxesAllSites.shp"
aop_flightboxes_gdf = gpd.read_file(fn)

# Quick look at the table layout: column names first, then (rows, columns)
print(aop_flightboxes_gdf.columns, aop_flightboxes_gdf.shape, sep="\n")

Index(['domain', 'domainName', 'siteName', 'siteID', 'siteType', 'sampleType',
       'priority', 'version', 'flightbxID', 'geometry'],
      dtype='object')
(103, 10)


In [3]:
# Overall bounding box of the flight-box rows belonging to site HARV
harv_mask = aop_flightboxes_gdf['siteID'] == "HARV"
aop_flightboxes_gdf[harv_mask].total_bounds

array([-72.28191288,  42.38363212, -72.10811745,  42.57509743])

In [4]:
####################
## Download Soilgrids for AOI using WCS
####################

In [3]:
import os
import geopandas as gpd
import numpy as np
from owslib.wcs import WebCoverageService

# Set up workspace
odir_soilgrids = "/data/shared/src/arojas/NEON/data/raster/SOILGRIDS/"

# Read in NEON AOP flight boundaries and keep only the terrestrial sites
fn = "/data/shared/src/arojas/NEON/data/raw/spatial/AOP_flightBoxes/AOP_flightboxesAllSites.shp"
aop_flightboxes_gdf = gpd.read_file(fn)
aop_flightboxes_gdf = aop_flightboxes_gdf[aop_flightboxes_gdf['sampleType']=="Terrestrial"].copy().reset_index()

# Set up variables
# Depth-layer suffixes; each is appended to the variable name to form the
# coverage identifier requested from the server (e.g. "sand_0-5cm_mean")
soil_levels_list = ["_0-5cm_mean","_5-15cm_mean",
                    "_15-30cm_mean","_30-60cm_mean",
                    "_60-100cm_mean"]
# soilgrids_sand_vars = ["sand","silt","clay"]
soilgrids_sand_var = "sand"
wcs_url = f'http://maps.isric.org/mapserv?map=/map/{soilgrids_sand_var}.map'
res = 250  # requested pixel size [m], used for both resx and resy
proj_str_igh = "+proj=igh +lat_0=0 +lon_0=0 +datum=WGS84 +units=m +no_defs"

# The service handle depends only on wcs_url, so it is created once and
# reused for every site and depth layer instead of once per request.
wcs = WebCoverageService(wcs_url, version='1.0.0')

# Loop through each terrestrial site boundary
# get total bounds, download soil data
for site_id in aop_flightboxes_gdf['siteID'].unique():
    print(site_id)
    # Create output folder with sitename; makedirs also creates the
    # per-variable parent folder and tolerates an already existing folder
    site_outdir = os.path.join(odir_soilgrids,soilgrids_sand_var,site_id)
    os.makedirs(site_outdir, exist_ok=True)

    # Filter for site and reproject to homolosine
    site_flightbox = aop_flightboxes_gdf[aop_flightboxes_gdf['siteID']==site_id].copy().to_crs(proj_str_igh)
    # Plain Python floats keep the textual form of the bbox in the request
    # independent of how the installed NumPy version prints its scalars.
    site_bbox = tuple(float(coord) for coord in np.round(site_flightbox.total_bounds, 3))

    for soil_level in soil_levels_list:
        var_id_str = soilgrids_sand_var + soil_level
        response = wcs.getCoverage(
            identifier=var_id_str,
            crs='urn:ogc:def:crs:EPSG::152160',
            bbox=site_bbox,
            resx=res, resy=res,          # resolution [m]
            format='GEOTIFF_INT16')

        # NOTE(review): the response body is written as-is; nothing here
        # verifies that it is a raster rather than a server error document.
        with open(os.path.join(site_outdir, f'{var_id_str}.tif'), 'wb') as file:
            file.write(response.read())
    # Trial run: stop after the first site. The full set of sites is
    # processed by the parallel section further down.
    break




BART


In [None]:
####################
## Parallel Processing
####################

In [86]:
import os
import geopandas as gpd
import numpy as np
from owslib.wcs import WebCoverageService

# Root folder that receives the downloaded SoilGrids rasters
odir_soilgrids = "/data/shared/src/arojas/NEON/data/raster/SOILGRIDS/"

# NEON AOP flight boxes, restricted to the terrestrial sites
fn = "/data/shared/src/arojas/NEON/data/raw/spatial/AOP_flightBoxes/AOP_flightboxesAllSites.shp"
aop_flightboxes_gdf = gpd.read_file(fn)
is_terrestrial = aop_flightboxes_gdf['sampleType'] == "Terrestrial"
aop_flightboxes_gdf = aop_flightboxes_gdf[is_terrestrial].copy().reset_index()

# Depth-layer suffixes that terminate each SOILGRIDS coverage / file name
soil_levels_list = [
    f"_{depth_range}cm_mean"
    for depth_range in ("0-5", "5-15", "15-30", "30-60", "60-100", "100-200")
]

# Variable to download; other candidates are listed for reference
# soilgrids_sand_vars = ["sand","silt","clay","phh2o","cec"]
soilgrids_sand_var = "sand"
wcs_url = f'http://maps.isric.org/mapserv?map=/map/{soilgrids_sand_var}.map'

# Pixel size and the homolosine projection the flight boxes are reprojected to
res = 250
proj_str_igh = "+proj=igh +lat_0=0 +lon_0=0 +datum=WGS84 +units=m +no_defs"

# One entry per terrestrial site; this is the work list for the downloads
site_id_list = aop_flightboxes_gdf['siteID'].unique()

# Single process function to use in parallel
def soilgrids_download(site_id):
    """Download all configured SoilGrids depth layers for one NEON site.

    The site's flight-box rows are reprojected to ``proj_str_igh`` and their
    overall bounding box is requested from the WCS endpoint at ``wcs_url``,
    once per entry in ``soil_levels_list``. Each response is written to
    ``<odir_soilgrids>/<soilgrids_sand_var>/<site_id>/<variable><level>.tif``.

    Relies on the notebook-level names ``odir_soilgrids``,
    ``soilgrids_sand_var``, ``aop_flightboxes_gdf``, ``proj_str_igh``,
    ``soil_levels_list``, ``wcs_url`` and ``res``.

    Parameters
    ----------
    site_id : str
        Value of the ``siteID`` column selecting the flight box(es) to use.

    Returns
    -------
    None
        Files are written as a side effect and a completion line is printed.
    """
    # makedirs creates the per-variable parent folder when it is missing and
    # does not fail if the site folder already exists (e.g. on a retry).
    site_outdir = os.path.join(odir_soilgrids, soilgrids_sand_var, site_id)
    os.makedirs(site_outdir, exist_ok=True)

    # Filter for site and reproject to homolosine
    site_flightbox = aop_flightboxes_gdf[aop_flightboxes_gdf['siteID']==site_id].copy().to_crs(proj_str_igh)
    # Plain Python floats keep the textual form of the bbox in the request
    # independent of how the installed NumPy version prints its scalars.
    site_bbox = tuple(float(coord) for coord in np.round(site_flightbox.total_bounds, 3))

    # One service handle per site is enough; it does not depend on the layer.
    wcs = WebCoverageService(wcs_url, version='1.0.0')

    for soil_level in soil_levels_list:
        var_id_str = soilgrids_sand_var + soil_level
        response = wcs.getCoverage(
            identifier=var_id_str,
            crs='urn:ogc:def:crs:EPSG::152160',
            bbox=site_bbox,
            resx=res, resy=res,          # resolution [m]
            format='GEOTIFF_INT16')

        # NOTE(review): the response body is written as-is; nothing here
        # verifies that it is a raster rather than a server error document.
        with open(os.path.join(site_outdir, f'{var_id_str}.tif'), 'wb') as file:
            file.write(response.read())

    print("Data downloaded for: ", site_id)


In [3]:
import multiprocessing as mp
# Run the per-site download on 5 worker processes.
# The pool is used as a context manager so the workers are shut down once
# map() has returned instead of lingering for the lifetime of the kernel.
site_id_list = aop_flightboxes_gdf['siteID'].unique()
with mp.Pool(5) as pool:
    results = pool.map(soilgrids_download, site_id_list)
    # To re-run only a few sites, map over an explicit list instead, e.g.:
    # results = pool.map(soilgrids_download, ["BART", "BONA", "SERC", "UKFS", "TREE"])
    # results = pool.map(soilgrids_download, ["STEI"])

Data downloaded for:  BONA
Data downloaded for:  UKFS
Data downloaded for:  TREE
Data downloaded for:  SERC
Data downloaded for:  BART


In [None]:
#########################
## Double check downloads did not fail!
#########################

In [87]:
import glob
# Everything found under the output folder of the current variable (one
# sub-folder per site is expected). The pattern is built from odir_soilgrids
# so this check always inspects the location the downloads were written to.
fdirs_list = glob.glob(os.path.join(odir_soilgrids, soilgrids_sand_var, "*"))
# Sites that should have been downloaded
site_id_list = aop_flightboxes_gdf['siteID'].unique()

In [88]:
# A complete site folder holds one raster per requested depth layer, so the
# threshold follows soil_levels_list instead of being a fixed number.
n_layers_expected = len(soil_levels_list)

# basenames collects the folder names found on disk (used afterwards to spot
# sites that have no folder at all); folders with too few rasters are printed.
basenames = []
for fdir in fdirs_list:
    basenames.append(os.path.basename(fdir))
    # Count only the rasters, so stray files cannot hide a missing layer
    fpaths_tif = glob.glob(os.path.join(fdir, "*.tif"))
    if len(fpaths_tif) < n_layers_expected:
        print(fdir)

In [89]:
# Sites without any output folder on disk; they form the retry work list
input_list = [siteid for siteid in site_id_list if siteid not in basenames]

# Show which sites are going to be retried
for siteid in input_list:
    print(siteid)

In [70]:
import multiprocessing as mp
# Retry the sites collected in input_list.
# input_list = ["MLBS", "UNDE"]
if len(input_list) > 0:
    # At most 5 workers (same as the initial run), and never more workers
    # than sites. The context manager shuts the workers down afterwards.
    with mp.Pool(min(len(input_list), 5)) as pool:
        results = pool.map(soilgrids_download, input_list)
else:
    # Nothing is missing. A pool cannot be created with 0 processes, so the
    # download step is skipped entirely.
    results = []

Data downloaded for:  UNDE
Data downloaded for:  MLBS
