In [1]:
# !pip install soilgrids

In [2]:
import numpy as np
import pandas as pd
import time
from itertools import product
from soilgrids import SoilGrids
import os

In [3]:
def grid_space(bbox, num_boxes):
    """Split a bounding box into a num_boxes x num_boxes grid of tiles.

    `bbox` is (xmin, ymin, xmax, ymax). Each side is cut into `num_boxes`
    equal intervals, and every resulting rectangle is returned as its own
    (xmin, ymin, xmax, ymax) tuple.

    The tiles are ordered column by column: x is the outer index and y the
    inner one, so the first `num_boxes` entries share the same xmin.
    """
    xmin, ymin, xmax, ymax = bbox

    # num_boxes intervals along an axis need num_boxes + 1 edge coordinates.
    x_edges = np.linspace(xmin, xmax, num_boxes + 1)
    y_edges = np.linspace(ymin, ymax, num_boxes + 1)

    # Cell (i, j) spans edge i..i+1 along x and edge j..j+1 along y.
    return [
        (x_edges[i], y_edges[j], x_edges[i + 1], y_edges[j + 1])
        for i, j in product(range(num_boxes), repeat=2)
    ]


def get_soil_grids(bbox, service_id, coverage_id, resolution, out_filename, retries=3):
    """Request one SoilGrids coverage tile and return it as a DataFrame.

    Parameters
    ----------
    bbox : sequence
        Tile bounds; elements 0-3 are sent as west, south, east, north in
        EPSG:4326.
    service_id : str
        Forwarded to ``SoilGrids.get_coverage_data`` as ``service_id``.
    coverage_id : str
        Forwarded as ``coverage_id`` and also used as the name passed to
        ``to_dataframe``.
    resolution : float
        Pixel size in the same units as `bbox`; the requested width and
        height are the tile extent divided by this value.
    out_filename : str
        Forwarded as the ``output`` argument of ``get_coverage_data``.
    retries : int, optional
        Maximum number of attempts (default 3).

    Returns
    -------
    (df, status)
        ``(DataFrame, True)`` on success, ``(None, False)`` once every
        attempt has failed.
    """
    soil_grids = SoilGrids()
    attempts_left = retries
    while attempts_left > 0:
        try:
            # NOTE(review): width/height are floats here (extent / resolution);
            # confirm the service accepts non-integer sizes.
            data = soil_grids.get_coverage_data(
                service_id=service_id, coverage_id=coverage_id,
                west=bbox[0], south=bbox[1], east=bbox[2], north=bbox[3],
                width=(bbox[2] - bbox[0]) / resolution,
                height=(bbox[3] - bbox[1]) / resolution,
                crs='urn:ogc:def:crs:EPSG::4326', output=out_filename)
            return data.to_dataframe(coverage_id), True
        except Exception as err:
            # `Exception` rather than a bare `except`, so KeyboardInterrupt and
            # SystemExit still stop a long-running download loop.
            attempts_left -= 1
            print(f"get_soil_grids: {coverage_id} {bbox} failed ({err!r}); "
                  f"{attempts_left} attempt(s) left")
            # Pause before the next attempt only; sleeping after the last
            # failure would just delay the caller.
            if attempts_left > 0:
                time.sleep(1)
    return None, False

In [4]:
# Overall area of interest as (xmin, ymin, xmax, ymax). get_soil_grids sends
# these values as west/south/east/north in EPSG:4326.
fao_bbox = [-78, -14, 86, 51]
# Split the area into a 5 x 5 grid (25 tiles) so each tile is requested separately.
bboxes = grid_space(fao_bbox, 5)

In [5]:
# Inspect the generated tiles; each entry is (xmin, ymin, xmax, ymax).
bboxes

[(-78.0, -14.0, -45.2, -1.0),
 (-78.0, -1.0, -45.2, 12.0),
 (-78.0, 12.0, -45.2, 25.0),
 (-78.0, 25.0, -45.2, 38.0),
 (-78.0, 38.0, -45.2, 51.0),
 (-45.2, -14.0, -12.400000000000006, -1.0),
 (-45.2, -1.0, -12.400000000000006, 12.0),
 (-45.2, 12.0, -12.400000000000006, 25.0),
 (-45.2, 25.0, -12.400000000000006, 38.0),
 (-45.2, 38.0, -12.400000000000006, 51.0),
 (-12.400000000000006, -14.0, 20.39999999999999, -1.0),
 (-12.400000000000006, -1.0, 20.39999999999999, 12.0),
 (-12.400000000000006, 12.0, 20.39999999999999, 25.0),
 (-12.400000000000006, 25.0, 20.39999999999999, 38.0),
 (-12.400000000000006, 38.0, 20.39999999999999, 51.0),
 (20.39999999999999, -14.0, 53.19999999999999, -1.0),
 (20.39999999999999, -1.0, 53.19999999999999, 12.0),
 (20.39999999999999, 12.0, 53.19999999999999, 25.0),
 (20.39999999999999, 25.0, 53.19999999999999, 38.0),
 (20.39999999999999, 38.0, 53.19999999999999, 51.0),
 (53.19999999999999, -14.0, 86.0, -1.0),
 (53.19999999999999, -1.0, 86.0, 12.0),
 (53.1999999999

# Clay 0-5 cm

In [6]:
# Request parameters for this section; the download cell reads them as globals
# and passes them to get_soil_grids.
service_id = "clay"
# Coverage to fetch; also used to build the output folder and tile file names.
coverage_id = "clay_0-5cm_mean"
resolution = 0.01 # in degrees

# Folder the per-tile .tif files of this coverage are written into.
os.makedirs(f"ISRIC Data/{coverage_id}", exist_ok=True)

In [None]:
chunks = dict()

# First pass: request every tile once. The tile's position in `bboxes` is part
# of its file name, so tile i is always stored as <coverage_id>_<i>.tif.
for idx, bbox in enumerate(bboxes):
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename)
    chunks[bbox] = df

# Tiles whose first-pass download failed (get_soil_grids returned None for them).
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]

# Second pass: retry only the failed tiles, with more attempts. Iterate over
# `bboxes` again so `idx` is the tile's ORIGINAL index; enumerating
# `bboxes_not_found` would restart at 0 and write the retried tile over the
# .tif file of an unrelated tile that had already been downloaded.
for idx, bbox in enumerate(bboxes):
    if chunks[bbox] is not None:
        continue
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename, retries=5)
    chunks[bbox] = df

In [None]:
# Tiles that still have no data after the download cell ran; the displayed
# count is 0 when every tile was fetched.
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]
len(bboxes_not_found)

In [None]:
# clay_0_5cm_mean = pd.concat(chunks.values())
# clay_0_5cm_mean.to_csv("data/fao_clay_0_5cm_mean.csv")

# Clay 5-15 cm

In [None]:
# Request parameters for this section; the download cell reads them as globals
# and passes them to get_soil_grids.
service_id = "clay"
# Coverage to fetch; also used to build the output folder and tile file names.
coverage_id = "clay_5-15cm_mean"
resolution = 0.01 # in degrees

# Folder the per-tile .tif files of this coverage are written into.
os.makedirs(f"ISRIC Data/{coverage_id}", exist_ok=True)

In [None]:
chunks = dict()

# First pass: request every tile once. The tile's position in `bboxes` is part
# of its file name, so tile i is always stored as <coverage_id>_<i>.tif.
for idx, bbox in enumerate(bboxes):
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename)
    chunks[bbox] = df

# Tiles whose first-pass download failed (get_soil_grids returned None for them).
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]

# Second pass: retry only the failed tiles, with more attempts. Iterate over
# `bboxes` again so `idx` is the tile's ORIGINAL index; enumerating
# `bboxes_not_found` would restart at 0 and write the retried tile over the
# .tif file of an unrelated tile that had already been downloaded.
for idx, bbox in enumerate(bboxes):
    if chunks[bbox] is not None:
        continue
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename, retries=5)
    chunks[bbox] = df

In [None]:
# Tiles that still have no data after the download cell ran; the displayed
# count is 0 when every tile was fetched.
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]
len(bboxes_not_found)

In [None]:
# clay_5_15cm_mean = pd.concat(chunks.values())
# clay_5_15cm_mean.to_csv("data/fao_clay_5_15cm_mean.csv")

In [None]:
# del chunks
# del clay_5_15cm_mean

# Sand 0-5 cm

In [None]:
# Request parameters for this section; the download cell reads them as globals
# and passes them to get_soil_grids.
service_id = "sand"
# Coverage to fetch; also used to build the output folder and tile file names.
coverage_id = "sand_0-5cm_mean"
resolution = 0.01 # in degrees

# Folder the per-tile .tif files of this coverage are written into.
os.makedirs(f"ISRIC Data/{coverage_id}", exist_ok=True)

In [None]:
chunks = dict()

# First pass: request every tile once. The tile's position in `bboxes` is part
# of its file name, so tile i is always stored as <coverage_id>_<i>.tif.
for idx, bbox in enumerate(bboxes):
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename)
    chunks[bbox] = df

# Tiles whose first-pass download failed (get_soil_grids returned None for them).
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]

# Second pass: retry only the failed tiles, with more attempts. Iterate over
# `bboxes` again so `idx` is the tile's ORIGINAL index; enumerating
# `bboxes_not_found` would restart at 0 and write the retried tile over the
# .tif file of an unrelated tile that had already been downloaded.
for idx, bbox in enumerate(bboxes):
    if chunks[bbox] is not None:
        continue
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename, retries=5)
    chunks[bbox] = df

In [None]:
# Tiles that still have no data after the download cell ran; the displayed
# count is 0 when every tile was fetched.
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]
len(bboxes_not_found)

In [None]:
# sand_0_5cm_mean = pd.concat(chunks.values())
# sand_0_5cm_mean.to_csv("data/fao_sand_0_5cm_mean.csv")

In [None]:
# del chunks
# del sand_0_5cm_mean

# Sand 5-15 cm

In [None]:
# Request parameters for this section; the download cell reads them as globals
# and passes them to get_soil_grids.
service_id = "sand"
# Coverage to fetch; also used to build the output folder and tile file names.
coverage_id = "sand_5-15cm_mean"
resolution = 0.01 # in degrees

# Folder the per-tile .tif files of this coverage are written into.
os.makedirs(f"ISRIC Data/{coverage_id}", exist_ok=True)

In [None]:
chunks = dict()

# First pass: request every tile once. The tile's position in `bboxes` is part
# of its file name, so tile i is always stored as <coverage_id>_<i>.tif.
for idx, bbox in enumerate(bboxes):
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename)
    chunks[bbox] = df

# Tiles whose first-pass download failed (get_soil_grids returned None for them).
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]

# Second pass: retry only the failed tiles, with more attempts. Iterate over
# `bboxes` again so `idx` is the tile's ORIGINAL index; enumerating
# `bboxes_not_found` would restart at 0 and write the retried tile over the
# .tif file of an unrelated tile that had already been downloaded.
for idx, bbox in enumerate(bboxes):
    if chunks[bbox] is not None:
        continue
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename, retries=5)
    chunks[bbox] = df

In [None]:
# Tiles that still have no data after the download cell ran; the displayed
# count is 0 when every tile was fetched.
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]
len(bboxes_not_found)

In [None]:
# sand_5_15cm_mean = pd.concat(chunks.values())
# sand_5_15cm_mean.to_csv("data/fao_sand_5_15cm_mean.csv")

# Silt 0-5 cm

In [None]:
# Request parameters for this section; the download cell reads them as globals
# and passes them to get_soil_grids.
service_id = "silt"
# Coverage to fetch; also used to build the output folder and tile file names.
coverage_id = "silt_0-5cm_mean"
resolution = 0.01 # in degrees

# Folder the per-tile .tif files of this coverage are written into.
os.makedirs(f"ISRIC Data/{coverage_id}", exist_ok=True)

In [None]:
chunks = dict()

# First pass: request every tile once. The tile's position in `bboxes` is part
# of its file name, so tile i is always stored as <coverage_id>_<i>.tif.
for idx, bbox in enumerate(bboxes):
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename)
    chunks[bbox] = df

# Tiles whose first-pass download failed (get_soil_grids returned None for them).
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]

# Second pass: retry only the failed tiles, with more attempts. Iterate over
# `bboxes` again so `idx` is the tile's ORIGINAL index; enumerating
# `bboxes_not_found` would restart at 0 and write the retried tile over the
# .tif file of an unrelated tile that had already been downloaded.
for idx, bbox in enumerate(bboxes):
    if chunks[bbox] is not None:
        continue
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename, retries=5)
    chunks[bbox] = df

In [None]:
# Tiles that still have no data after the download cell ran; the displayed
# count is 0 when every tile was fetched.
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]
len(bboxes_not_found)

In [None]:
# silt_0_5cm_mean = pd.concat(chunks.values())
# silt_0_5cm_mean.to_csv("data/fao_silt_0_5cm_mean.csv")

# Silt 5-15 cm

In [None]:
# Request parameters for this section; the download cell reads them as globals
# and passes them to get_soil_grids.
service_id = "silt"
# Coverage to fetch; also used to build the output folder and tile file names.
coverage_id = "silt_5-15cm_mean"
resolution = 0.01 # in degrees

# Folder the per-tile .tif files of this coverage are written into.
os.makedirs(f"ISRIC Data/{coverage_id}", exist_ok=True)

In [None]:
chunks = dict()

# First pass: request every tile once. The tile's position in `bboxes` is part
# of its file name, so tile i is always stored as <coverage_id>_<i>.tif.
for idx, bbox in enumerate(bboxes):
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename)
    chunks[bbox] = df

# Tiles whose first-pass download failed (get_soil_grids returned None for them).
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]

# Second pass: retry only the failed tiles, with more attempts. Iterate over
# `bboxes` again so `idx` is the tile's ORIGINAL index; enumerating
# `bboxes_not_found` would restart at 0 and write the retried tile over the
# .tif file of an unrelated tile that had already been downloaded.
for idx, bbox in enumerate(bboxes):
    if chunks[bbox] is not None:
        continue
    filename = f"ISRIC Data/{coverage_id}/{coverage_id}_{idx}.tif"
    df, status = get_soil_grids(bbox, service_id, coverage_id, resolution, filename, retries=5)
    chunks[bbox] = df

In [None]:
# Tiles that still have no data after the download cell ran; the displayed
# count is 0 when every tile was fetched.
bboxes_not_found = [bbox for bbox, value in chunks.items() if value is None]
len(bboxes_not_found)

In [None]:
# silt_5_15cm_mean = pd.concat(chunks.values())
# silt_5_15cm_mean.to_csv("data/fao_silt_5_15cm_mean.csv")

In [66]:
import rasterio
from rasterio.merge import merge
import glob
import os

def merge_raster_in_folder(src_dir, out_filename, expected_count=25):
    """Mosaic every ``*.tif`` raster in `src_dir` into one GeoTIFF.

    Parameters
    ----------
    src_dir : str
        Directory whose ``*.tif`` files are merged (not searched recursively).
    out_filename : str
        Path of the merged, LZW-compressed GeoTIFF to write.
    expected_count : int or None, optional
        Number of rasters the folder must contain (default 25, matching the
        5 x 5 tile grid used in this notebook). Pass ``None`` to skip the check.

    Raises
    ------
    AssertionError
        If the folder does not contain exactly `expected_count` rasters.
    ValueError
        If the count check is disabled and the folder has no rasters at all.

    Notes
    -----
    Only the first band of the mosaic is written; the CRS and nodata value are
    taken from the first input raster.
    """
    # glob returns files in arbitrary order; sort so the mosaic input order
    # (and therefore the result where tiles overlap) is reproducible.
    rasters = sorted(glob.glob(f"{src_dir}/*.tif"))

    # Validate before opening anything so a failed check leaves no open handles.
    if expected_count is not None:
        assert len(rasters) == expected_count, 'Incomplete folder'
    if not rasters:
        raise ValueError(f"No .tif rasters found in {src_dir!r}")

    sources = []
    try:
        for raster in rasters:
            sources.append(rasterio.open(raster))

        src_crs = sources[0].profile['crs']
        src_nodata = sources[0].nodata
        # Array holding all source rasters mosaicked together, plus its transform.
        dest_array, out_transform = merge(sources)
    finally:
        # Always release the input datasets, even if opening or merging failed.
        for source in sources:
            source.close()

    with rasterio.Env():

        dest_profile = {
            "driver": 'GTiff',
            "height": dest_array.shape[1],
            "width": dest_array.shape[2],
            "count": 1,
            "dtype": dest_array.dtype,
            "crs": src_crs,
            "transform": out_transform,
            # Keep the nodata marker so readers of the merged file can still
            # mask out cells that have no data.
            "nodata": src_nodata,
            "compress": 'lzw',
        }

        with rasterio.open(out_filename, 'w', **dest_profile) as dst:
            dst.write(dest_array[0], 1)

In [63]:
# Mosaic the clay 0-5 cm tiles into a single GeoTIFF.
# NOTE(review): no cell shown here creates the full_isric_data folder - presumably it already exists; confirm.
merge_raster_in_folder('/experiments/Locusts/ISRIC Data/clay_0-5cm_mean', '/experiments/Locusts/ISRIC Data/full_isric_data/clay_0-5cm_mean.tif')

['/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_16.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_20.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_10.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_6.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_13.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_12.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_15.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_17.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_19.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_22.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_24.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_21.tif', '/experiments/Locusts/ISRIC Data/clay_0-5cm_mean/clay_0-5cm_mean_2.tif', '/experiments/Locusts/ISRIC Data/clay_0

In [67]:
# Mosaic the clay 5-15 cm tiles into a single GeoTIFF.
merge_raster_in_folder('/experiments/Locusts/ISRIC Data/clay_5-15cm_mean', '/experiments/Locusts/ISRIC Data/full_isric_data/clay_5-15cm_mean.tif')


In [68]:
# Mosaic the sand 0-5 cm tiles into a single GeoTIFF.
merge_raster_in_folder('/experiments/Locusts/ISRIC Data/sand_0-5cm_mean', '/experiments/Locusts/ISRIC Data/full_isric_data/sand_0-5cm_mean.tif')

In [69]:
# Mosaic the sand 5-15 cm tiles into a single GeoTIFF.
merge_raster_in_folder('/experiments/Locusts/ISRIC Data/sand_5-15cm_mean', '/experiments/Locusts/ISRIC Data/full_isric_data/sand_5-15cm_mean.tif')

In [70]:
# Mosaic the silt 0-5 cm tiles into a single GeoTIFF.
merge_raster_in_folder('/experiments/Locusts/ISRIC Data/silt_0-5cm_mean', '/experiments/Locusts/ISRIC Data/full_isric_data/silt_0-5cm_mean.tif')

In [71]:
# Mosaic the silt 5-15 cm tiles into a single GeoTIFF.
merge_raster_in_folder('/experiments/Locusts/ISRIC Data/silt_5-15cm_mean', '/experiments/Locusts/ISRIC Data/full_isric_data/silt_5-15cm_mean.tif')

In [72]:
# Switch to the data folder so the zip command in the next cell can refer to
# full_isric_data by a relative path.
os.chdir('/experiments/Locusts/ISRIC Data')

In [73]:
# Archive the merged rasters folder: -r recurses into it, -q suppresses output.
!zip -r -q full_isric_data.zip "full_isric_data"