In [None]:
import os
import requests
from pathlib import Path
import json
import zipfile
import logging
import geopandas as gpd
from geopandas.tools import sjoin
import math
from shapely.geometry import Polygon, box
from itertools import product
from pyproj import Transformer
import shapely.speedups
shapely.speedups.enable()

In [None]:
# FIXME: WORK_DIR depends on where the notebook kernel is started, so the
# parents[3] lookup below only resolves correctly from the expected folder depth.
WORK_DIR = Path(os.getcwd())
BASE_DIR = f'{WORK_DIR.parents[3]}/datasets/raw'

# @TODO: Add expected data files source as an environment variable.
assert BASE_DIR == '/home/jovyan/work/datasets/raw', f'{BASE_DIR} is not the correct directory'

# Params for the notebook
# Coastline shapefile; the downloaded archive is stored next to it with a .zip suffix.
path = Path(f'{BASE_DIR}/ne_10m_coastline/ne_10m_coastline.shp')
# The URL needs an explicit scheme: the previous value began with 'http//'
# (no colon), which requests rejects as a URL without a scheme.
# NOTE(review): the 'http//' path segment kept below mirrors the form of the
# download links on the Natural Earth site - confirm the URL still resolves.
urlDownload = 'https://www.naturalearthdata.com/http//www.naturalearthdata.com/download/10m/physical/ne_10m_coastline.zip'
# Output shapefile for the filtered grid (a .csv with the same stem is written too).
filename = Path(f'{BASE_DIR}/grid_test.shp')

# Coastline buffer distance in kilometres (multiplied by 1000 before buffering).
size_km = 30
# Area of interest as (xmin, ymin, xmax, ymax); used with EPSG:4326 data.
filter_bbox = box(-180,-50, 180, 40)

In [None]:
def downloadData(
    file_path: str, download_url: str, update: bool = False
) -> int:
    """
    Download a file to a path.

    The payload is streamed into a temporary ``<file_path>.part`` file and only
    moved to ``file_path`` once the transfer finished, so a failed or
    interrupted download is never mistaken for a complete file on the next run.

    Parameters
    ----------
    file_path : str or path-like - Destination of the downloaded file. Missing
                            parent directories are created.
    download_url : str - The URL to download the file from.
    update : bool, optional - If True, the file will be downloaded again even if it already exists.
                            The default is False.

    Returns
    -------
    int - 0 if the file is available (downloaded now or already present),
          1 if the download failed. Errors are logged, not raised.
    """
    partial_path = None
    try:
        target = os.fspath(file_path)
        if update or not os.path.exists(target):
            logging.info("Downloading data...")

            parent = os.path.dirname(target)
            if parent:
                os.makedirs(parent, exist_ok=True)

            partial_path = f"{target}.part"
            with requests.get(download_url, stream=True) as response:
                # Without this check an HTTP error page (404, 500, ...) would be
                # written to disk as if it were the requested archive.
                response.raise_for_status()

                with open(partial_path, "wb") as f:
                    for chunk in response.iter_content(chunk_size=64 * 1024):
                        f.write(chunk)

            # Publish the file only after the whole body has been written.
            os.replace(partial_path, target)
        else:
            logging.info("data already downloaded.")

        return 0
    except Exception as e:
        logging.error(e)
        # Do not leave a truncated temporary file behind.
        if partial_path is not None and os.path.exists(partial_path):
            os.remove(partial_path)
        return 1

In [None]:
def reprojectBbox(extent: box, src: str = "EPSG:4326", dst: str = "EPSG:3410")-> box:
    """
    Reproject a bounding box by transforming its two extreme corners.

    Only the (xmin, ymin) and (xmax, ymax) corners of ``extent.bounds`` are
    transformed from ``src`` to ``dst``; the result is the axis-aligned box
    spanned by those two transformed points.
    """
    to_dst = Transformer.from_crs(src, dst, always_xy=True).transform

    west, south, east, north = extent.bounds

    lower_left = to_dst(west, south)
    upper_right = to_dst(east, north)

    return box(lower_left[0], lower_left[1], upper_right[0], upper_right[1])

def square(width, heigh, xmin, ymin):
    """
    Build a rectangular polygon from its lower-left corner and its dimensions.

    Vertices are listed starting at (xmin, ymin), then along the bottom edge,
    up the right edge and back along the top edge.
    """
    right, top = xmin + width, ymin + heigh
    corners = [(xmin, ymin), (right, ymin), (right, top), (xmin, top)]
    return Polygon(corners)


def squareGrid(extent: box, size: int) -> gpd.GeoDataFrame:
    """
    Create a grid of squares covering an extent.

    The extent is reprojected with ``reprojectBbox`` (default CRS arguments),
    tiled with ``size`` x ``size`` cells starting at its lower-left corner,
    clipped back to the reprojected extent and returned in EPSG:4326.

    Args:
        extent: Geometry exposing ``.bounds`` as (xmin, ymin, xmax, ymax).
        size: The size of the squares in meters
    """
    xmin, ymin, xmax, ymax = reprojectBbox(extent).bounds

    # Round up so the last row/column still covers the extent's far edges.
    n_rows = int(math.ceil((ymax - ymin) / size))
    n_cols = int(math.ceil((xmax - xmin) / size))

    # Column-major order: all rows of the first column, then the next column.
    cells = [
        square(size, size, xmin + col * size, ymin + row * size)
        for col in range(n_cols)
        for row in range(n_rows)
    ]

    projected_grid = gpd.GeoDataFrame({'geometry': cells}, crs='EPSG:3410')
    clipped_grid = projected_grid.clip(box(xmin, ymin, xmax, ymax))
    return clipped_grid.to_crs("EPSG:4326")

In [None]:
# download
# downloadData signals failure through its return value (1) instead of raising,
# so check it here rather than letting a later cell fail on a missing archive.
download_status = downloadData(path.with_suffix('.zip'), urlDownload)
if download_status != 0:
    raise RuntimeError(f'Download of {urlDownload} failed; see the logged error.')

In [None]:
# unzip
# The archive was saved as `path` with a .zip suffix (see the download cell),
# not at `path` itself, and it must be unpacked into the shapefile's folder.
# NOTE(review): assumes the archive members sit at the archive root so that
# the .shp ends up exactly at `path` - confirm against the downloaded file.
with zipfile.ZipFile(path.with_suffix('.zip'), 'r') as zip_ref:
    zip_ref.extractall(path.parent)

In [None]:
# Create a buffered polygon nkm for the coastline
coast_line = gpd.read_file(path)

# Reproject once and reuse the result for both the buffer and its clip extent.
coast_line_projected = coast_line.geometry.to_crs("epsg:3410")
projected_extent = box(*coast_line_projected.total_bounds.tolist())

capped_lines = (
    coast_line_projected
    .buffer(size_km * 1000)      # size_km is in kilometres
    .clip(projected_extent)      # trim the buffer to the coastline's projected bounds
    .to_crs("epsg:4326")
    .buffer(0)
    .clip(filter_bbox)
)

# Dissolve everything into one geometry, then store one row per polygon part.
# Passing the dissolved geometry directly as a column value only works where
# multi-part geometries are iterable (Shapely 1.x); `.geoms` is explicit and
# a lone Polygon is wrapped in a list.
# NOTE(review): this is meant to match the rows the original produced under
# Shapely 1.x - confirm the exported shapefile layout is as expected.
coast_union = capped_lines.unary_union
coast_parts = list(getattr(coast_union, 'geoms', [coast_union]))
coast_line_buffered = gpd.GeoDataFrame({'geometry': coast_parts},
    crs="EPSG:4326")
coast_line_buffered.to_file(
    f'{BASE_DIR}/ne_10m_coastline/ne_10m_coastline_buffered_{size_km}km.shp')

In [None]:
# Creates a square grid of the selected extent
# Cell edge length: floor of the square root of 10e9 * 30.
# NOTE(review): the literal 30 may be meant to follow size_km - confirm.
grid_cell_size = math.floor(math.sqrt(10e9 * 30))
grid = squareGrid(filter_bbox, grid_cell_size)

In [None]:
# TODO: this should be able to be further filtered
# Keep only the grid cells that touch the buffered coastline.
coast_buffer_union = coast_line_buffered.geometry.unary_union
pip_mask = grid.intersects(coast_buffer_union)
sub = grid.loc[pip_mask].dropna().reset_index(drop=True)

# 1-based sequential tile id, reused in the per-tile URL.
sub['id'] = (sub.index + 1).astype(int)
sub['url'] = [
    f'https://storage.googleapis.com/mangrove_atlas/ee_export_tiffs/mangrove_blue_carbon/{tile_id}.tif'
    for tile_id in sub['id']
]
# GeoJSON-style mapping of each cell geometry, serialised as a JSON string.
sub['geom'] = [json.dumps(shapely.geometry.mapping(cell)) for cell in sub.geometry]
print(sub.count())
sub.boundary.plot()

In [None]:
# export filtered grid
# Full grid goes to `filename`; the id / geometry-JSON / URL table goes to a
# CSV with the same stem.
csv_filename = filename.with_suffix('.csv')
sub.to_file(filename)
sub[['id', 'geom', 'url']].to_csv(csv_filename, index=False)