https://www.hydrosheds.org/products


In [None]:
import concurrent.futures
import os
import re
from zipfile import ZipFile

import geopandas as gpd
import pandas as pd
import requests
from pyogrio import read_dataframe


def download_url(url, save_path, chunk_size=128, timeout=60):
    """Stream the resource at ``url`` into the file ``save_path``.

    Args:
        url: Address fetched with an HTTP GET request.
        save_path: Destination file, opened in binary write mode.
        chunk_size: Number of bytes requested per body chunk.
        timeout: Seconds handed to ``requests.get`` so a stalled
            connection cannot block the caller forever.

    Raises:
        requests.HTTPError: If the server replies with an error status,
            so an error page is never saved as if it were the payload.
    """
    # The context manager releases the streamed connection even when
    # writing fails part-way through.
    with requests.get(url, stream=True, timeout=timeout) as r:
        r.raise_for_status()
        with open(save_path, "wb") as fd:
            for chunk in r.iter_content(chunk_size=chunk_size):
                fd.write(chunk)


def extrair(path, path_output):
    """Extract every member of the zip archive ``path`` into ``path_output``.

    Args:
        path: Location of the zip file to open for reading.
        path_output: Directory that receives the extracted members.
    """
    with ZipFile(path, mode="r") as archive:
        archive.extractall(path_output)


def threads_tasks(funcion, list1, list2):
    """Call ``funcion(a, b)`` on a thread pool for paired list items.

    Items are paired positionally, as with ``zip(list1, list2)``. The
    function returns only after every call has finished.

    Args:
        funcion: Callable taking two positional arguments.
        list1: Values passed as the first argument.
        list2: Values passed as the second argument.

    Raises:
        Exception: Whatever a worker call raised; the first failure in
            submission order is re-raised here.
    """
    with concurrent.futures.ThreadPoolExecutor() as executor:
        # ``Executor.map`` only re-raises a worker's exception when its
        # result is retrieved, so drain the iterator; otherwise failed
        # downloads/extractions would pass unnoticed.
        list(executor.map(funcion, list1, list2))


def tables(
    zip_names: list,
    links: list,
    skip: bool = False,
    flagment: bool = False,
    re_pattern: str = "",
    partition: str = "",
):
    """Download and unpack archives, then load or export their shapefiles.

    Unless ``skip`` is true, each entry of ``links`` is downloaded to the
    matching entry of ``zip_names`` and every archive is extracted into a
    folder named after it (``.zip`` removed). Only the folder of the first
    archive is inspected afterwards: the first listed sub-folder whose
    name contains ``_shp`` is searched for names containing ``.shp``.

    Args:
        zip_names: Local zip file names, one per link.
        links: Download URLs, paired positionally with ``zip_names``.
        skip: When true, reuse what is already on disk instead of
            downloading and extracting.
        flagment: When true, write one CSV per shapefile under
            ``output/{partition}=<match>/`` instead of returning data.
        re_pattern: Regular expression applied to each shapefile path;
            its first match names the partition folder and the CSV file.
            With the default empty pattern that match is the empty
            string.
        partition: Partition key used in the output folder name.

    Returns:
        ``None`` when ``flagment`` is true, otherwise a ``GeoDataFrame``
        built from the first listed shapefile only.
    """
    if not skip:
        threads_tasks(download_url, links, zip_names)
        output_folders = [
            name.replace(".zip", "") + "/" for name in zip_names
        ]
        threads_tasks(extrair, zip_names, output_folders)

    # Read the parameter rather than the notebook-level ``zip_name``
    # global, which only exists when a cell happened to define it.
    folder_zip = zip_names[0].replace(".zip", "")
    folders_snp = [
        f"{folder_zip}/" + entry
        for entry in os.listdir(folder_zip)
        if entry.count("_shp")
    ]
    files_snp = [
        folders_snp[0] + "/" + entry
        for entry in os.listdir(folders_snp[0])
        if entry.count(".shp")
    ]

    # NOTE(review): the "/content/" prefix below presumably assumes the
    # working directory is /content (e.g. Google Colab) — confirm.
    if flagment:
        for file in files_snp:
            df_gio_fragment = read_dataframe("/content/" + file)
            df_gio_fragment.columns = df_gio_fragment.columns.str.lower()

            region_name = re.findall(re_pattern, file)[0]
            folder_name = f"output/{partition}=" + region_name
            create_folder([folder_name])

            df_gio_fragment.to_csv(
                folder_name + f"/{region_name}.csv", index=False
            )
        return None

    # Only the first shapefile is loaded (``files_snp[:1]``).
    dfs = pd.concat(
        [read_dataframe("/content/" + file) for file in files_snp[:1]]
    )
    return gpd.GeoDataFrame(dfs)


def create_folder(name_folders: list) -> None:
    """Make sure every directory named in ``name_folders`` exists.

    Missing parent directories are created too, and directories that
    already exist are left untouched. Real failures (for example a
    permission error) are raised instead of being silently ignored.

    Args:
        name_folders: Directory paths to create.
    """
    for folder in name_folders:
        os.makedirs(folder, exist_ok=True)

# RiverAtlas


In [None]:
# Fetch the Global RiverATLAS archive and export its shapefiles as CSV.
link = ["https://figshare.com/ndownloader/files/20087486"]
zip_name = ["Global_RiverATLAS.zip"]
create_folder(["output", "input"])
# NOTE(review): no re_pattern/partition is passed here, so tables() uses
# its empty-string defaults and every CSV appears to land on the same
# "output/=/.csv" path — confirm this is intended.
RiverATLASTESTE = tables(
    zip_names=zip_name, links=link, skip=False, flagment=True
)

# BasinAtlas


In [None]:
# Fetch the Global BasinATLAS archive and export one CSV per level into
# output/level=<digits>/.
link = ["https://figshare.com/ndownloader/files/20087237"]
zip_name = ["Global_BasinATLAS.zip"]
create_folder(["output", "input"])
BasinATLASTESTE = tables(
    zip_name,
    link,
    False,
    True,
    # Raw string: "\d" inside a plain literal is an invalid escape
    # sequence. The dot is escaped so it only matches the extension dot.
    re_pattern=r"lev(\d*)\.shp",
    partition="level",
)

# LakeAtlas


In [None]:
# Fetch the Global LakeATLAS archive and export one CSV per shapefile into
# output/region=<match>/, where <match> is the group captured by re_pattern.
link = ["https://figshare.com/ndownloader/files/35959547"]
zip_name = ["Global_LakeATLAS.zip"]
create_folder(["output", "input"])
LakeATLASTESTE = tables(
    zip_names=zip_name,
    links=link,
    skip=False,
    flagment=True,
    re_pattern="_([a-z]{3}_[a-z]{4}).shp",
    partition="region",
)