In [None]:
import os
import fsspec
import xarray as xr
from fsspec.implementations.sftp import SFTPFileSystem
import fsspec

Create SFTPFileSystem object to explore SFTP structure

In [None]:
# GLEAM_FTP holds the full server URL (it is also used as a prefix for file
# paths elsewhere in this notebook), but the host passed to SFTPFileSystem
# should not carry the "sftp://" prefix. Strip the scheme and anything after
# the host name instead of slicing at fixed offsets, so a missing trailing
# slash or a different scheme length cannot silently truncate the host.
sftp_host = os.environ["GLEAM_FTP"].split("://", 1)[-1].split("/", 1)[0]

# Connection settings, forwarded as keyword arguments to SFTPFileSystem.
gleam_creds_sftp = dict(
    username = os.environ["GLEAM_USER"],
    password = os.environ["GLEAM_PASSWORD"],
    port = int(os.environ["GLEAM_PORT"])
    )

In [None]:
# create link: build the SFTP filesystem object for the host, passing the
# credentials dict as keyword arguments
fs_sftp = SFTPFileSystem(host=sftp_host, **gleam_creds_sftp)

Explore file structure

In [None]:
# list the entries at the top of the SFTP tree ("./")
fs_sftp.ls("./")

In [None]:
# list the contents of the data directory
fs_sftp.ls("./data")

In [None]:
# list the contents of the v3.7b directory
fs_sftp.ls("./data/v3.7b")

In [None]:
# list the contents of the daily directory
fs_sftp.ls("./data/v3.7b/daily")

In [None]:
# list the entries in the 2003 directory
fs_sftp.ls("./data/v3.7b/daily/2003")

In [None]:
# see info on file: show the filesystem's info entry for the 2003 Et file
fs_sftp.info('./data/v3.7b/daily/2003/Et_2003_GLEAM_v3.7b.nc')

Download files

In [None]:
# this will download the file
# fs_sftp.get('./data/v3.7b/daily/2003/Et_2003_GLEAM_v3.7b.nc', "")

In [None]:
# glob pattern: entries starting with "Et" in every sub-directory of daily/
glob_path = "./data/v3.7b/daily/*/Et*"

In [None]:
# use glob to get all Et files; reuse glob_path rather than repeating the
# literal, so this listing and the download that uses glob_path cannot drift
# apart
Et_file_paths = fs_sftp.glob(glob_path)
# [print(f) for f in Et_file_paths]

In [None]:
# set local download directory (a path relative to the current working directory)
download_fp = "./download2/"

In [None]:
# download in one call: remote paths are selected by glob_path, files are written under download_fp
fs_sftp.get(rpath=glob_path, lpath=download_fp)

In [None]:
# download files one at a time from the SFTP server; only the first two
# matched paths are fetched here
for remote_path in Et_file_paths[:2]:
    fs_sftp.get(remote_path, download_fp)

In [None]:
# Show the documentation for `get`. Uses the built-in help() instead of the
# IPython-only `?` suffix so the cell is plain Python.
help(fs_sftp.get)

Create dictionary of GLEAM credentials from environment variables

In [None]:
# Show the documentation for the filesystem object. Uses the built-in help()
# instead of the IPython-only `?` suffix so the cell is plain Python.
help(fs_sftp)

In [None]:
# Credentials and port for the GLEAM server, read from the environment so that
# no secrets are stored in the notebook itself.
gleam_creds = {
    "username": os.environ["GLEAM_USER"],
    "password": os.environ["GLEAM_PASSWORD"],
    "port": int(os.environ["GLEAM_PORT"]),
}

Check that open file objects will work

In [None]:
# Build an fsspec OpenFile for each yearly Et file and print it.
years = range(2003, 2023)

for year in years:
    # format file path on SFTP
    filepath = f"data/v3.7b/daily/{year}/Et_{year}_GLEAM_v3.7b.nc"
    # create full URL
    urlpath = os.environ["GLEAM_FTP"] + filepath

    # create OpenFile object
    # The URL is passed as its own argument rather than being stored in
    # gleam_creds: that dict is reused later as fsspec_open_kwargs, where a
    # leftover "urlpath" entry would clash with the URL supplied for each file.
    file = fsspec.open(urlpath, **gleam_creds)
    print(file)

## Start creating pangeo-forge-recipe

In [None]:
import pandas as pd
from pangeo_forge_recipes.patterns import ConcatDim, FilePattern
import apache_beam as beam
from pangeo_forge_recipes.transforms import OpenURLWithFSSpec, OpenWithXarray, StoreToZarr
from tempfile import TemporaryDirectory

Create time range

In [None]:
# Time index for the recipe, built with annual frequency (freq="A").
# NOTE(review): recent pandas releases deprecate the "A" alias in favour of "YE" —
# confirm against the pandas version this notebook is pinned to.
dates = pd.date_range("2003", "2005", freq="A")

In [None]:
# Concatenation dimension named "time", keyed by the entries of `dates`.
# NOTE(review): nitems_per_file=1 declares a single item per file, but the
# "daily/<year>" paths suggest each yearly file may hold many daily steps — confirm.
time_concat_dim = ConcatDim("time", dates, nitems_per_file=1)
time_concat_dim

In [None]:
# URL template for the yearly Et files; both "{time:%Y}" fields are filled via str.format(time=...).
# NOTE(review): the host is hard-coded here while other cells take the server URL
# from the GLEAM_FTP environment variable — confirm they refer to the same server.
base_url = "sftp://hydras.ugent.be/data/v3.7b/daily/{time:%Y}/Et_{time:%Y}_GLEAM_v3.7b.nc"

In [None]:
def make_url(time):
    """Return the file URL for ``time``.

    ``time`` is substituted into the module-level ``base_url`` template with
    ``str.format``, so it must support the format spec used there.
    """
    url = base_url.format(time=time)
    return url

In [None]:
# quick check: render the URL for the last entry of `dates`
make_url(dates[-1])

In [None]:
# Combine the URL function and the time dimension into a FilePattern; the
# credentials dict is supplied as the pattern's fsspec_open_kwargs.
pattern = FilePattern(make_url, time_concat_dim, fsspec_open_kwargs=gleam_creds)
pattern

In [None]:
# Print every (index, url) pair the pattern generates. No early exit is used:
# a check for '19810903' in the URL can never match these year-based GLEAM
# paths, so every item is listed either way.
for index, url in pattern.items():
    print(index)
    print(url)

In [None]:
# Temporary location for the Zarr output. `td` is kept as a variable because
# the directory is removed once the TemporaryDirectory object is cleaned up.
target_name = "output.zarr"
td = TemporaryDirectory()
target_path = td.name
target_path

In [None]:
# The items fed into the pipeline are only index/URL pairs, so the opener
# never sees the SFTP credentials unless they are passed to it explicitly.
# Any "urlpath" entry is excluded: the URL is already supplied per item and
# passing it a second time would conflict.
sftp_open_kwargs = {
    key: value for key, value in gleam_creds.items() if key != "urlpath"
}

# Pipeline: emit the pattern's items, open each URL with fsspec, open the
# result with xarray, and store everything in a single Zarr store under
# target_path/target_name.
transforms = (
    beam.Create(pattern.items())
    | OpenURLWithFSSpec(open_kwargs=sftp_open_kwargs)
    | OpenWithXarray(file_type=pattern.file_type)
    | StoreToZarr(
        target_root=target_path,
        store_name=target_name,
        combine_dims=pattern.combine_dim_keys,
    )
)
transforms

Run!

In [None]:
# Create a Beam pipeline and apply the composed `transforms` to it.
with beam.Pipeline() as p:
    p | transforms