In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import geopandas as gpd
import pandas as pd
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt

import os
import shutil
import regionmask
import rioxarray

from itertools import product
from zarr.errors import GroupNotFoundError

from shapely.geometry import Polygon

import carbonplan_trace.v1.utils as utils
from s3fs import S3FileSystem

# Shared S3 filesystem handle, built with default arguments. Later cells use it
# to list (`fs.ls`), download (`fs.get`), upload (`fs.put`) and map
# (`fs.get_mapper`) S3 paths.
fs = S3FileSystem()

Reference — MCD12Q1 v006 product page: https://lpdaac.usgs.gov/products/mcd12q1v006/  
Reference — MCD12 User Guide (V6): https://lpdaac.usgs.gov/documents/101/MCD12_User_Guide_V6.pdf


In [None]:
# Index the raw IGBP input files: keep every listing entry that is neither a
# directory placeholder (trailing "/") nor a zarr store, and take the year token
# from the second dot-separated field of each file name.
d = "s3://carbonplan-climatetrace/inputs/igbp/"
files = [key for key in fs.ls(d) if not key.endswith(("/", "zarr"))]
years = [key.rsplit("/", 1)[-1].split(".")[1] for key in files]
file_df = pd.DataFrame(dict(file_path=files, year=years))

In [None]:
# Distinct year tokens found among the input files.
file_df["year"].unique()

In [None]:
# Concatenate all downloaded IGBP files into one whole-globe dataset per year
# and write it to inputs/igbp/{year}.zarr (the stores read by the tile loop
# later in this notebook).
#
# NOTE(review): this cell is entirely commented out -- presumably a one-off
# preprocessing step that has already been run; confirm before re-enabling or
# deleting it.
# NOTE(review): xr.open_rasterio has been deprecated and removed from recent
# xarray releases; rioxarray.open_rasterio is the replacement if this is revived.
# NOTE(review): values 255 and 17 are replaced with NaN below -- presumably the
# "unclassified" and "water bodies" classes of the MCD12 user guide; confirm.

# for yr, group in file_df.groupby("year"):
#     print(yr[1:5], len(group))
#     igbp = []
#     for i, file in group.iterrows():
#         f = xr.open_rasterio(f"s3://{file.file_path}").squeeze(
#             dim="band", drop=True
#         )
#         igbp.append(
#             f.to_dataset(name="igbp", promote_attrs=True).chunk(
#                 {"x": 2400, "y": 2400}
#             )
#         )
#     igbp = xr.combine_by_coords(igbp, combine_attrs="drop_conflicts")
#     attrs = igbp.attrs
#     igbp = xr.where(igbp == 255, np.nan, igbp)
#     igbp = xr.where(igbp == 17, np.nan, igbp)
#     igbp.attrs = attrs
#     igbp = igbp.chunk(
#         {"x": 2400, "y": 2400}
#     )
#     mapper = fs.get_mapper(
#         f"s3://carbonplan-climatetrace/inputs/igbp/{yr[1:5]}.zarr"
#     )
#     igbp.to_zarr(mapper, mode="w")

In [None]:
# test plot

# igbp.igbp[::20, ::20].plot()

In [None]:
# Every entry listed under the ecoregion-mask prefix, minus directory
# placeholders (keys ending in "/"); the cell displays how many were found.
tilepaths = list(
    filter(
        lambda key: not key.endswith("/"),
        fs.ls("s3://carbonplan-climatetrace/intermediate/ecoregions_mask/"),
    )
)
len(tilepaths)

In [None]:
def get_tile_in_xr(path):
    """
    Open the zarr store at ``path`` as an xarray Dataset tagged as EPSG:4326.

    Parameters
    ----------
    path : str
        Location handed to ``fs.get_mapper`` (the module-level S3 filesystem).

    Returns
    -------
    xarray.Dataset or None
        The dataset, opened with ``chunks=None`` and with ``attrs["crs"]`` set
        to ``"EPSG:4326"``. If opening raises ``GroupNotFoundError`` a message
        is printed and ``None`` is returned, so callers must handle a missing
        tile.
    """
    store = fs.get_mapper(path)
    try:
        tile = xr.open_zarr(store, chunks=None)
    except GroupNotFoundError:
        print(f"{path} empty, skipping")
        return None

    tile.attrs["crs"] = "EPSG:4326"
    return tile


def convert_raster_into_tiles(tile_ds, raster):
    """
    Reproject ``raster`` onto the grid of ``tile_ds``.

    Thin wrapper around ``raster.rio.reproject_match(tile_ds)``; the ``.rio``
    accessor is registered by the notebook's ``import rioxarray``.

    Parameters
    ----------
    tile_ds
        Object defining the target grid, passed straight to ``reproject_match``.
    raster
        Object exposing a ``.rio`` accessor; this is the data being reprojected.

    Returns
    -------
    Whatever ``reproject_match`` returns (the reprojected raster).
    """
    return raster.rio.reproject_match(tile_ds)

In [None]:
# Reproject-match the yearly IGBP rasters onto the 10x-coarsened Hansen grid of
# each tile. The result is the IGBP data used for allometric equation assignment.

# Index of the first tile to process.
# NOTE(review): 103 looks like a resume offset left over from an interrupted
# run -- confirm, and set to 0 to process every tile.
START_TILE_INDEX = 103

# CRS string attached to the yearly IGBP inputs before reprojection.
IGBP_CRS = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs=True"

# for each tile
for tp in tilepaths[START_TILE_INDEX:]:
    print(tp)
    # load tile
    target_tile = get_tile_in_xr("s3://" + tp)
    if target_tile is None:
        # get_tile_in_xr prints a message and returns None for an empty tile;
        # without this guard the .rename below raises AttributeError.
        continue

    # preprocess: rename lon/lat to x/y, then average over 10x10 windows
    target_tile = target_tile.rename(lon="x", lat="y")
    target_tile = target_tile.coarsen({"x": 10, "y": 10}).mean()
    target_tile.attrs["crs"] = "EPSG:4326"

    # get file names
    fn = tp.split("/")[-1].split(".")[0]
    local_path = f"/home/jovyan/temp/{fn}.zarr"
    backup_path = local_path + ".bak"
    cloud_path = f"s3://carbonplan-climatetrace/intermediate/igbp/{fn}.zarr"
    if os.path.exists(local_path):
        shutil.rmtree(local_path)

    # Download (and back up) only when a store already exists in the cloud.
    # Otherwise fs.get / copytree fail on the missing path and the mode="w"
    # branch in the year loop below can never be reached.
    if fs.exists(cloud_path):
        fs.get(cloud_path, local_path, recursive=True)
        # copy local path to a backup location
        if os.path.exists(backup_path):
            shutil.rmtree(backup_path)
        shutil.copytree(local_path, backup_path)

    # load igbp per year
    # NOTE(review): when a store was downloaded, every year below is appended
    # along "year" without checking what the store already holds -- confirm the
    # existing stores do not already contain 2010-2018, or years get duplicated.
    for yr in np.arange(2010, 2019):
        print(yr)
        mapper = fs.get_mapper(f"s3://carbonplan-climatetrace/inputs/igbp/{yr}.zarr")
        igbp = xr.open_zarr(mapper)
        # carry the dataset-level attrs over to the igbp variable
        attrs = igbp.attrs
        igbp = igbp.igbp
        igbp.attrs = attrs
        igbp.attrs["crs"] = IGBP_CRS

        # transform
        output_da = convert_raster_into_tiles(tile_ds=target_tile, raster=igbp)
        output_da.attrs = {"crs": "EPSG:4326"}
        # overwrite the reprojected coordinates with the target tile's own x/y
        # (presumably so they are exactly identical for every year -- TODO confirm)
        output_da.coords["x"] = target_tile.x
        output_da.coords["y"] = target_tile.y
        output_da = output_da.rename(x="lon", y="lat")
        output_da = output_da.assign_coords(year=yr).expand_dims("year")

        # first write creates the local store, later writes append along "year"
        if not os.path.exists(local_path):
            output_da.to_dataset(promote_attrs=True).to_zarr(local_path, mode="w")
        else:
            output_da.to_dataset(promote_attrs=True).to_zarr(local_path, append_dim="year")

    fs.put(local_path, cloud_path, recursive=True)
    shutil.rmtree(local_path)
    # the backup only exists when a store was downloaded for this tile
    if os.path.exists(backup_path):
        shutil.rmtree(backup_path)

In [None]:
# Spot check: open one processed tile and plot every 10th cell of its 2018 layer.
mapper = fs.get_mapper("s3://carbonplan-climatetrace/intermediate/igbp/10N_010E.zarr")
test = xr.open_zarr(mapper)

test["igbp"].sel(year=2018)[::10, ::10].plot()

In [None]:
# Reproject the whole-globe data straight to lat/lon, without matching the
# Hansen grid; this data is used for setting the global domain. Only the two
# paths are live in this cell -- the processing loop after them is commented out.

local_path = "/home/jovyan/temp/global_igbp.zarr"
cloud_path = "s3://carbonplan-climatetrace/intermediate/global_igbp.zarr"

# for yr in np.arange(2010, 2019):
#     print(yr)
#     mapper = fs.get_mapper(f"s3://carbonplan-climatetrace/inputs/igbp/{yr}.zarr")
#     igbp = xr.open_zarr(mapper)
#     attrs = igbp.attrs
#     igbp = igbp.igbp
#     igbp.attrs = attrs
#     igbp.attrs["crs"] = "+proj=sinu +lon_0=0 +x_0=0 +y_0=0 +R=6371007.181 +units=m +no_defs=True"
#     igbp_latlon = igbp.rio.reproject(dst_crs='EPSG:4326')
#     igbp_latlon.attrs = {"crs": "EPSG:4326"}

#     if yr == 2010:
#         x = igbp_latlon.x
#         y = igbp_latlon.y
#     else:
#         igbp_latlon.coords["x"] = x
#         igbp_latlon.coords["y"] = y

#     igbp_latlon = igbp_latlon.rename(x="lon", y="lat")
#     igbp_latlon = igbp_latlon.assign_coords(year=yr).expand_dims("year")

#     if not os.path.exists(local_path):
#         igbp_latlon.to_dataset(promote_attrs=True).to_zarr(
#             local_path, mode="w"
#         )
#     else:
#         igbp_latlon.to_dataset(promote_attrs=True).to_zarr(
#             local_path, append_dim="year"
#         )

#     del igbp
#     del igbp_latlon

# fs.put(local_path, cloud_path, recursive=True)
# shutil.rmtree(local_path)

In [None]:
# Open the global store through the shared `fs` handle, like every other zarr
# read in this notebook, instead of handing xarray the bare "s3://" URL.
test = xr.open_zarr(fs.get_mapper(cloud_path))

In [None]:
# Display the repr of the opened global dataset.
test

In [None]:
# Plot the 2018 layer over a small window: lat from 0 to -2, lon from -70 to -68.
# NOTE(review): slice(0, -2) only selects data if lat is stored descending -- confirm.
test["igbp"].sel(year=2018).sel(lat=slice(0, -2), lon=slice(-70, -68)).plot()