In [None]:
import numpy as np
import xarray as xr

### Exploring Chlorophyll A

Here we will focus on accessing and visualizing one global layer of chlorophyll for just one monthly time step (which is its own `netcdf` file), representing January 2020.

In [None]:
# Open the locally downloaded chlorophyll dataset. Per its file name this is
# the ESA CCI Ocean Colour monthly 4 km chlor_a product for 2020-01.
ds = xr.open_dataset(
    "data/ESACCI-OC-L3S-CHLOR_A-MERGED-1M_MONTHLY_4km_GEO_PML_OCx-202001-fv5.0.nc"
)

In [None]:
# Display the dataset summary.
ds

In [None]:
# Show the dataset's dimensions.
print(ds.dims)

In [None]:
# Show the dataset's data variables.
print(ds.data_vars)

In [None]:
# Show the name of every entry in `ds.variables`.
print(ds.variables.keys())

In [None]:
# Extract only the chlorophyll-a data.
# Index the Dataset directly instead of going through `ds.variables`: this
# returns a DataArray that keeps its coordinates attached, whereas
# `ds.variables[...]` is xarray's low-level mapping of bare Variable objects.
chlor_a = ds["chlor_a"]
chlor_a

In [None]:
# Look up the `units` attribute stored on the chlorophyll-a variable.
chlor_a.attrs["units"]

In [None]:
import matplotlib.pyplot as plt

# Select index 0 along the `time` dimension and display the result.
chlor_a2d = chlor_a.isel(time=0)
chlor_a2d

In [None]:
import matplotlib.pyplot as plt

# Default figure size for this and any later figures.
plt.rc("figure", figsize=(12.0, 8.0))

# Drawn as-is the image comes out upside-down, so reverse the first axis.
f = np.flip(chlor_a2d, axis=0)

# Filled contour plot (20 passed as the levels argument) with a colorbar.
plt.contourf(f, 20, cmap="YlGnBu_r")
plt.colorbar()
plt.show()

Note that this is an ocean-based dataset, so we'd have to calculate values for each administrative boundary of interest based on proximity, and only for those areas that touch the ocean.

Campbell et al. (2020) extracted oceanic variable data for coastal areas extending from the district shoreline to one decimal degree offshore. They then calculated a mean value for each coastal district area. 

Additionally, when displaying the data visually, we'll want to explore min/max values and be able to `zoom in` to particular regions, as chlorophyll A concentrations will accumulate only in those areas closer to the shore.

### Exploring Land Surface Temperature

Now let's explore Land Surface Temperature, but this time let's try to access the data directly from the URL.

In [None]:
# Original download link, kept for reference only (it does not open as-is):
# url = ("https://dap.ceda.ac.uk/neodc/esacci/land_surface_temperature/data/MULTISENSOR_IRCDR/L3S/0.01/v2.00/monthly/2020/12/ESACCI-LST-L3S-LST-IRCDR_-0.01deg_1MONTHLY_DAY-20201201000000-fv2.00.nc?download=1")

# The original link above doesn't work; `#mode=bytes` must be appended to the
# end of the URL (see: https://github.com/Unidata/netcdf4-python/issues/1043)
url = "https://dap.ceda.ac.uk/neodc/esacci/land_surface_temperature/data/MULTISENSOR_IRCDR/L3S/0.01/v2.00/monthly/2020/12/ESACCI-LST-L3S-LST-IRCDR_-0.01deg_1MONTHLY_DAY-20201201000000-fv2.00.nc#mode=bytes"

In [None]:
# Open the dataset directly from `url` and display its summary.
ds_disk = xr.open_dataset(url)
ds_disk

Before we do any processing of the data, we will want to clip the global dataset down to our AOI (continent of Africa) to reduce the size and processing time. To do this, we'll import the national administrative boundaries (admin 0) for Africa in order to create a bounding box for the continent.

In [None]:
import geopandas as gpd

# Read the Africa admin-0 boundaries from the ICPAC GeoServer WFS endpoint.
# The request asks for JSON output in EPSG:4326 (see the query parameters).
admin0_gdf = gpd.read_file(
    "https://geoportal.icpac.net/geoserver/ows?service=WFS&version=1.0.0&request=GetFeature&typename=geonode%3Aafr_g2014_2013_0&outputFormat=json&srs=EPSG%3A4326&srsName=EPSG%3A4326"
)
# admin0_gdf

# Check which CRS geopandas attached to the layer.
print(admin0_gdf.crs)

In [None]:
# Bounding box enclosing every admin-0 geometry, unpacked into its four edges.
xmin, ymin, xmax, ymax = admin0_gdf.geometry.total_bounds
print(xmin, ymin, xmax, ymax)

In [None]:
# Importing rioxarray registers the `.rio` accessor on xarray objects; without
# this import `ds_disk.rio` raises AttributeError on a freshly started kernel.
import rioxarray  # noqa: F401

# Subset to bounding box of African continent
# First record EPSG:4326 as the dataset's CRS (written onto `ds_disk` in place).
ds_disk.rio.write_crs("epsg:4326", inplace=True)

In [None]:
# Clip the `lst` variable to the bounding box of the admin-0 geometries.
# The box edges are given in EPSG:4326.
lst_clip = ds_disk["lst"].rio.clip_box(
    minx=xmin,
    miny=ymin,
    maxx=xmax,
    maxy=ymax,
    crs="epsg:4326",
)

In [None]:
# Display the clipped LST array.
lst_clip

In [None]:
# Convert from Kelvin to Celsius
lst_africa_c = lst_clip - 273.15
lst_africa_c

# TODO: confirm this conversion works correctly. It appears to, but the upper
# bounds are still fairly high in some places.

We'll visualize only 1 time step here - but it would be great to visualize the whole time series:
* we could visualize instead as a line graph
* or as visualization that loops over the entire time period 

But more importantly, we're going to have to aggregate these daily values to monthly. 

In [None]:
# Inspect the dimension names of the Celsius array.
lst_africa_c.dims

In [None]:
# Select a single time point (index 0 along `time`, i.e. the first time step)
# and display the result.
lst_2d = lst_africa_c.isel(time=0)
lst_2d

In [None]:
# Check which dimensions remain after selecting one time step.
lst_2d.dims

In [None]:
# Plot through xarray so the array's own coordinates drive the axes. The map is
# then oriented by coordinate value rather than by storage order, and the axes
# and colorbar are labelled automatically.
fig, ax = plt.subplots()
lst_2d.plot.contourf(
    ax=ax,
    levels=20,
    cmap="coolwarm",
    center=False,  # keep the colour limits at the data range, not symmetric about 0
)
ax.set_title("Land surface temperature (°C), first time step")
plt.show()

# Would prefer hvplot, but datashader not yet supported on python 3.11

In addition to aggregating these daily values into monthly means, we would also require that these calculations be done for the admin level of interest (i.e., using zonal statistics).

Will also have to account for missing data.

Exploring development of a function to complete all of the above:

In [None]:
import time as ts

In [None]:
# URL template for the monthly LST files; `{year}` and `{month}` are filled in
# with `str.format(year=..., month=...)`.
# The timestamp in the file name is written as <year><month>01000000, which
# reproduces the known-good December 2020 URL used earlier in this notebook
# (…DAY-20201201000000-fv2.00.nc). This assumes every month follows the same
# naming pattern - TODO confirm against the CEDA directory listing.
url_recipe = (
    "https://dap.ceda.ac.uk/neodc/esacci/land_surface_temperature/data/"
    "MULTISENSOR_IRCDR/L3S/0.01/v2.00/monthly/{year:04d}/{month:02d}/"
    "ESACCI-LST-L3S-LST-IRCDR_-0.01deg_1MONTHLY_DAY-"
    "{year:04d}{month:02d}01000000-fv2.00.nc#mode=bytes"
)

In [None]:
# Display the URL template to check how it was assembled.
url_recipe

This is failing because of an `arithmetic error` in the final date component of the file name. It is also difficult to tell whether this `netcdf` file covers one day or one month. We also need to look into the `dtime` value, which is in nanoseconds.

In [None]:
import os
import requests
import pandas as pd

In [None]:
# Bounding box of all admin-0 geometries. Earlier in this notebook the same
# array is unpacked as (xmin, ymin, xmax, ymax).
bbox = admin0_gdf.geometry.total_bounds

Creating a function to iterate through the URL folders to allow for filtering first, so as to create most efficient workflow before downloading data to process. Below is a work-in-progress (not yet successfully run through) using the bounding box for the continent in this first pass. We'll instead want to iterate through bounding boxes for each admin2 level. 

In [None]:
def _spatial_dim(dataset, candidates):
    """Return the first name in `candidates` that is a dimension of `dataset`."""
    for name in candidates:
        if name in dataset.dims:
            return name
    raise KeyError(
        f"none of the dimensions {candidates} found; "
        f"dataset has {tuple(dataset.dims)}"
    )


def _coord_slice(coord, low, high):
    """Build a label slice covering low..high that follows the coordinate's order."""
    values = coord.values
    # Label-based slicing on a descending coordinate needs (high, low);
    # otherwise the selection comes back empty.
    descending = values.size > 1 and values[0] > values[-1]
    return slice(high, low) if descending else slice(low, high)


def calc_monthly_means(
    url_recipe,
    bounding_box,
    start_year=2020,
    end_year=2020,
    timeout=60,
):
    """Download monthly NetCDF files and average them over a bounding box.

    For every month of every year in ``start_year..end_year`` (inclusive) the
    file at ``url_recipe.format(year=..., month=...)`` is downloaded to the
    working directory, clipped to ``bounding_box``, averaged over its two
    spatial dimensions, and then deleted again.

    Parameters
    ----------
    url_recipe : str
        URL template with ``{year}`` and ``{month}`` replacement fields.
    bounding_box : sequence of 4 floats
        ``(minx, miny, maxx, maxy)``, i.e. the order returned by
        ``GeoDataFrame.total_bounds``.
    start_year, end_year : int, optional
        First and last year to process, both inclusive. Default is 2020 only.
    timeout : float, optional
        Timeout in seconds passed to ``requests.get``.

    Returns
    -------
    pandas.DataFrame
        The per-month mean tables concatenated under a ``"YYYY-MM"`` outer
        key. An empty DataFrame is returned when no file could be processed.

    Notes
    -----
    Months whose download does not return HTTP 200 are skipped with a warning.
    The spatial dimensions may be named ``latitude``/``longitude`` or
    ``lat``/``lon``.
    """
    import warnings

    minx, miny, maxx, maxy = bounding_box
    monthly_means = {}

    # loop through each month of each requested year
    for year in range(start_year, end_year + 1):
        for month in range(1, 13):
            # generate URL for netcdf file
            url = url_recipe.format(year=year, month=month)

            # A "#mode=bytes" fragment or "?query" is meaningful only to the
            # remote reader; strip both so the HTTP request and the local
            # file name are clean.
            download_url = url.split("#", 1)[0]
            filename = download_url.split("?", 1)[0].rsplit("/", 1)[-1]
            month_str = f"{year}-{month:02d}"

            try:
                # Stream to disk instead of holding the whole file in memory.
                with requests.get(download_url, stream=True, timeout=timeout) as response:
                    if response.status_code != 200:
                        warnings.warn(
                            f"Skipping {month_str}: HTTP {response.status_code} "
                            f"for {download_url}"
                        )
                        continue
                    with open(filename, "wb") as file:
                        for chunk in response.iter_content(chunk_size=1 << 20):
                            file.write(chunk)

                # Open the NetCDF file using xarray; the context manager closes
                # it before the file is removed below.
                with xr.open_dataset(filename) as dataset:
                    lat_name = _spatial_dim(dataset, ("latitude", "lat"))
                    lon_name = _spatial_dim(dataset, ("longitude", "lon"))

                    # Clip the dataset to the bounding box
                    # (latitude <-> y bounds, longitude <-> x bounds).
                    clipped_dataset = dataset.sel(
                        {
                            lat_name: _coord_slice(dataset[lat_name], miny, maxy),
                            lon_name: _coord_slice(dataset[lon_name], minx, maxx),
                        }
                    )

                    # Calculate the mean values for the administrative boundary
                    mean_values = clipped_dataset.mean(dim=[lat_name, lon_name])

                    # Store the mean values in a DataFrame while the file is
                    # still open, so the data is read before it is closed.
                    monthly_means[month_str] = mean_values.to_dataframe()
            finally:
                # Remove the downloaded NetCDF file, even after an error.
                if os.path.exists(filename):
                    os.remove(filename)

    # pd.concat raises on an empty mapping, so return an empty table instead.
    if not monthly_means:
        return pd.DataFrame()

    # Combine the monthly mean values into a single DataFrame
    return pd.concat(monthly_means)

In [None]:
# Run the helper over the continental bounding box and display the result.
africa_monthly = calc_monthly_means(url_recipe, bbox)
africa_monthly

Ignore from here forwards...

In [None]:
import pandas as pd
import rasterio as rio
import rasterstats as rs
from rasterstats import zonal_stats
import rioxarray

In [None]:
# Mean LST per admin-0 polygon.
# - `categorical=True` was dropped: it asks rasterstats to count pixels per
#   unique raster value, which is meant for class rasters, not continuous
#   temperatures.
# - `nodata=np.nan` states the missing-data value explicitly instead of
#   relying on rasterstats' fallback default.
admin0_lst = rs.zonal_stats(
    admin0_gdf,
    lst_2d.squeeze().values,
    affine=lst_2d.rio.transform(),
    nodata=np.nan,
    stats="mean",
)

In [None]:
# Shape of the single-time-step LST array.
print(np.shape(lst_2d))

In [None]:
# Shape of the admin-0 GeoDataFrame.
print(np.shape(admin0_gdf))