# Calculate monthly mean data from daily inputs

## notes

## imports

In [17]:
%%time
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import os
import xarray as xr
import xcdat as xc

CPU times: user 18 µs, sys: 8 µs, total: 26 µs
Wall time: 29.6 µs


## functions

In [18]:
def nearestNeighbourFill(data, missingValue=0):
    """
    Documentation for nearestNeighbourFill():
    -------
    The nearestNeighbourFill() function infills the missing cells of a 2D
    matrix with the mean of the valid cells immediately surrounding them
    (up to 8 neighbours). A single pass is made and neighbours are always
    read from the input matrix, so a missing cell that has no valid
    neighbour is left unchanged

    Author: Paul J. Durack : pauldurack@llnl.gov

    Inputs:
    -----

    |  **data** - a numpy 2D array
    |  **missingValue** - missing value of data matrix

    Returns:
    -------

    |  **filledData** - a copy of data in which every missing cell with at
    |  least one valid neighbour holds the neighbour mean (the mean is cast
    |  to the dtype of data on assignment, e.g. truncated for integer input)

    Usage:
    ------
        data = np.array([[1, 2, 3, 4],
                         [5, 0, 7, 8],
                         [9, 10, 11, 12]])

        filledData = nearestNeighbourFill(data, missingValue=0)
        print(filledData)

    Notes:
    -----
    * PJD 28 Nov 2023 - Started
    * Fixed NameError caused by the neighbors/neighbours spelling mismatch
    """

    # Make copy of input matrix so the caller's array is never modified
    filledData = data.copy()

    # Find indices of missing values
    missingIndices = np.argwhere(data == missingValue)

    for row, col in missingIndices:
        # 3x3 window centred on the missing cell; the lower bound is clamped
        # explicitly (a negative start would wrap around), the upper bound is
        # clamped by slicing itself
        window = data[max(0, row - 1):row + 2, max(0, col - 1):col + 2]

        # The centre cell equals missingValue, so this mask drops it together
        # with any other missing neighbour
        neighbours = window[window != missingValue]

        # Fill missing value with the mean of neighbours
        if neighbours.size:
            filledData[row, col] = np.mean(neighbours)

    return filledData


def iterativeZonalFill(data, missingValue=0):
    """
    Documentation for iterativeZonalFill():
    -------
    The iterativeZonalFill() function infills the missing cells of a 2D
    matrix with values found along the same row or column. For every
    missing cell the input is scanned right, down, left and then up, at
    offsets 1..max(data.shape); each non-missing value encountered is
    written into the output, so the value written last (later direction,
    larger offset) is the one retained

    Author: Paul J. Durack : pauldurack@llnl.gov

    Inputs:
    -----

    |  **data** - a numpy 2D array
    |  **missingValue** - missing value of data matrix

    Returns:
    -------

    |  **filledData** - a copy of data in which each missing cell that
    |  shares a row or column with a valid cell has been replaced

    Usage:
    ------
        data = np.array([[1, 2, 3, 4],
                         [5, 0, 7, 8],
                         [9, 10, 11, 12]])

        filledData = iterativeZonalFill(data, missingValue=0)
        print(filledData)

    Notes:
    -----
    * PJD 28 Nov 2023 - Started
    """

    # Work on a copy; source values are always read from the input matrix
    result = data.copy()

    # (row, col) positions of every missing cell
    gaps = np.argwhere(data == missingValue)

    # Largest offset that can still land inside the matrix
    reach = max(data.shape)

    # Unit steps as (row step, col step): right, down, left, up
    for stepRow, stepCol in ((0, 1), (1, 0), (0, -1), (-1, 0)):
        for distance in range(1, reach + 1):
            for gapRow, gapCol in gaps:
                srcRow = gapRow + distance * stepRow
                srcCol = gapCol + distance * stepCol

                # Skip candidates that fall outside the matrix
                if not (0 <= srcRow < data.shape[0]):
                    continue
                if not (0 <= srcCol < data.shape[1]):
                    continue

                # Any valid value overwrites whatever was written before
                candidate = data[srcRow, srcCol]
                if candidate != missingValue:
                    result[gapRow, gapCol] = candidate

    return result



## set data paths

In [19]:
# Directory searched for the input netCDF files (joined with a glob pattern
# when the data are loaded)
obsPath = "/p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/"

## lazy load data

In [56]:
def setCalendar(ds):
    """
    Stamp calendar and units attributes onto the time axis of ds

    Intended as a preprocess hook applied to each file before time decoding,
    working around https://github.com/pydata/xarray/issues/6259. The
    dataset is modified in place and the same object is returned
    """
    timeAttrs = ds.time.attrs
    timeAttrs.update(
        calendar="standard",
        units="seconds since 1981-01-01 00:00:00",
    )
    return ds

# Glob pattern for files whose names start with "1998"
# (swap in "199801*.nc" to restrict the load to names starting with 199801)
dataPath = os.path.join(obsPath, "1998*.nc")
print(f"dataPath: {dataPath}")

# Open all matching files as one dataset; setCalendar is applied to each
# file through the preprocess hook
ds = xc.open_mfdataset(dataPath, preprocess=setCalendar)
print("done!")

dataPath: /p/user_pub/PCMDIobs/obs4MIPs_input/RSS/RSS-MW5-1/v20230605/1998*.nc
done!


## view dataset

In [57]:
ds

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## add calendar attribute to time axis

In [37]:
# Mark the time axis as using the standard calendar.
# NOTE(review): setCalendar() writes the same attribute when used as the
# preprocess hook, so this cell looks redundant - confirm before removing
ds.time.attrs['calendar'] = 'standard'

In [38]:
ds

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


## add missing time_bnds

In [39]:
# Add bounds for the time ("T") axis through the xcdat bounds accessor and
# rebind ds to the returned dataset
ds = ds.bounds.add_missing_bounds(axes="T")
#ds.time.attrs["bounds"] = "time_bnds"

In [40]:
ds

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 1.41 GiB 3.96 MiB Shape (365, 720, 1440) (1, 720, 1440) Dask graph 365 chunks in 731 graph layers Data type float32 numpy.ndarray",1440  720  365,

Unnamed: 0,Array,Chunk
Bytes,1.41 GiB,3.96 MiB
Shape,"(365, 720, 1440)","(1, 720, 1440)"
Dask graph,365 chunks in 731 graph layers,365 chunks in 731 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [41]:
# Monthly mean of analysed_sst using plain xarray resampling (no xcdat
# weighting involved); with the 1998 data this yields 12 time steps.
# NOTE(review): recent pandas/xarray releases deprecate the 'M' alias in
# favour of 'ME' - confirm against the installed versions
mean_monthXr = ds.analysed_sst.resample(time='M').mean()
# https://stackoverflow.com/questions/50564459/using-xarray-to-make-monthly-average

In [43]:
mean_monthXr

Unnamed: 0,Array,Chunk
Bytes,47.46 MiB,3.96 MiB
Shape,"(12, 720, 1440)","(1, 720, 1440)"
Dask graph,12 chunks in 792 graph layers,12 chunks in 792 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 47.46 MiB 3.96 MiB Shape (12, 720, 1440) (1, 720, 1440) Dask graph 12 chunks in 792 graph layers Data type float32 numpy.ndarray",1440  720  12,

Unnamed: 0,Array,Chunk
Bytes,47.46 MiB,3.96 MiB
Shape,"(12, 720, 1440)","(1, 720, 1440)"
Dask graph,12 chunks in 792 graph layers,12 chunks in 792 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [48]:
# Attach monthly-frequency time bounds via the xcdat bounds accessor.
# NOTE(review): mean_monthXc is not assigned in any cell visible in this
# notebook; the out-of-order execution counts suggest its defining cell was
# removed or edited - confirm before running the notebook top to bottom
mean_monthXc2 = mean_monthXc.bounds.add_time_bounds(method="freq", freq="month")

In [49]:
mean_monthXc2

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,7.91 MiB
Shape,"(12, 720, 1440)","(1, 720, 1440)"
Dask graph,12 chunks in 912 graph layers,12 chunks in 912 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 94.92 MiB 7.91 MiB Shape (12, 720, 1440) (1, 720, 1440) Dask graph 12 chunks in 912 graph layers Data type float64 numpy.ndarray",1440  720  12,

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,7.91 MiB
Shape,"(12, 720, 1440)","(1, 720, 1440)"
Dask graph,12 chunks in 912 graph layers,12 chunks in 912 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [50]:
# Weighted monthly group average of analysed_sst through the xcdat temporal
# accessor, computed from mean_monthXc2
mean_monthXc3 = mean_monthXc2.temporal.group_average("analysed_sst", freq="month", weighted=True)

In [51]:
mean_monthXc3

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,7.91 MiB
Shape,"(12, 720, 1440)","(1, 720, 1440)"
Dask graph,12 chunks in 1043 graph layers,12 chunks in 1043 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 94.92 MiB 7.91 MiB Shape (12, 720, 1440) (1, 720, 1440) Dask graph 12 chunks in 1043 graph layers Data type float64 numpy.ndarray",1440  720  12,

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,7.91 MiB
Shape,"(12, 720, 1440)","(1, 720, 1440)"
Dask graph,12 chunks in 1043 graph layers,12 chunks in 1043 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [45]:
# Weighted average of analysed_sst through the xcdat temporal accessor. The
# displayed result has shape (720, 1440), i.e. no time dimension remains, so
# despite the variable name this is not a monthly series.
# This rebinds mean_monthXc2, which other cells also assign
mean_monthXc2 = ds.temporal.average("analysed_sst", weighted=True)

In [46]:
mean_monthXc2

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(720, 1440)","(720, 1440)"
Dask graph,1 chunks in 753 graph layers,1 chunks in 753 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (720, 1440) (720, 1440) Dask graph 1 chunks in 753 graph layers Data type float64 numpy.ndarray",1440  720,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(720, 1440)","(720, 1440)"
Dask graph,1 chunks in 753 graph layers,1 chunks in 753 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [None]:
mean_monthXc.time

In [30]:
# Add time ("T") axis bounds to mean_monthXc via the xcdat bounds accessor.
# NOTE(review): mean_monthXc is not assigned in any cell visible in this
# notebook - confirm where it is defined before re-running
mean_monthXc2 = mean_monthXc.bounds.add_missing_bounds(axes="T")



In [31]:
mean_monthXc2

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(1, 720, 1440)","(1, 720, 1440)"
Dask graph,1 chunks in 85 graph layers,1 chunks in 85 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 7.91 MiB 7.91 MiB Shape (1, 720, 1440) (1, 720, 1440) Dask graph 1 chunks in 85 graph layers Data type float64 numpy.ndarray",1440  720  1,

Unnamed: 0,Array,Chunk
Bytes,7.91 MiB,7.91 MiB
Shape,"(1, 720, 1440)","(1, 720, 1440)"
Dask graph,1 chunks in 85 graph layers,1 chunks in 85 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [12]:
ds.analysed_sst

Unnamed: 0,Array,Chunk
Bytes,122.61 MiB,3.96 MiB
Shape,"(31, 720, 1440)","(1, 720, 1440)"
Dask graph,31 chunks in 63 graph layers,31 chunks in 63 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 122.61 MiB 3.96 MiB Shape (31, 720, 1440) (1, 720, 1440) Dask graph 31 chunks in 63 graph layers Data type float32 numpy.ndarray",1440  720  31,

Unnamed: 0,Array,Chunk
Bytes,122.61 MiB,3.96 MiB
Shape,"(31, 720, 1440)","(1, 720, 1440)"
Dask graph,31 chunks in 63 graph layers,31 chunks in 63 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
