In [1]:
import pandas as pd
import xarray as xr
import numpy as np
from sqlalchemy import create_engine
#import sqlalchemy as sa
from urllib.parse import quote 
import os, time
from dotenv import load_dotenv

# Load connection settings from a local .env file into the process environment.
load_dotenv()

# Fail early with a readable message: os.getenv() returns None for an unset
# variable, which would otherwise surface as an opaque TypeError when the
# connection string is concatenated below.
_REQUIRED_ENV = ('DBUSER', 'DBPASS', 'DBHOST', 'DBPORT', 'DBNAME', 'MHWTABLE')
_missing_env = [name for name in _REQUIRED_ENV if os.getenv(name) is None]
if _missing_env:
    raise RuntimeError(
        'Missing required environment variable(s): ' + ', '.join(_missing_env))

DBUSER = os.getenv('DBUSER')
DBPASS = os.getenv('DBPASS')
DBHOST = os.getenv('DBHOST')
DBPORT = os.getenv('DBPORT')
DBNAME = os.getenv('DBNAME')
MHWTABLE = os.getenv('MHWTABLE')

# URI template; the '%s' placeholder is filled with the percent-encoded password
# so that special characters in it do not break URL parsing.
conn_uri = 'postgresql://' + DBUSER + ':%s@' + DBHOST + ':' + DBPORT + '/' + DBNAME

# Connect to PostgreSQL database
engine = create_engine(conn_uri % quote(DBPASS))

In [2]:
# Time how long pandas needs to pull a 200k-row sample through SQLAlchemy.
test1 = f"SELECT date, lat, lon, sst_anomaly, td, level FROM {MHWTABLE} LIMIT 200000"
st = time.time()
df = pd.read_sql_query(test1, engine)
et = time.time()
print(df.head(20))
print('Pandas read_sql_query: ', et - st, 'sec')


          date     lat    lon  sst_anomaly  td  level
0   2016-12-01 -89.875  0.125          NaN NaN      0
1   2016-12-01 -89.625  0.125          NaN NaN      0
2   2016-12-01 -89.375  0.125          NaN NaN      0
3   2016-12-01 -89.125  0.125          NaN NaN      0
4   2016-12-01 -88.875  0.125          NaN NaN      0
5   2016-12-01 -88.625  0.125          NaN NaN      0
6   2016-12-01 -88.375  0.125          NaN NaN      0
7   2016-12-01 -88.125  0.125          NaN NaN      0
8   2016-12-01 -87.875  0.125          NaN NaN      0
9   2016-12-01 -87.625  0.125          NaN NaN      0
10  2016-12-01 -87.375  0.125          NaN NaN      0
11  2016-12-01 -87.125  0.125          NaN NaN      0
12  2016-12-01 -86.875  0.125          NaN NaN      0
13  2016-12-01 -86.625  0.125          NaN NaN      0
14  2016-12-01 -86.375  0.125          NaN NaN      0
15  2016-12-01 -86.125  0.125          NaN NaN      0
16  2016-12-01 -85.875  0.125          NaN NaN      0
17  2016-12-01 -85.625  0.12

In [3]:
# Dispose of the SQLAlchemy engine's connection pool after the pandas benchmark.
# Later cells pass `engine` to pd.read_sql_query again.
engine.dispose()

In [3]:
import polars as pl
import urllib.parse

# Percent-encode the password for the userinfo part of the URI.
# quote(..., safe='') is used instead of quote_plus(): quote_plus() turns a
# space into '+', and '+' is only decoded back to a space in query strings,
# so a password containing a space would be sent incorrectly.
PASSX = urllib.parse.quote(DBPASS, safe='')
plconn_uri = 'postgres://' + DBUSER + ':' + PASSX + '@' + DBHOST + ':' + DBPORT + '/' + DBNAME

# Time the same query (test1) that was used for the pandas benchmark.
st = time.time()
dp = pl.read_database(test1, plconn_uri)
et = time.time()
print('Polars read_database: ', et-st, 'sec')
print(dp)
# To pivot the result to a 3D (time, latitude, longitude) structure the frame
# would have to go back to pandas, because Polars has no multi-index:
# pandas_df = dp.to_pandas().set_index(['date', 'lat', 'lon']).unstack(level=-1)


Polars read_database:  2.285454511642456 sec
shape: (200_000, 6)
┌────────────┬─────────┬────────┬─────────────┬─────┬───────┐
│ date       ┆ lat     ┆ lon    ┆ sst_anomaly ┆ td  ┆ level │
│ ---        ┆ ---     ┆ ---    ┆ ---         ┆ --- ┆ ---   │
│ date       ┆ f64     ┆ f64    ┆ f64         ┆ f64 ┆ i32   │
╞════════════╪═════════╪════════╪═════════════╪═════╪═══════╡
│ 2016-12-01 ┆ -89.875 ┆ 0.125  ┆ NaN         ┆ NaN ┆ 0     │
│ 2016-12-01 ┆ -89.625 ┆ 0.125  ┆ NaN         ┆ NaN ┆ 0     │
│ 2016-12-01 ┆ -89.375 ┆ 0.125  ┆ NaN         ┆ NaN ┆ 0     │
│ 2016-12-01 ┆ -89.125 ┆ 0.125  ┆ NaN         ┆ NaN ┆ 0     │
│ …          ┆ …       ┆ …      ┆ …           ┆ …   ┆ …     │
│ 2023-03-01 ┆ -52.375 ┆ 63.875 ┆ 0.646537    ┆ NaN ┆ 1     │
│ 2023-03-01 ┆ -52.125 ┆ 63.875 ┆ 0.734473    ┆ NaN ┆ 1     │
│ 2023-03-01 ┆ -51.875 ┆ 63.875 ┆ 0.811161    ┆ NaN ┆ 2     │
│ 2023-03-01 ┆ -51.625 ┆ 63.875 ┆ 0.889086    ┆ NaN ┆ 1     │
└────────────┴─────────┴────────┴─────────────┴─────┴───────┘


In [127]:
# Parse the date column so it becomes a datetime64 coordinate in xarray.
df['date'] = pd.to_datetime(df['date'])

# Build one xarray Dataset per data variable, each indexed by (date, lat, lon).
variables = ['sst_anomaly', 'level', 'td']
index_cols = ['date', 'lat', 'lon']
datasets = [
    df[index_cols + [name]].set_index(index_cols).to_xarray()
    for name in variables
]

In [128]:
# Display the list of per-variable Datasets to inspect their dimensions and dtypes.
datasets

[<xarray.Dataset>
 Dimensions:      (date: 12, lat: 720, lon: 1440)
 Coordinates:
   * date         (date) datetime64[ns] 1983-01-01 1983-02-01 ... 1983-12-01
   * lat          (lat) float64 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
   * lon          (lon) float64 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9
 Data variables:
     sst_anomaly  (date, lat, lon) float64 nan nan nan ... 0.03431 0.03431,
 <xarray.Dataset>
 Dimensions:  (date: 12, lat: 720, lon: 1440)
 Coordinates:
   * date     (date) datetime64[ns] 1983-01-01 1983-02-01 ... 1983-12-01
   * lat      (lat) float64 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
   * lon      (lon) float64 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
 Data variables:
     level    (date, lat, lon) int64 0 0 0 0 0 0 0 0 ... -1 -1 -1 -1 -1 -1 -1 -1,
 <xarray.Dataset>
 Dimensions:  (date: 12, lat: 720, lon: 1440)
 Coordinates:
   * date     (date) datetime64[ns] 1983-01-01 1983-02-01 ... 1983-12-01
   * lat      (lat) float64 -

In [4]:
import zarr
import dask


In [60]:
# Combine the per-variable datasets into one Dataset that shares the
# (date, lat, lon) coordinates. xr.merge is used rather than xr.concat along a
# synthetic 'var' dimension: the datasets hold *different* variables, so
# concatenating them would give every variable an extra 'var' axis instead of
# placing sst_anomaly/level/td side by side on the same grid cell.
ds = xr.merge(datasets)
ds

In [61]:
# Optional Blosc/zstd compressor, currently disabled:
#compressor = zarr.Blosc(cname='zstd', clevel=3, shuffle=2)
# Switch dask to its single-threaded scheduler before writing (this changes the global dask config).
dask.config.set(scheduler='single-threaded')
# Save to a Zarr file (mode='w' replaces any existing content in the 'anomaly' group)
ds.to_zarr('sst_anomaly_tmp.zarr', mode='w', group='anomaly') 


<xarray.backends.zarr.ZarrStore at 0x7fa8cfa79b60>

In [9]:
# Time how long it takes to open the 'anomaly' group of the Zarr store.
st = time.time()
dz = xr.open_zarr('sst_anomaly.zarr', group='anomaly', chunks='auto', decode_times=True)
et = time.time()
print('Exe time: ', et - st, 'sec')
dz

Exe time:  2.500122547149658 sec


Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,0.99 MiB
Shape,"(12, 720, 1440)","(2, 180, 360)"
Dask graph,96 chunks in 2 graph layers,96 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray
"Array Chunk Bytes 94.92 MiB 0.99 MiB Shape (12, 720, 1440) (2, 180, 360) Dask graph 96 chunks in 2 graph layers Data type int64 numpy.ndarray",1440  720  12,

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,0.99 MiB
Shape,"(12, 720, 1440)","(2, 180, 360)"
Dask graph,96 chunks in 2 graph layers,96 chunks in 2 graph layers
Data type,int64 numpy.ndarray,int64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,0.99 MiB
Shape,"(12, 720, 1440)","(2, 180, 360)"
Dask graph,96 chunks in 2 graph layers,96 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 94.92 MiB 0.99 MiB Shape (12, 720, 1440) (2, 180, 360) Dask graph 96 chunks in 2 graph layers Data type float64 numpy.ndarray",1440  720  12,

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,0.99 MiB
Shape,"(12, 720, 1440)","(2, 180, 360)"
Dask graph,96 chunks in 2 graph layers,96 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,0.99 MiB
Shape,"(12, 720, 1440)","(2, 180, 360)"
Dask graph,96 chunks in 2 graph layers,96 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 94.92 MiB 0.99 MiB Shape (12, 720, 1440) (2, 180, 360) Dask graph 96 chunks in 2 graph layers Data type float64 numpy.ndarray",1440  720  12,

Unnamed: 0,Array,Chunk
Bytes,94.92 MiB,0.99 MiB
Shape,"(12, 720, 1440)","(2, 180, 360)"
Dask graph,96 chunks in 2 graph layers,96 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [20]:
import sys
GIB = 1024 ** 3

# sys.getsizeof only measures the Dataset object itself, not the arrays it references.
print(sys.getsizeof(dz))
print(dz['sst_anomaly'].nbytes / GIB)
#print(dz['td'].nbytes / GIB)
#print(dz['level'].nbytes / GIB)
print(dz.dims)

# Report the extent of every coordinate axis.
for label, coord in (("Lon", dz.lon), ("Lat", dz.lat), ("Date", dz.date)):
    coord_values = coord.values
    print(f"{label} range: ", coord_values.min(), " to ", coord_values.max())

#base_date = pd.Timestamp('1982-01-01')
#dz['date'] = base_date + pd.to_timedelta(dz.date.values, unit='D')
#print("Date range: ", dz.date.values.min(), " to ", dz.date.values.max())


112
0.185394287109375
Frozen({'date': 24, 'lat': 720, 'lon': 1440})
Lon range:  0.125  to  359.875
Lat range:  -89.875  to  89.875
Date range:  1982-01-01T00:00:00.000000000  to  1983-12-01T00:00:00.000000000


In [5]:
def to_nearest_grid_point(lon: float, lat: float) -> tuple:
    """Snap a longitude/latitude pair to the nearest multiple of 0.25 degrees.

    Args:
        lon: Longitude in degrees.
        lat: Latitude in degrees.

    Returns:
        ``(grid_lon, grid_lat)`` as floats. Python's built-in ``round`` is used,
        so exact halves are rounded to the even multiple (e.g. 0.125 -> 0.0,
        0.375 -> 0.5).
    """
    steps_per_degree = 4  # 1 / 0.25-degree grid spacing
    return tuple(
        round(coord * steps_per_degree) / steps_per_degree
        for coord in (lon, lat)
    )

In [21]:
st = time.time()
start_date = '1982-01-01'
end_date = '1983-12-01'
gridSz = 0.25
lon0, lat0 = to_nearest_grid_point(135, 15)

# The store's longitudes are shifted by +180 relative to the requested point;
# the slice is kept just under one grid step wide.
lon_lo = lon0 + 180
lon_window = slice(lon_lo, lon_lo + gridSz - 0.01)
lat_window = slice(lat0, lat0 + gridSz - 0.01)
data_subset = dz.sel(lon=lon_window, lat=lat_window,
                     date=slice(start_date, end_date))
print(data_subset.nbytes)

# Flatten to a table, undo the +180 longitude shift, and drop rows where
# every data variable is missing.
df = (
    data_subset.to_dataframe()
    .reset_index()
    .assign(lon=lambda frame: frame['lon'] - 180)
    .loc[:, ['lon', 'lat', 'date'] + variables]
    .dropna(how='all', subset=variables)
)

# Serialise timestamps as ISO-8601 strings (empty string for missing dates).
df['date'] = df['date'].apply(
    lambda x: x.isoformat() if not pd.isnull(x) else '')
et = time.time()
print('Exe time: ', et - st, 'sec')
print(df)


784
Exe time:  0.04998946189880371 sec
        lon     lat                 date  sst_anomaly  level  td
0   135.125  15.125  1982-01-01T00:00:00    -0.046890      0 NaN
1   135.125  15.125  1982-02-01T00:00:00     0.054705      0 NaN
2   135.125  15.125  1982-03-01T00:00:00    -0.094711      0 NaN
3   135.125  15.125  1982-04-01T00:00:00    -0.532612      0 NaN
4   135.125  15.125  1982-05-01T00:00:00     0.196516      0 NaN
5   135.125  15.125  1982-06-01T00:00:00     0.178499      0 NaN
6   135.125  15.125  1982-07-01T00:00:00    -0.056334      0 NaN
7   135.125  15.125  1982-08-01T00:00:00    -0.315397      0 NaN
8   135.125  15.125  1982-09-01T00:00:00    -0.229530      0 NaN
9   135.125  15.125  1982-10-01T00:00:00    -0.411741      0 NaN
10  135.125  15.125  1982-11-01T00:00:00    -0.739923      0 NaN
11  135.125  15.125  1982-12-01T00:00:00    -0.628590      0 NaN
12  135.125  15.125  1983-01-01T00:00:00    -0.404955      0 NaN
13  135.125  15.125  1983-02-01T00:00:00    -0.0549

In [6]:
# Set the initial start and end dates
start_date = '1982-01-01'
end_date = '1982-12-31'

# Load the first year of data
query = f"SELECT date, lat, lon, sst_anomaly, level, td FROM {MHWTABLE} WHERE date >= '{start_date}' AND date <= '{end_date}';"
df = pd.read_sql_query(query, engine)

# Convert to xarray Dataset and save to Zarr #need 40s for 1 yr
df['date'] = pd.to_datetime(df['date'])

df.head(20)

Unnamed: 0,date,lat,lon,sst_anomaly,level,td
0,1982-12-01,-89.875,0.125,,0,
1,1982-12-01,-89.625,0.125,,0,
2,1982-12-01,-89.375,0.125,,0,
3,1982-12-01,-89.125,0.125,,0,
4,1982-12-01,-88.875,0.125,,0,
5,1982-12-01,-88.625,0.125,,0,
6,1982-12-01,-88.375,0.125,,0,
7,1982-12-01,-88.125,0.125,,0,
8,1982-12-01,-87.875,0.125,,0,
9,1982-12-01,-87.625,0.125,,0,


In [15]:
# Here you have multiple variables ('sst_anomaly', 'level', 'td') 
# So, it's better to convert them separately and then merge
# variables = ['sst_anomaly', 'level', 'td']
# datasets = []

# for var in variables:
#    df_var = df[['date', 'lat', 'lon', var]]
#    df_var = df_var.set_index(['date', 'lat', 'lon']).to_xarray()
#    datasets.append(df_var)

#ds = xr.concat(datasets, dim=pd.Index(variables, name='var'))    
#ds.to_zarr('sst_anomaly.zarr', mode='w', group='anomaly') #need 6 sec to write
#Old approach (kept for reference): it caused the level and sst_anomaly data variables for the same lon/lat/date to appear on different rows.

In [7]:
# Check for the problem where level and sst_anomaly ended up on different rows
# after saving to Zarr: keep all variables in one frame so they share the same
# (date, lat, lon) index.

variables = ['sst_anomaly', 'level', 'td']
index_cols = ['date', 'lat', 'lon']

# `df` already holds every variable on the same row for each (date, lat, lon),
# so it can be converted directly. Splitting it into one frame per variable and
# outer-merging them back on the same keys reproduces the same table at the
# cost of two large joins.
ds = df[index_cols + variables].set_index(index_cols).to_xarray()


In [8]:
# Create the store from the first year of data; mode='w' starts the 'anomaly' group from scratch.
ds.to_zarr('sst_anomaly.zarr', mode='w', group='anomaly') #'sst_anomaly_test.zarr'

<xarray.backends.zarr.ZarrStore at 0x7f1c4fca5cb0>

In [10]:
# Append the remaining years to the store one year at a time.
# NOTE: this cell is not idempotent - running it twice appends the same dates again.
# NOTE(review): the table name is hard-coded here while the first-year cell reads
# from MHWTABLE - confirm both refer to the same table.
index_cols = ['date', 'lat', 'lon']

for year in range(1983, 2024):  # covers 1983..2023; range() excludes the stop value
    start_date = f'{year}-01-01'
    end_date = f'{year}-12-31'
    query = f"SELECT lat, lon, sst_anomaly, level, td, date FROM sst_anomaly_without_detrend WHERE date >= '{start_date}' AND date <= '{end_date}';"
    df = pd.read_sql_query(query, engine)

    # Nothing to append for a year that has no rows in the database.
    if df.empty:
        continue

    df['date'] = pd.to_datetime(df['date'])

    # All variables already share one row per (date, lat, lon), so the frame is
    # converted directly instead of being split per variable and merged back.
    ds = df[index_cols + variables].set_index(index_cols).to_xarray()

    # Append to the Zarr store along the date dimension
    ds.to_zarr('sst_anomaly.zarr', mode='a', append_dim='date', group='anomaly')


In [17]:
#Test to append new data
ZARR_PATH = 'tmp/mhw.zarr'
if True:
    dz = xr.open_zarr(ZARR_PATH, group='anomaly', decode_times=True)

    # Check duplicates in the existing Zarr dataset
    duplicate_dates_dz = dz['date'].to_index().duplicated()
    if duplicate_dates_dz.any():
        print("Error! Duplicate dates in dz:", dz['date'][duplicate_dates_dz].values)

print(dz)

<xarray.Dataset>
Dimensions:      (date: 498, lat: 720, lon: 1440)
Coordinates:
  * date         (date) datetime64[ns] 1982-01-01 1982-02-01 ... 2023-06-01
  * lat          (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon          (lon) float32 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9
Data variables:
    level        (date, lat, lon) int64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    sst          (date, lat, lon) float32 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    sst_anomaly  (date, lat, lon) float64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    td           (date, lat, lon) float64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>


In [13]:
def get_max_date_from_zarr(dz):
    """Return the most recent timestamp on the ``date`` coordinate of ``dz``.

    Args:
        dz: An opened dataset (or any mapping) whose ``dz['date'].values`` is a
            NumPy array of datetimes.

    Returns:
        The latest date as a ``pandas.Timestamp``. The maximum is taken
        explicitly, so the result does not depend on the coordinate being
        sorted in ascending order.

    Raises:
        ValueError: If the ``date`` coordinate is empty.
    """
    # dz = xr.open_zarr(zarr_path, group='anomaly')
    dates = dz['date'].values
    if len(dates) == 0:
        raise ValueError("Cannot determine max date: 'date' coordinate is empty")
    maxdate = pd.to_datetime(dates.max())
    print("Get max date from zarr: ", maxdate)
    return maxdate


def get_max_date_from_postgres(engine, table='sst_anomaly_without_detrend'):
    """Return the latest ``date`` value stored in a database table.

    Args:
        engine: Connection or engine accepted by ``pandas.read_sql_query``.
        table: Name of the table to inspect. It is interpolated into the SQL
            text as an identifier, so it must come from trusted configuration,
            never from user input.

    Returns:
        The single ``max(date)`` value exactly as pandas returns it.
    """
    #engine = create_engine(connection_string)
    query = f"SELECT max(date) as max_date FROM {table}"
    df = pd.read_sql_query(query, engine)
    # Read the scalar once, by position, and reuse it for the log line and the result.
    max_date = df['max_date'].iloc[0]
    print("Get max date from Database: ", max_date)
    return max_date



In [18]:
# Latest date currently held in the opened Zarr dataset; it drives the incremental DB query below.
last_date_in_zarr = get_max_date_from_zarr(dz)
print(last_date_in_zarr)


Get max date from zarr:  2023-06-01 00:00:00
2023-06-01 00:00:00


In [None]:
# Connect through the Postgres engine and fetch only rows newer than the store's last date
query = (
    "SELECT lat, lon, sst_anomaly, level, td, date FROM sst_anomaly_without_detrend "
    f"WHERE date > '{last_date_in_zarr}';"
)
df = pd.read_sql_query(query, engine)


In [19]:
# List the distinct dates returned by the incremental query.
print(df['date'].unique())

<DatetimeArray>
['2023-07-01 00:00:00']
Length: 1, dtype: datetime64[ns]


In [20]:
if not df.empty:
    print("---- Merge data from DB start ----")
    df['date'] = pd.to_datetime(df['date'])

    # The query result already has sst_anomaly/level/td on the same row for
    # each (date, lat, lon), so convert it directly instead of splitting it
    # per variable and outer-merging the pieces back together.
    index_cols = ['date', 'lat', 'lon']
    ds_db = df[index_cols + ['sst_anomaly', 'level', 'td']].set_index(index_cols).to_xarray()

    # Only add dates that dz does not hold yet, so re-running this cell does
    # not append the same month a second time.
    is_new_date = ~ds_db['date'].isin(dz['date'].values)
    if bool(is_new_date.any()):
        # Extend the existing Zarr-backed dataset along the date dimension
        dz = xr.concat([dz, ds_db.isel(date=is_new_date.values)], dim='date')


---- Merge data from DB start ----


In [10]:
# Inspect dz after the database rows were concatenated along 'date'.
print(dz)

<xarray.Dataset>
Dimensions:      (date: 499, lat: 720, lon: 1440)
Coordinates:
  * date         (date) datetime64[ns] 1982-01-01 1982-02-01 ... 2023-07-01
  * lat          (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon          (lon) float32 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9
Data variables:
    level        (date, lat, lon) int64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    sst          (date, lat, lon) float32 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    sst_anomaly  (date, lat, lon) float64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    td           (date, lat, lon) float64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>


In [21]:
from update_mhw import download_noaa_data

# First day of the month after the store's last date; December rolls over
# into January of the following year.
year_carry, month_index = divmod(last_date_in_zarr.month, 12)
next_month_date = f"{last_date_in_zarr.year + year_carry}-{month_index + 1:02}-01"

print("next_month_date: ", next_month_date)
# Download the NOAA data for the next month
filename = download_noaa_data(next_month_date, 'tmp/')
print(filename)

next_month_date:  2023-07-01
File oisst-avhrr-v02r01.20230701.nc already exists in the destination directory.
tmp/oisst-avhrr-v02r01.20230701.nc


In [37]:
# Fold the downloaded NOAA file into dz['sst'] as a monthly mean.
if True:
        # Open the downloaded NetCDF file lazily
        ds_nc = xr.open_mfdataset(filename, parallel=True, chunks={'time': '500MB'})
        # Average 'sst' over time bins that start on the first day of each month ('1MS')
        msst = ds_nc["sst"].resample(time='1MS').mean()
        # Materialise the lazy result in memory
        ds_msst = msst.compute()
        # Remove the 'zlev' dimension and its coordinate, and rename 'time' to 'date' to match dz
        ds_msst = ds_msst.squeeze('zlev').rename({'time': 'date'}).drop('zlev')

        # Check if the 'next_month_date' already exists in dz['date']
        if np.datetime64(next_month_date) in dz['date'].values:
            # Align the ds_msst DataArray with dz['sst'] along 'lat' and 'lon'
            # ('date' is excluded from the alignment)
            print(ds_msst)
            print("----after align----")
            ds_msst_aligned, _ = xr.align(ds_msst, dz['sst'], join='inner', exclude=['date'])
            print(ds_msst_aligned)

            # Update the 'sst' values in dz for the specific date
            dz['sst'].loc[dict(date=next_month_date)] = ds_msst_aligned.sel(date=next_month_date)
        else:
            # If 'next_month_date' doesn't exist in dz['date'], simply concatenate as before
            # NOTE(review): assigning a longer DataArray back into dz may be aligned to dz's
            # existing 'date' index, which would drop the new date - confirm this branch
            # really extends dz.
            dz['sst'] = xr.concat([dz['sst'], ds_msst], dim='date')


<xarray.DataArray 'sst' (date: 1, lat: 720, lon: 1440)>
array([[[  nan,   nan,   nan, ...,   nan,   nan,   nan],
        [  nan,   nan,   nan, ...,   nan,   nan,   nan],
        [  nan,   nan,   nan, ...,   nan,   nan,   nan],
        ...,
        [-1.73, -1.74, -1.75, ..., -1.76, -1.75, -1.73],
        [-1.74, -1.77, -1.78, ..., -1.79, -1.78, -1.74],
        [-1.8 , -1.8 , -1.8 , ..., -1.8 , -1.8 , -1.8 ]]], dtype=float32)
Coordinates:
  * lat      (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon      (lon) float32 0.125 0.375 0.625 0.875 ... 359.1 359.4 359.6 359.9
  * date     (date) datetime64[ns] 2023-07-01
Attributes:
    long_name:  Daily sea surface temperature
    units:      Celsius
    valid_min:  -300
    valid_max:  4500
----after align----
<xarray.DataArray 'sst' (date: 1, lat: 720, lon: 1440)>
array([[[  nan,   nan,   nan, ...,   nan,   nan,   nan],
        [  nan,   nan,   nan, ...,   nan,   nan,   nan],
        [  nan,   nan,   nan, ...,   nan,  

In [38]:
# Inspect dz after the 'sst' values for next_month_date were updated.
print(dz)

<xarray.Dataset>
Dimensions:      (date: 499, lat: 720, lon: 1440)
Coordinates:
  * date         (date) datetime64[ns] 1982-01-01 1982-02-01 ... 2023-07-01
  * lat          (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon          (lon) float32 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9
Data variables:
    level        (date, lat, lon) int64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    sst          (date, lat, lon) float32 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    sst_anomaly  (date, lat, lon) float64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>
    td           (date, lat, lon) float64 dask.array<chunksize=(2, 180, 360), meta=np.ndarray>


In [40]:
# Select the newly added month; selecting a single label turns 'date' into a scalar coordinate.
new_data_slice = dz.sel(date=next_month_date)
print(new_data_slice)
print(new_data_slice.dims)
# Restore 'date' as a length-1 dimension so the slice has (date, lat, lon) dimensions again.
expanded_data_slice = new_data_slice.expand_dims('date')
print(expanded_data_slice)


<xarray.Dataset>
Dimensions:      (lat: 720, lon: 1440)
Coordinates:
    date         datetime64[ns] 2023-07-01
  * lat          (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon          (lon) float32 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9
Data variables:
    level        (lat, lon) int64 dask.array<chunksize=(180, 360), meta=np.ndarray>
    sst          (lat, lon) float32 dask.array<chunksize=(180, 360), meta=np.ndarray>
    sst_anomaly  (lat, lon) float64 dask.array<chunksize=(180, 360), meta=np.ndarray>
    td           (lat, lon) float64 dask.array<chunksize=(180, 360), meta=np.ndarray>
Frozen({'lat': 720, 'lon': 1440})
<xarray.Dataset>
Dimensions:      (date: 1, lat: 720, lon: 1440)
Coordinates:
  * date         (date) datetime64[ns] 2023-07-01
  * lat          (lat) float32 -89.88 -89.62 -89.38 -89.12 ... 89.38 89.62 89.88
  * lon          (lon) float32 0.125 0.375 0.625 0.875 ... 359.4 359.6 359.9
Data variables:
    level        (date, lat, lon) in