In [None]:
##### WeatherBench 2 Work - Attempt 3

In [1]:
import apache_beam

In [2]:
import os

In [3]:
import weatherbench2

In [4]:
import xarray as xr

In [5]:
import numpy as np
import math
from weatherbench2.regions import SliceRegion, ExtraTropicalRegion
from weatherbench2.evaluation import evaluate_in_memory
from weatherbench2 import config
from weatherbench2.metrics import MSE, ACC

In [6]:
# Zarr stores in the public WeatherBench2 bucket used throughout this notebook.
# Ground truth; going by the store name this is presumably ERA5, 6-hourly, on the
# 64x32 equiangular grid - confirm against the dataset catalogue.
obs_path = 'gs://weatherbench2/datasets/era5/1959-2022-6h-64x32_equiangular_conservative.zarr'
# Forecasts; going by the store name this is presumably HRES on the same grid - confirm.
forecast_path = 'gs://weatherbench2/datasets/hres/2016-2022-0012-64x32_equiangular_conservative.zarr'


In [8]:
# What to score: geopotential at level 500 only, for dates within 2020.
selection1 = config.Selection(
    variables=['geopotential'],
    levels=[500],
    time_slice=slice('2020-01-01', '2020-12-31'),
)

# Where the inputs live and where the evaluation results are saved.
paths1 = config.Paths(
    forecast=forecast_path,
    obs=obs_path,
    output_dir='./',  # Directory to save evaluation results
)

# Init-time vs. valid-time conventions are described at
# https://weatherbench2.readthedocs.io/en/latest/init-vs-valid-time.html
# No by-init argument is passed here, so the library default applies; per the
# original author's note that default is True (the official convention is by init time).
data_config1 = config.Data(selection=selection1, paths=paths1)

# A single region labelled 'global', built from a SliceRegion with no arguments.
regions1 = {'global': SliceRegion()}

# One evaluation, named 'Attempt3', computing MSE over the region(s) above.
eval_configs1 = {
    'Attempt3': config.Eval(
        metrics={'mse': MSE()},
        regions=regions1,
    ),
}

evaluate_in_memory(data_config1, eval_configs1)

In [27]:
# Read the saved scores back in. './Attempt3.nc' is presumably the file produced by
# the 'Attempt3' evaluation with output_dir='./' - confirm.
results = xr.open_dataset('./Attempt3.nc')
geopotential_scores = results['geopotential']
# Positional pick: index 0 on axes 0, 1 and 3, everything along axis 2.
# Axis 2 is presumably lead time - TODO confirm the dimension order of the file.
truemse = geopotential_scores[0, 0, :, 0].values
truemse

array([5.24170483e+02, 4.97454895e+02, 1.03091648e+03, 1.03016873e+03,
       1.76827782e+03, 2.03027457e+03, 3.21184755e+03, 3.90278466e+03,
       5.58552620e+03, 6.83367337e+03, 9.45787348e+03, 1.16435743e+04,
       1.55987561e+04, 1.92291128e+04, 2.49300264e+04, 3.04311393e+04,
       3.85501098e+04, 4.64172527e+04, 5.73736502e+04, 6.82692969e+04,
       8.23327166e+04, 9.62770632e+04, 1.13742191e+05, 1.31081855e+05,
       1.52276297e+05, 1.73156767e+05, 1.97812150e+05, 2.21829931e+05,
       2.49627266e+05, 2.76054276e+05, 3.05749609e+05, 3.34062576e+05,
       3.65618779e+05, 3.95114041e+05, 4.26826580e+05, 4.55806308e+05,
       4.86726017e+05, 5.14979569e+05, 5.45780111e+05, 5.73745537e+05,
       6.03819242e+05])

In [28]:
# Open both Zarr stores with xarray so the scores can be recomputed by hand below.
forecast, observations = (
    xr.open_zarr(store) for store in (forecast_path, obs_path)
)

In [None]:
# Hand-computed, latitude-weighted MSE at forecast axis-1 index 0, for comparison
# with truemse[0] from the library run.

# Level-500 geopotential for 2020. The forecast is taken at index 0 of its second
# axis; the observations are thinned to every second time step.
a1 = forecast['geopotential'].sel(level=500, time=slice('2020-01-01', '2020-12-31'))[:, 0, :, :]
b1 = observations['geopotential'].sel(level=500, time=slice('2020-01-01', '2020-12-31'))[::2, :, :]

# xarray aligns the two operands on their coordinate labels before subtracting.
errors = a1 - b1

latitude = forecast['latitude'][:].values

# Half the latitude spacing: every grid row stands for the band
# [lat - delta, lat + delta]. Derived from the grid size instead of being
# hard-coded; 180 / 32 / 2 is exactly the 2.8125 used before.
# Assumes evenly spaced rows with none sitting on the poles - confirm for other grids.
delta = 180.0 / latitude.size / 2
theta_upper = latitude + delta
theta_lower = latitude - delta

# Band weights proportional to sin(upper edge) - sin(lower edge), rescaled so
# that the weights average to 1 across the latitude rows.
weights = np.sin(np.radians(theta_upper)) - np.sin(np.radians(theta_lower))
weights /= weights.sum()
weights *= latitude.size

# Author's check: printing `weights` gave the same values as the library's
# functions (presumably weatherbench2's own latitude weighting - confirm).

# Latitude is the last axis of the error array, so the weights broadcast along it.
weightedmatrix = (errors.values**2) * weights[None, None, :]

# Weighted mean over every element; the divisor follows the array's actual size
# (732 * 64 * 32 for this selection) rather than a hard-coded product.
np.sum(weightedmatrix / weightedmatrix.size)

524.1704827154813

In [None]:
#Wrong forecast method
a = forecast['geopotential'].sel(level = 500, time = slice('2020-01-01', '2020-12-31'))[:,:,:,:]
b = observations['geopotential'].sel(level=500, time = slice('2020-01-01', '2020-12-31'))[:,:,:]
adjustedb = b[::2,:,:]

weightedstuff = ((a[:-1,2,:,:].values-adjustedb[1:,:,:].values)**2) * weights[None,None,:]

np.sum(weightedstuff/(64*32*731))

1030.9059537729684

In [90]:
#Corrected forecast method
a = forecast['geopotential'].sel(level = 500, time = slice('2020-01-01', '2020-12-31'))[:,:,:,:]
b = observations['geopotential'].sel(level=500, time = slice('2020-01-01', '2021-01-01'))[:,:,:]
adjustedb = b[::2,:,:]
#adjustedb[1:-1,:,:]

weightedstuff = ((a[:,2,:,:].values-adjustedb[1:-1,:,:].values)**2) * weights[None,None,:]

np.sum(weightedstuff/(64*32*732))

1030.916475361522

In [None]:
#Corrected forecast method next 12 hour method of day incrementation works
a = forecast['geopotential'].sel(level = 500, time = slice('2020-01-01', '2020-12-31'))[:,:,:,:]
b = observations['geopotential'].sel(level=500, time = slice('2020-01-01', '2021-01-02'))[:,:,:]
adjustedb = b[::2,:,:]

weightedstuff = ((a[:,4,:,:].values-adjustedb[2:-2,:,:].values)**2) * weights[None,None,:]

np.sum(weightedstuff/(64*32*732))

1768.2778175289834

In [102]:
#6 Hour prediction?
a = forecast['geopotential'].sel(level = 500, time = slice('2020-01-01', '2020-12-31'))[:,:,:,:]
b = observations['geopotential'].sel(level=500, time = slice('2020-01-01', '2020-12-31'))[:,:,:]
adjustedb=b[1::2,:,:]

weightedstuff = ((a[:,1,:,:].values-adjustedb.values)**2) * weights[None,None,:]

np.sum(weightedstuff/(64*32*732))

497.4548952177726

In [100]:
# Inspect the forecast slice at axis-1 index 1 (shape and chunk layout check).
a[:, 1]

Unnamed: 0,Array,Chunk
Bytes,5.72 MiB,32.00 kiB
Shape,"(732, 64, 32)","(4, 64, 32)"
Dask graph,184 chunks in 4 graph layers,184 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 5.72 MiB 32.00 kiB Shape (732, 64, 32) (4, 64, 32) Dask graph 184 chunks in 4 graph layers Data type float32 numpy.ndarray",32  64  732,

Unnamed: 0,Array,Chunk
Bytes,5.72 MiB,32.00 kiB
Shape,"(732, 64, 32)","(4, 64, 32)"
Dask graph,184 chunks in 4 graph layers,184 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray


In [101]:
# Inspect the thinned observations so their shape can be compared with the forecast slice.
adjustedb

Unnamed: 0,Array,Chunk
Bytes,5.72 MiB,400.00 kiB
Shape,"(732, 64, 32)","(50, 64, 32)"
Dask graph,15 chunks in 4 graph layers,15 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
"Array Chunk Bytes 5.72 MiB 400.00 kiB Shape (732, 64, 32) (50, 64, 32) Dask graph 15 chunks in 4 graph layers Data type float32 numpy.ndarray",32  64  732,

Unnamed: 0,Array,Chunk
Bytes,5.72 MiB,400.00 kiB
Shape,"(732, 64, 32)","(50, 64, 32)"
Dask graph,15 chunks in 4 graph layers,15 chunks in 4 graph layers
Data type,float32 numpy.ndarray,float32 numpy.ndarray
