In [1]:
# Print the name of the host this kernel is running on (shell escape).
!hostname

v320-002.ls6.tacc.utexas.edu


In [2]:
import os
import xarray as xr
import pandas as pd
pd.options.plotting.backend ='plotly'

import os

# Filesystem roots taken from the environment; each is None when the
# corresponding variable is not set.
WORK = os.getenv('WORK')
SCRATCH = os.getenv('SCRATCH')

In [3]:
# Root directory of the hindcast archive.
# NOTE(review): hard-coded absolute path; the same location is also the
# default `root_dir` of nCDF_Reader, so the two can drift apart.
ROOT_DIR = '/scratch/09295/naveens/hindcast'

In [4]:
## Permission error for graphcast stuff
# graphcast_files = [filename for filename in os.listdir(ROOT_DIR) if ('graphcast' in filename) and ('.nc' in filename)]
# xr.open_dataset(f'{ROOT_DIR}/{graphcast_files[0]}')

```
unique_filenames = set([fn.rsplit('_',maxsplit=1)[0] for fn in os.listdir(f'{ROOT_DIR}/1979')])
unique_filenames
```
The available file types for 1979 appear to be:
'q_pressure_levels',
'surface_variables',
't_pressure_levels',
'u_pressure_levels',
'v_pressure_levels',
'w_pressure_levels',
'z_pressure_levels'

For precipitation, we care about the surface variables file.

In [5]:
class nCDF_Reader:
    """Open the hindcast netCDF files for one date and select grid points.

    Files are looked up as ``<root_dir>/<year>/<prefix>_<date>.nc`` for every
    prefix in ``FILE_PREFIXES``.

    Attributes:
        root_dir: Directory that contains the per-year subdirectories.
        dsets: Mapping of file prefix -> dataset opened by ``set_file``.
            Empty until ``set_file`` has been called.
    """

    # File-name prefixes that set_file() opens for each date.
    FILE_PREFIXES = [
        'surface_variables',
    ]

    def __init__(self,root_dir='/scratch/09295/naveens/hindcast/') -> None:
        self.root_dir = root_dir
        # Start with an empty mapping (rather than leaving the attribute
        # undefined) so select()/close() behave predictably before set_file().
        self.dsets = {}

    def close(self):
        """Close every dataset currently held and forget them."""
        for dataset in self.dsets.values():
            dataset.close()
        self.dsets = {}

    def set_file(self,date):
        """Open the files for ``date`` and make them the current datasets.

        Args:
            date: Date string whose part before the first ``-`` is the year
                subdirectory (e.g. ``'1979-01-03'``).

        Datasets opened by an earlier call are closed once the new ones have
        been opened successfully, so objects still lazily backed by the old
        files should be loaded before calling this again.

        Raises:
            Whatever ``xr.open_dataset`` raises (e.g. for a missing file); the
            previously opened datasets are left untouched in that case.
        """
        year = date.split('-',maxsplit=1)[0]
        opened = {}
        try:
            for prefix in self.FILE_PREFIXES:
                # os.path.join avoids a doubled separator when root_dir
                # already ends with '/'.
                path = os.path.join(self.root_dir, year, f'{prefix}_{date}.nc')
                opened[prefix] = xr.open_dataset(path,engine='h5netcdf')
        except Exception:
            # Do not leak handles for the files that did open.
            for dataset in opened.values():
                dataset.close()
            raise
        self.close()
        self.dsets = opened

    def select(self,*,lat,lon,prefix):
        """Return ``.sel(lat=lat, lon=lon)`` of the dataset stored under ``prefix``.

        Raises:
            KeyError: If no dataset is loaded for ``prefix`` (for example
                because ``set_file`` has not been called yet).
        """
        if prefix not in self.dsets:
            raise KeyError(
                f'no dataset loaded for prefix {prefix!r}; '
                f'call set_file() first (loaded: {sorted(self.dsets)})'
            )
        return self.dsets[prefix].sel(lat=lat,lon=lon)

In [6]:
# Create a reader with the default root directory and open the files for
# the date 1979-01-03.
reader = nCDF_Reader()
reader.set_file('1979-01-03')

In [7]:
# Grid point to extract from the surface-variables dataset; the values are
# passed to .sel() unchanged.
# NOTE(review): lon is positive here. If the intended location is in the
# western hemisphere (97.75 W), it must be given as a negative value or in
# whatever convention the file's `lon` coordinate uses — TODO confirm.
lat = 30.25
lon = 97.75
subset = reader.select(lat=lat,lon = lon,prefix='surface_variables')
subset

In [8]:
# Metadata attributes of every data variable in the subset, keyed by short name.
attributes = {short_name:subset.data_vars[short_name].attrs for short_name in subset.data_vars}

# Map each short name to a display label "<long_name> [<units>]".
# - The loop variable is named `var_attrs` so it no longer reuses the name of
#   the dict being iterated.
# - Inner quotes differ from the f-string's own quotes, so this also parses on
#   Python versions older than 3.12.
# - A variable without `long_name` falls back to its short name, and one
#   without `units` gets no bracketed suffix, instead of raising KeyError.
name_map = {
    short_name: (
        f"{var_attrs.get('long_name', short_name)} [{var_attrs['units']}]"
        if 'units' in var_attrs
        else var_attrs.get('long_name', short_name)
    )
    for short_name, var_attrs in attributes.items()
}

In [9]:
# Convert the selected point to a DataFrame, drop the lat/lon columns, label
# the remaining columns with their display names, and plot them with the
# pandas plotting backend.
fig = (
    subset.to_pandas()
    .drop(columns=['lat', 'lon'])
    .rename(columns=name_map)
    .plot(template='simple_white')
)

# Thicker trace lines.
fig.update_traces(line_width=3)

# Title, fonts, axis lines and legend placement; this call is the cell's last
# expression, so the figure it returns is what gets displayed.
fig.update_layout(
    title=f'Surface variables for lat={lat},lon={lon}',
    font_size=18,
    xaxis_linewidth=2,
    yaxis_linewidth=2,
    legend_orientation='v',
    legend_y=1.10,
)

In [10]:
# Size in bytes of one original surface-variables file; the value shown is
# the baseline used in the compression-ratio calculation further down.
# NOTE(review): this measures the 1979-01-01 file, whereas the reader was
# opened on 1979-01-03 — confirm the mismatch is intentional.
os.path.getsize('/scratch/09295/naveens/hindcast/1979/surface_variables_1979-01-01.nc')

1266690300

In [11]:
# Write the loaded surface-variables dataset back out with the h5netcdf
# engine and no explicit encoding, as a reference point for file size.
reader.dsets['surface_variables'].to_netcdf(f'{SCRATCH}/hindcast/test.nc',engine='h5netcdf')

In [12]:
# Size in bytes of the test.nc copy under SCRATCH.
os.path.getsize(f'{SCRATCH}/hindcast/test.nc')

1266692616

In [13]:
# One encoding entry per data variable: zlib compression, shuffle enabled,
# compression level 5.
compression_params = {
    var_name: {'compression': 'zlib', 'shuffle': True, 'complevel': 5}
    for var_name in reader.dsets['surface_variables'].data_vars
}
compression_params

{'u10m': {'compression': 'zlib', 'shuffle': True, 'complevel': 5},
 'v10m': {'compression': 'zlib', 'shuffle': True, 'complevel': 5},
 't2m': {'compression': 'zlib', 'shuffle': True, 'complevel': 5},
 'msl': {'compression': 'zlib', 'shuffle': True, 'complevel': 5},
 'tp06': {'compression': 'zlib', 'shuffle': True, 'complevel': 5}}

In [14]:
# Inspect the encoding dict attached to the u10m variable of the opened file.
reader.dsets['surface_variables'].u10m.encoding

{'chunksizes': None,
 'fletcher32': False,
 'shuffle': False,
 'source': '/scratch/09295/naveens/hindcast/1979/surface_variables_1979-01-03.nc',
 'original_shape': (61, 721, 1440),
 'dtype': dtype('<f4'),
 '_FillValue': np.float32(nan)}

In [15]:
# Write the surface-variables dataset once per compression algorithm (always
# to the same scratch file) and print "<algo>:<size in bytes>" for each.
# An algorithm that cannot be written is reported and skipped rather than
# silently ignored, so a missing codec is visible in the output.
surface_ds = reader.dsets['surface_variables']
benchmark_path = f'{SCRATCH}/hindcast/test4.nc'

for algo in ['zlib','szip','zstd','bzip2','blosc_lz','blosc_lz4','blosc_lz4hc', 'blosc_zlib' ,'blosc_zstd']:
    compression_params = {var:dict(compression=algo,shuffle=True,complevel=5) for var in surface_ds.data_vars}
    try:
        surface_ds.to_netcdf(benchmark_path,format='NETCDF4',engine='netcdf4',encoding=compression_params)
    except Exception as err:
        # Best-effort benchmark: keep going, but say which algorithm failed
        # and why. KeyboardInterrupt is deliberately not caught.
        print(f'{algo}: failed ({err!r})')
    else:
        print(f'{algo}:{os.path.getsize(benchmark_path)}')

zlib:777185556
szip:800693725
zstd:895766455
bzip2:838796927
blosc_lz:882472490
blosc_lz4:870625281
blosc_lz4hc:804130980
blosc_zlib:777650881
blosc_zstd:790535009


In [16]:
# Same per-variable encoding as before but with zstd, written through the
# h5netcdf engine instead of netcdf4; the cell's result is the file size.
# NOTE(review): confirm that the h5netcdf engine accepts compression='zstd'
# in this environment.
compression_params2 = {
    var_name: {'compression': 'zstd', 'shuffle': True, 'complevel': 5}
    for var_name in reader.dsets['surface_variables'].data_vars
}
reader.dsets['surface_variables'].to_netcdf(
    f'{SCRATCH}/hindcast/test5.nc',
    format='NETCDF4',
    engine='h5netcdf',
    encoding=compression_params2,
)
os.path.getsize(f'{SCRATCH}/hindcast/test5.nc')

In [None]:
# Fractional size reduction: (original - compressed) / original.
# 1266690300 is the size reported above for the original 1979-01-01 file.
# NOTE(review): 776420864 does not appear in any recorded output of this
# notebook — presumably a compressed size from an earlier run; TODO confirm
# which algorithm/file it came from.
(1266690300-776420864)/1266690300


0.38704759640142505

In [None]:
def _path_size(path):
    """Return the size in bytes of a file, or the total of all files under a directory.

    os.path.getsize() on a directory reports the size of the directory entry
    itself, not of its contents, so a store that is laid out as a directory
    tree has to be summed file by file.
    """
    if os.path.isdir(path):
        return sum(
            os.path.getsize(os.path.join(dirpath, filename))
            for dirpath, _dirnames, filenames in os.walk(path)
            for filename in filenames
        )
    return os.path.getsize(path)

# NOTE(review): test3.zarr is not written by any cell visible here — confirm
# it exists and whether it is a directory store.
_path_size(f'{SCRATCH}/hindcast/test3.zarr')

9

In [None]:
# Open test3.nc and close it again immediately; nothing is read or displayed.
# NOTE(review): test3.nc is not written by any cell visible in this notebook
# — confirm the file exists before relying on this cell.
xr.open_dataset(f"{SCRATCH}/hindcast/test3.nc").close()