Look here: /glade/campaign/collections/rda/data/ds628.1/anl_surf125/1958/anl_surf125.001_pres.195801_195812

## Do this step with bash, because it converts GRIB to netCDF

In [217]:
import xarray as xr
import glob
import os
import numpy as np
import pandas as pd
from dask.diagnostics import ProgressBar
import time
from dask import delayed
from dask import delayed, persist
import dask
import matplotlib.pyplot as plt
import xesmf as xe


## Functions

In [138]:
def calculate_nansum(dataset,var_bad,variable_name='t2m'):
    """Merge two mutually exclusive variables into one variable.

    At every step along ``time`` exactly one of the two variables named in
    ``var_bad`` must hold data while the other is entirely NaN.  The two
    fields are combined with ``np.nansum`` and the result is added to the
    dataset under ``variable_name``.

    Parameters
    ----------
    dataset : xarray.Dataset
        Dataset that contains both variables named in ``var_bad``; both must
        have a ``time`` dimension.
    var_bad : sequence of str
        Names of the variables to merge.  Only the first two entries are used.
    variable_name : str, optional
        Name under which the merged variable is stored (default ``'t2m'``).

    Returns
    -------
    xarray.Dataset
        A new dataset with the merged variable added; the two input
        variables are left in place.

    Raises
    ------
    ValueError
        If either variable is missing from ``dataset``, or if at any time
        step both variables hold data or both are entirely NaN.

    Notes
    -----
    ``np.nansum`` treats NaN as zero, so a point that is NaN in *both*
    variables at an otherwise valid time step is written as 0, not NaN.
    """
    bv1 = var_bad[0]
    bv2 = var_bad[1]

    # Report the names that were actually requested, not fixed placeholders.
    if bv1 not in dataset.data_vars or bv2 not in dataset.data_vars:
        raise ValueError(
            "The dataset must contain '{}' and '{}' variables.".format(bv1, bv2)
        )

    first = dataset[bv1]
    second = dataset[bv2]

    # Output template: same dims/coords/attrs as the first variable.
    merged = xr.zeros_like(first)

    for step in range(first.sizes['time']):
        # Pull each time slice once; positional selection also works when
        # time labels repeat.
        first_vals = first.isel(time=step).values
        second_vals = second.isel(time=step).values

        first_empty = np.isnan(first_vals).all()
        second_empty = np.isnan(second_vals).all()

        # Exactly one of the two variables may carry data at a given step.
        if not first_empty and not second_empty:
            raise ValueError("Both bad vars contain real values ... ")
        if first_empty and second_empty:
            raise ValueError("Both bad vars contain nan values ... ")

        # Assign by dimension name so the position of `time` and the number
        # of remaining dimensions do not matter.
        merged[{'time': step}] = np.nansum([first_vals, second_vals], axis=0)

    return dataset.assign({variable_name: merged})

## Replace any NaN values

In [194]:
# Repair regridded JRA55 files that lack the expected variable `vary_name`:
# merge the variables they do contain via calculate_nansum, write the result
# to a 'V2_'-prefixed copy, then rename the copy over the original file.
dir_JRA55_obs = '/glade/scratch/wchapman/JRA55_regrid_out/'
vary = 'anl_p125.011_tmp.'
vary_name = 't'
FNS = sorted(glob.glob(dir_JRA55_obs+vary+'*.nc'))

# Pass 1: find files where `vary_name` is missing and record what they hold.
bad_fils = []
bad_vary_names = []
for fn in FNS:
    # The context manager closes the file handle as soon as it is inspected.
    with xr.open_dataset(fn) as DScheck:
        if vary_name not in DScheck.data_vars:
            print('inspect: ',fn)
            bad_fils.append(fn)
            # Store a plain list, not a live view tied to the open dataset.
            bad_vary_names.append(list(DScheck.data_vars))

# Pass 2: rebuild the missing variable and write a corrected copy.
keep_names = []
for fils, keep_names in zip(bad_fils, bad_vary_names):
    # Keep the source open only while the merged result is being written, so
    # it is closed before the rename below replaces it.
    with xr.open_dataset(fils) as DSchange:
        t2m = calculate_nansum(DSchange,keep_names,variable_name=vary_name)
        t2m.to_netcdf(dir_JRA55_obs + 'V2_'+fils.split('/')[-1])

# Pass 3: rename the V2 files over the originals.
# NOTE: this picks up every 'V2_*' file in the directory, not only the ones
# written above.
files_to_rename = sorted(glob.glob(dir_JRA55_obs + 'V2_*'))
for fl in files_to_rename:
    print(fl)
    # Strip only the leading 'V2_' from the file name itself.
    target = os.path.join(os.path.dirname(fl), os.path.basename(fl)[len('V2_'):])
    os.rename(fl, target)

## Regrid and Append everything:

In [245]:
# Concatenate the per-file regridded JRA55 data for one variable along time,
# save the 1979-2010 subset, then regrid that subset onto the lat/lon grid of
# an example CAM history file and save the result.
dir_JRA55_obs = '/glade/scratch/wchapman/JRA55_regrid_out/'
vary = 'anl_surf125.002_prmsl.'
vary_name = 'msl'
svout = '/glade/work/wchapman/JRA55_Obs/'+'JRA55.'+vary+'1979-2010.nc'
svout2 = '/glade/work/wchapman/JRA55_Obs/'+'JRA55.'+vary+'camgrid.1979-2010.nc'

FNS = sorted(glob.glob(dir_JRA55_obs+vary+'*.nc'))
if not FNS:
    # Fail with a clear message instead of a NameError on DSfin further down.
    raise FileNotFoundError('no input files match ' + dir_JRA55_obs+vary+'*.nc')

# Load each file's variable, close the file, and concatenate once at the end;
# concatenating inside the loop re-copies the accumulated data on every pass.
pieces = []
for fn in FNS:
    print(fn)
    with xr.open_dataset(fn) as DScheck:
        pieces.append(DScheck[vary_name].to_dataset().load())
DSfin = xr.concat(pieces, dim='time')
DSfin.sel(time=slice('1979','2010')).to_netcdf(svout)


# Regrid to the CAM grid.
# Grab an example file to get the grid:
fns = sorted(glob.glob('/glade/campaign/cisl/aiml/wchapman/CAM_runs/Nudge_DA/f.e21.DAcompset.f09_d025_Seasonal_DA_stochai_UV_03_1982_r1/atm/hist/*h0*.nc'))
if not fns:
    raise FileNotFoundError('no CAM history file found to take the target grid from')
DScamgrid = xr.open_dataset(fns[0])

# Set lat/lon to regrid to:
latcam = DScamgrid.lat
loncam = DScamgrid.lon

# Target-grid description handed to the regridder:
ds_grid = xr.Dataset(
    {
        "lat": (["lat"], np.array(latcam)),
        "lon": (["lon"], np.array(loncam)),
    }
)

# NOTE(review): this file list is not used below and looks like a leftover
# from another notebook; kept only so the notebook-level `FNS` binding is
# unchanged -- confirm and remove.
FNS = sorted(glob.glob('/glade/scratch/wchapman/TEMPERAI/ERA_tp_daily_*.nc'))

DSall = xr.open_dataset(svout)
# Build the bilinear regridder from the saved 1979-2010 file to the CAM grid:
regridder = xe.Regridder(DSall, ds_grid, "bilinear")
# Regrid:
ds_out = regridder(DSall)
# Save:
print('saving... ',svout2)
ds_out.to_netcdf(svout2)

/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.195801_195812.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.195901_195912.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196001_196012.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196101_196112.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196201_196212.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196301_196312.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196401_196412.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196501_196512.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196601_196612.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196701_196712.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196801_196812.nc
/glade/scratch/wchapman/JRA55_regrid_out/anl_surf125.002_prmsl.196901_196912.nc
/glade/scratch/wchapman/JRA55_regrid_out