In [1]:
# Read in the CloudSat R05 data and create the monthly mean of the variables listed below.
# Information on the variables: http://www.cloudsat.cira.colostate.edu/data-products/level-2c/2c-snow-profile?term=90
# Documentation: http://www.cloudsat.cira.colostate.edu/sites/default/files/products/files/2C-SNOW-PROFILE_PDICD.P1_R05.rev0_.pdf


# 1D variables
# 'snowfall_rate_sfc'

# 2D variables
# 'Height'
# 'snowfall_rate'
# 'snow_water_content'


# necessary variables
        # 'Latitude'
        # 'Longitude'
        # 'Vertical_binsize'
        # profile times as YYYYMMDD-HH-MM-SS

In [2]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore') # don't output warnings

# import packages
from imports import(glob, pySD, pyHDF, read_var_eos, xr, np, datetime, timedelta, fct)

# reload imports
%load_ext autoreload
%autoreload 2

In [3]:
# Year of CloudSat data to process; used in the input glob patterns and in
# the output file names.
year = 2008

# Processing mode switches: along-track (1D) or profile (2D) variables.
one_D, two_D = False, True

# Month number as an unpadded string ('1' ... '12') mapped to its zero-padded
# two-digit form ('01' ... '12'), in calendar order.
available_month = {
    str(number): '{:02d}'.format(number)
    for number in range(1, 13)
}



In [4]:
# Alternative data location: '/tos-project2/NS9600K/data'
# Root of the working directory tree.
path = '/scratch/franzihe'

# Directory holding the CloudSat 2C-SNOW-PROFILE granules.
datapath = '{:}/input/cloudsat/2C-SNOW-PROFILE.P1_R05'.format(path)

# All CloudSat granule files of the selected year (<year>/<subdir>/*.hdf),
# in sorted order.
ff_cs = glob('{}/{}/*/*.hdf'.format(datapath, year, ))
ff_cs.sort()


In [5]:
# Directory holding the ECMWF-AUX granules.
filepath = '{:}/input/cloudsat/ECMWF-AUX.P_R05'.format(path)

# All ECMWF-AUX granule files of the selected year (<year>/<subdir>/*.hdf),
# in sorted order.
ff_ec = glob('{}/{}/*/*.hdf'.format(filepath, year, ))
ff_ec.sort()


In [6]:
# `variables` maps each variable name to read from the granules to the unit
# string stored with it. Which set is used depends on the mode switches; if
# both switches are set, the 2D selection below overrides the 1D one.

if one_D == True:
    # 1D variables. Further fields that can be enabled here:
    #   'DEM_elevation'               ('m') : Elevation in meters above Mean Sea Level. A value of -9999
    #                                         indicates ocean. A value of 9999 indicates an error in
    #                                         calculation of the elevation.
    #   'Vertical_binsize'            ('')  : effective vertical height of the radar range bin.
    #   'snowfall_rate_sfc_uncert'    ('')  : The estimated 1-sigma uncertainty of the surface snowfall rate
    #                                         in mm of liquid water per hour. The specified range is typical.
    #   'snowfall_rate_sfc_confidence'('')  : Flag indicating the relative quality of the surface snowfall
    #                                         rate estimate. 4: High confidence
    variables = dict(
        # Surface snowfall rate in mm of liquid water per hour. The specified range is typical.
        snowfall_rate_sfc='mm h-1',
    )

if two_D == True:
    # 2D variables. Further fields that can be enabled here:
    #   'Height'                    ('')       : Height of the radar range bins in meters above mean sea level.
    #   'snowfall_rate_uncert'      ('')       : The estimated 1-sigma uncertainties of the snowfall rates in
    #                                            the precipitating column. The specified range is typical.
    #   'snow_water_content'        ('g kg-1') : Profile of snow water content in the precipitating column in
    #                                            grams per m^3. The specified range is typical.
    #                                            NOTE(review): unit string and description disagree - confirm.
    #   'snow_water_content_uncert' ('')       : The estimated 1-sigma uncertainties of the snow water contents
    #                                            in the precipitating column in grams per m^3. The specified
    #                                            range is typical.
    variables = dict(
        # Profile of snowfall rates in the precipitating column in mm of liquid water per hour.
        # The specified range is typical.
        snowfall_rate='mm h-1',
    )

In [None]:
# Common pressure grid used as the target when 2D variables are regridded.
# The granule pressure is converted to hPa before it is compared with this
# grid, so the levels are in hPa. The 108 levels are followed by 17 NaN
# entries, giving 125 elements in total (presumably padding up to the number
# of radar bins - TODO confirm).
pressure_grid = np.concatenate((
    np.array(
        [
            24., 25., 26., 27., 28., 29., 30., 32., 33.,
            34., 35., 37., 38., 40., 41., 43., 44., 45.,
            48., 50., 52., 54., 55., 58., 60., 63., 65.,
            68., 70., 73., 75., 80., 83., 85., 90., 93.,
            98., 100., 105., 110., 113., 115., 120., 125., 130.,
            135., 140., 145., 155., 160., 165., 170., 180., 185.,
            190., 200., 210., 215., 225., 230., 240., 250., 260.,
            270., 280., 290., 300., 310., 320., 330., 345., 360.,
            370., 380., 395., 400., 425., 440., 450., 470., 480.,
            500., 515., 530., 550., 570., 585., 600., 625., 645.,
            665., 685., 700., 725., 750., 770., 800., 825., 850.,
            870., 900., 925., 950., 988., 1000., 1010., 1015., 1020.,
        ],
        dtype=float,
    ),
    np.full(17, np.nan),
))

In [7]:
# Build one NetCDF file per (month, variable).
#
# For every month in `available_month` and every entry of `variables`:
#   * skip the pair if its output file already exists,
#   * otherwise read the CloudSat granules whose day-of-year falls into the
#     month, attach profile time, geolocation and data quality, and
#   * for 2D variables additionally read pressure and temperature from the
#     ECMWF-AUX granule and sample the profile on the common `pressure_grid`
#     (nearest pressure bin per grid level),
#   * concatenate the granules along 'nray' and write the result to disk.

# Number of granules (counted from the start of ff_cs) that are read.
# The value 2 is a debugging limit; set it to None to process every granule.
max_granules = 2


def _nearest_level_regrid(values, pressure, levels):
    """Sample each profile at the bin whose pressure is closest to each level.

    Parameters
    ----------
    values : array-like, shape (nray, nbin)
        Profile variable to be regridded.
    pressure : array-like, shape (nray, nbin)
        Pressure of every bin, in the same units as `levels`. NaN marks
        bins without a valid pressure.
    levels : array-like, shape (nlev,)
        Target pressure levels. NaN entries are treated as padding.

    Returns
    -------
    numpy.ndarray, shape (nray, nlev)
        `values` of the nearest bin for every (ray, level). Entries stay NaN
        for NaN levels and for rays without any valid pressure. If several
        bins are equally close, the first one is used.
    """
    values = np.asarray(values, dtype=float)
    pressure = np.asarray(pressure, dtype=float)
    levels = np.asarray(levels, dtype=float)

    regridded = np.full((pressure.shape[0], levels.size), np.nan)
    valid_levels = np.flatnonzero(~np.isnan(levels))
    if valid_levels.size == 0:
        return regridded

    for t in range(pressure.shape[0]):
        ray_pressure = pressure[t]
        # restrict the search to bins with a valid pressure so that argmin
        # never has to deal with NaN distances
        valid_bins = np.flatnonzero(~np.isnan(ray_pressure))
        if valid_bins.size == 0:
            continue
        # distance[b, l]: |pressure of valid bin b - grid level l|
        distance = np.abs(ray_pressure[valid_bins, None] - levels[None, valid_levels])
        nearest_bin = valid_bins[np.argmin(distance, axis=0)]
        # write to the index of the *grid level*, not of the source bin
        regridded[t, valid_levels] = values[t, nearest_bin]

    return regridded


counter = 0
for month, mm in available_month.items():
    for var, unit in variables.items():
        filename = '{var}_{year}{month}.nc'.format(var = var, year = year, month = mm)
        savepath = '{path}/output/cloudsat/2C-SNOW_onemonth_onevariable/{year}/'.format(path = path, year = year)
        files = glob(savepath + filename)

        # nothing to do if the monthly file was already written
        if savepath + filename in files:
            print('{savepath}{filename} is downloaded'.format(savepath = savepath, filename = filename))
            counter += 1
            print('Have downloaded in total: {:} files'.format(str(counter)))
            continue

        # Empty dataset that the granules are appended to. It is created for
        # every (month, variable) pair so that one variable's rays never end
        # up in another variable's file.
        track_vars = dict(
            Profile_time=(['nray'], np.empty(shape = (0,), dtype='datetime64[s]')),
            Latitude    =(['nray'], np.empty(shape = (0,), )),
            Longitude   =(['nray'], np.empty(shape = (0,), )),
            Data_quality=(['nray'], np.empty(shape = (0,), )),
        )
        if one_D == True:
            ds = xr.Dataset(
                data_vars=dict(track_vars),
                coords=dict(nray=([]), nbin=([])),
                attrs=None)
            ds[var] = xr.DataArray(
                data = np.full(shape = (0,), fill_value=np.nan),
                dims = ['nray'],
                attrs= {'units': unit}
            )
        if two_D == True:
            ds = xr.Dataset(
                data_vars=dict(
                    track_vars,
                    pressure    =(['nray', 'nbin'], np.empty(shape = (0, 0), )),
                    temperature =(['nray', 'nbin'], np.empty(shape = (0, 0), )),
                ),
                coords=dict(nray=([]), nbin=([])),
                attrs=None)
            ds[var] = xr.DataArray(
                data = np.full(shape = (0, 0), fill_value=np.nan),
                dims = ['nray', 'nbin'],
                attrs= {'units': unit}
            )

        # never index past the end of the file list
        n_granules = len(ff_cs) if max_granules is None else min(max_granules, len(ff_cs))

        for i in range(n_granules):
            # the path ends in .../<year>/<day of year>/<file>.hdf
            granule_year = int(ff_cs[i].split('/')[-3])
            doy = int(ff_cs[i].split('/')[-2])   # day of the year
            _t = datetime(granule_year, 1, 1) + timedelta(doy - 1)    # create date

            if _t.month != int(month):
                continue

            # Read in CloudSat; the handles are closed even if a read fails
            f_SD_ptr = pySD.SD(ff_cs[i], pySD.SDC.READ)
            try:
                f_VD_ptr = pyHDF.HDF(ff_cs[i], pyHDF.HC.READ)
                try:
                    # get profile times from file
                    Profile_time = read_var_eos.get_profile_times(f_VD_ptr)

                    # get geolocation
                    _lat = read_var_eos.get_1D_var(f_VD_ptr, 'Latitude')   # Spacecraft Geodetic Latitude.
                    _lon = read_var_eos.get_1D_var(f_VD_ptr, 'Longitude')  # Spacecraft geodetic longitude

                    # get data quality; flags indicating data quality. If 0, then data is of good quality.
                    _Data_quality = read_var_eos.get_1D_var(f_VD_ptr, 'Data_quality')

                    # get variable
                    if one_D == True:
                        _var = read_var_eos.get_1D_var(f_VD_ptr, var)
                    if two_D == True:
                        _var = read_var_eos.get_2D_var(f_SD_ptr, f_VD_ptr, var)
                finally:
                    f_VD_ptr.close()
            finally:
                f_SD_ptr.end()

            # assign np.nan where missing values
            _var[np.where(_var == -999.)] = np.nan

            # create dataset
            if one_D == True:
                _ds = fct.create_xr_1D_ds(Profile_time, _lat, _lon, _Data_quality, var, unit, _var)

            if two_D == True:
                # Read in ECMWF-Aux files for pressure averaging for 2D files.
                # NOTE(review): assumes ff_ec[i] is the ECMWF-AUX granule that
                # belongs to ff_cs[i], i.e. both file lists line up - confirm.
                f_SD_ptr = pySD.SD(ff_ec[i], pySD.SDC.READ)
                try:
                    f_VD_ptr = pyHDF.HDF(ff_ec[i], pyHDF.HC.READ)
                    try:
                        # get 2D variable
                        pressure = read_var_eos.get_2D_var(f_SD_ptr, f_VD_ptr, 'Pressure')
                        temperature = read_var_eos.get_2D_var(f_SD_ptr, f_VD_ptr, 'Temperature')
                    finally:
                        f_VD_ptr.close()
                finally:
                    f_SD_ptr.end()

                # mask missing values, then convert pressure into hPa
                pressure[np.where(pressure == -999.)] = np.nan
                pressure = pressure / 100.

                # assign np.nan where missing values
                temperature[np.where(temperature == -999.)] = np.nan

                # create dataset
                _ds = fct.create_xr_2D_ds(Profile_time, _lat, _lon, _Data_quality, pressure, temperature, var, unit, _var)

                # assign pressure grid coordinate
                _ds = _ds.assign_coords(pressure_grid=pressure_grid)

                # put the 2D variable on the common pressure grid
                _ds[var + '_regrid'] = xr.DataArray(
                    data=_nearest_level_regrid(
                        _ds[var].transpose('nray', 'nbin').values,
                        _ds['pressure'].transpose('nray', 'nbin').values,
                        pressure_grid,
                    ),
                    dims=['nray', 'pressure_grid'],
                )

                # only the regridded profile is kept
                _ds = _ds.drop_vars(var)

            ds = xr.concat([ds, _ds], dim = 'nray')

        ds.to_netcdf(path = '{savepath}{filename}'.format(savepath = savepath, filename = filename))
        print('file saved: {savepath}{filename}'.format(savepath = savepath, filename = filename))



In [None]:
# Display the dataset built from the most recently processed granule.
# `_ds` is only assigned inside the processing loop, so it is undefined if
# no granule was read in this session.
_ds