# CAST
Collocated
Allong
Satellite
Track

### ToDo
- [x] convert ADM dictionary into a xarray dataset
- [x] read model data into a xarray dataset
- [x] collocate model to ADM grid

# Conda environment at PPI/lustre

```bash
# load right (ana)conda module
module load aerocom-pyaerocom-griesie_work

# activate coda environment
source `which conda | sed 's:bin/conda:etc/profile.d/conda.sh:'`
conda activate coda.2.19

# start notebook
jupyter notebook --no-browser --ip=$HOSTNAME.met.no

```

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

for m in [np, pd, xr]:
    print(m.__name__, m.__version__)

numpy 1.14.3
pandas 0.23.4
xarray 0.10.8


## ADM pyaerocom tools

In [2]:
from pyaerocom.io.read_aeolus_l2b_data import ReadAeolusL2bData
ADM = ReadAeolusL2bData(verbose=True)

### Read dataset
All datasets on `$CODA_DEFINITION/download/`

In [3]:
ADM.read(vars_to_read=['ec550aer'])

2018-08-17 14:22:53,402:INFO:searching for data files. This might take a while...
2018-08-17 14:22:54,354:INFO:time for file find: 0.952
2018-08-17 14:23:00,069:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T002249149_002772000_003606_0001.DBL
2018-08-17 14:23:04,959:INFO:time for single file read [s]: 4.890
2018-08-17 14:23:04,960:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T020142709_002772000_003607_0001.DBL
2018-08-17 14:23:06,813:INFO:time for single file read [s]: 1.853
2018-08-17 14:23:06,816:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T034035509_002772000_003608_0001.DBL
2018-08-17 14:23:16,368:INFO:time for single file read [s]: 9.552
2018-08-17 14:23:16,371:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T051928319_002

## ADM to XArray

In [4]:
adm = xr.Dataset.from_dict(dict(
    coords = dict(
        time =  dict(
            dims = 'time', 
            data = pd.to_datetime(ADM.data[:,ReadAeolusL2bData._TIMEINDEX],unit='s'),
            attrs = {'long_name':'time'},
        ),
        lat =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._LONINDEX],
            attrs = {'long_name':'latitude', 'units':'degrees_north'},
        ),
        lon =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._LATINDEX],
            attrs = {'long_name':'longitude', 'units':'degrees_east'},
        ),
        alt =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._ALTITUDEINDEX],
            attrs = {'long_name':'altitude', 'units':'m'},
        ),
    ),
    dims = 'time', 
    data_vars = dict(
        ec550 =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._EC550INDEX],
            attrs = {'long_name':'ec550', 'units':'1'},
        ),
    ),
))

adm

<xarray.Dataset>
Dimensions:  (time: 79512)
Coordinates:
  * time     (time) datetime64[ns] 2007-01-01T00:22:49.346999884 ...
    lat      (time) float64 nan 173.4 173.4 173.4 173.4 173.4 173.4 173.4 ...
    lon      (time) float64 nan 72.33 72.33 72.33 72.33 72.33 72.33 72.33 ...
    alt      (time) float64 nan 3.047e+04 2.669e+04 2.417e+04 2.164e+04 ...
Data variables:
    ec550    (time) float64 nan 0.0 0.0 55.44 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...

## CAMS50 forecat to XArray

In [5]:
metproduction = '/lustre/storeB/project/metproduction/products/%s'
cams50 = dict(
    forecast = metproduction%'cwf_ctm/CWF_12FC-%s_hourInst.nc',
    analysis = metproduction%'cwf_ctm/CWF_00AN-%s_hourInst.nc',
    reanalysis = metproduction%'cwf_ctm/CWF_00RE-%s_hourInst.nc',
)

ncfile = cams50['forecast']%'20180606'
emep = xr.open_dataset(ncfile)
emep

<xarray.Dataset>
Dimensions:                (ilev: 9, lat: 369, lev: 8, lon: 301, time: 121)
Coordinates:
  * lon                    (lon) float64 -30.0 -29.75 -29.5 -29.25 -29.0 ...
  * lat                    (lat) float64 30.0 30.12 30.25 30.38 30.5 30.62 ...
  * lev                    (lev) float64 0.9946 0.9838 0.9703 0.9509 0.8932 ...
  * ilev                   (ilev) float64 0.9892 0.9784 0.9621 0.9396 0.8756 ...
  * time                   (time) datetime64[ns] 2018-06-05 ...
Data variables:
    P0                     float64 ...
    hyam                   (lev) float64 ...
    hybm                   (lev) float64 ...
    hyai                   (ilev) float64 ...
    hybi                   (ilev) float64 ...
    SURF_ug_O3             (time, lat, lon) float32 ...
    SURF_ug_NO2            (time, lat, lon) float32 ...
    SURF_ug_PM25_rh50      (time, lat, lon) float32 ...
    SURF_ug_PM10_rh50      (time, lat, lon) float32 ...
    SURF_ug_NO             (time, lat, lon) float32 

## CAST

### Only ADM inside the forecast domain

In [6]:
domain = dict(
    lat = emep.lat[[0, -1]].values,
    lon = emep.lon[[0, -1]].values,
)
domain

{'lat': array([30., 76.]), 'lon': array([-30.,  45.])}

In [7]:
in_range = lambda x, k: np.logical_and(x > domain[k][0], x < domain[k][1])
in_latlon = lambda x: np.logical_and(in_range(x.lat, 'lat'), in_range(x.lon, 'lon'))
in_latlon(adm)

<xarray.DataArray (time: 79512)>
array([False, False, False, ..., False, False, False])
Coordinates:
  * time     (time) datetime64[ns] 2007-01-01T00:22:49.346999884 ...
    lat      (time) float64 nan 173.4 173.4 173.4 173.4 173.4 173.4 173.4 ...
    lon      (time) float64 nan 72.33 72.33 72.33 72.33 72.33 72.33 72.33 ...
    alt      (time) float64 nan 3.047e+04 2.669e+04 2.417e+04 2.164e+04 ...

In [8]:
ec550 = adm.where(in_latlon(adm), drop=True).ec550
ec550

<xarray.DataArray 'ec550' (time: 2239)>
array([0., 0., 0., ..., 0., 0., 0.])
Coordinates:
  * time     (time) datetime64[ns] 2007-01-01T22:04:55.558000088 ...
    lat      (time) float64 63.0 63.0 63.0 63.0 63.0 63.0 63.0 63.0 63.0 ...
    lon      (time) float64 44.8 44.8 44.8 44.8 44.8 44.8 44.8 44.8 44.8 ...
    alt      (time) float64 3.051e+04 2.672e+04 2.42e+04 2.168e+04 1.978e+04 ...
Attributes:
    long_name:  ec550
    units:      1

In [9]:
print('ADM total: %6d'%adm.ec550.count().values)
print('In domain: %6d'%ec550.count())

ADM total:  71397
In domain:   2239


### Collocate

In [10]:
lon = ec550.lon.rename({'time':'adm'})
lat = ec550.lat.rename({'time':'adm'})
%time aod550 = emep.load().sel(lon=lon, lat=lat, method='nearest')
aod550

CPU times: user 1min 46s, sys: 15.4 s, total: 2min 2s
Wall time: 2min 23s


<xarray.Dataset>
Dimensions:                (adm: 2239, ilev: 9, lev: 8, time: 121)
Coordinates:
    lon                    (adm) float64 44.75 44.75 44.75 44.75 44.75 44.75 ...
    lat                    (adm) float64 63.0 63.0 63.0 63.0 63.0 63.0 63.0 ...
  * lev                    (lev) float64 0.9946 0.9838 0.9703 0.9509 0.8932 ...
  * ilev                   (ilev) float64 0.9892 0.9784 0.9621 0.9396 0.8756 ...
  * time                   (time) datetime64[ns] 2018-06-05 ...
  * adm                    (adm) datetime64[ns] 2007-01-01T22:04:55.558000088 ...
    alt                    (adm) float64 3.051e+04 2.672e+04 2.42e+04 ...
Data variables:
    P0                     float64 1.013e+03
    hyam                   (lev) float64 0.6 1.8 3.3 5.45 11.85 26.7 33.05 47.85
    hybm                   (lev) float64 0.994 0.982 0.967 0.9455 0.8815 ...
    hyai                   (ilev) float64 1.2 2.4 4.2 6.7 13.8 29.7 36.4 ...
    hybi                   (ilev) float64 0.988 0.976 0.958 0.933