# CAST
Collocated
Allong
Satellite
Track

### ToDo
- [x] convert ADM dictionary into a xarray dataset
- [x] read model data into a xarray dataset
- [x] collocate model to ADM grid

# Conda environment at PPI/lustre

```bash
# load right (ana)conda module
module load aerocom-pyaerocom-griesie_work

# activate coda environment
source `which conda | sed 's:bin/conda:etc/profile.d/conda.sh:'`
conda activate coda.2.19

# start notebook
jupyter notebook --no-browser --ip=$HOSTNAME.met.no

```

In [1]:
import numpy as np
import pandas as pd
import xarray as xr

for m in [np, pd, xr]:
    print(m.__name__, m.__version__)

numpy 1.14.3
pandas 0.23.4
xarray 0.10.8


## ADM pyaerocom tools

In [2]:
from pyaerocom.io.read_aeolus_l2b_data import ReadAeolusL2bData
ADM = ReadAeolusL2bData(verbose=True)

### Read dataset
All datasets on `$CODA_DEFINITION/download/`

In [3]:
%time ADM.read(vars_to_read=['ec550aer'])

2018-08-20 14:08:00,952:INFO:searching for data files. This might take a while...
2018-08-20 14:08:11,302:INFO:time for file find: 10.350
2018-08-20 14:08:11,595:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T002249149_002772000_003606_0001.DBL
2018-08-20 14:08:15,933:INFO:time for single file read [s]: 4.337
2018-08-20 14:08:15,934:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T020142709_002772000_003607_0001.DBL
2018-08-20 14:08:22,448:INFO:time for single file read [s]: 6.514
2018-08-20 14:08:22,450:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T034035509_002772000_003608_0001.DBL
2018-08-20 14:08:30,547:INFO:time for single file read [s]: 8.097
2018-08-20 14:08:30,553:INFO:reading file /lustre/storeA/project/aerocom/aerocom1/ADM_CALIPSO_TEST/download/AE_OPER_ALD_U_N_2A_20070101T051928319_00

CPU times: user 23 s, sys: 312 ms, total: 23.4 s
Wall time: 1min 30s


## ADM to XArray

In [4]:
adm = xr.Dataset.from_dict(dict(
    coords = dict(
        time =  dict(
            dims = 'time', 
            data = pd.to_datetime(ADM.data[:,ReadAeolusL2bData._TIMEINDEX],unit='s'),
            attrs = {'long_name':'time'},
        ),
        lat =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._LONINDEX],
            attrs = {'long_name':'latitude', 'units':'degrees_north'},
        ),
        lon =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._LATINDEX],
            attrs = {'long_name':'longitude', 'units':'degrees_east'},
        ),
        alt =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._ALTITUDEINDEX],
            attrs = {'long_name':'altitude', 'units':'m'},
        ),
    ),
    dims = 'time', 
    data_vars = dict(
        ec550 =  dict(
            dims = 'time', 
            data = ADM.data[:,ReadAeolusL2bData._EC550INDEX],
            attrs = {'long_name':'ec550', 'units':'1'},
        ),
    ),
))

adm

<xarray.Dataset>
Dimensions:  (time: 79512)
Coordinates:
  * time     (time) datetime64[ns] 2007-01-01T00:22:49.346999884 ...
    lat      (time) float64 nan 173.4 173.4 173.4 173.4 173.4 173.4 173.4 ...
    lon      (time) float64 nan 72.33 72.33 72.33 72.33 72.33 72.33 72.33 ...
    alt      (time) float64 nan 3.047e+04 2.669e+04 2.417e+04 2.164e+04 ...
Data variables:
    ec550    (time) float64 nan 0.0 0.0 55.44 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ...

## CAMS50 forecat to XArray

In [5]:
metproduction = '/lustre/storeB/project/metproduction/products/%s'
cams50 = dict(
    forecast = metproduction%'cwf_ctm/CWF_12FC-%s_hourInst.nc',
    analysis = metproduction%'cwf_ctm/CWF_00AN-%s_hourInst.nc',
    reanalysis = metproduction%'cwf_ctm/CWF_00RE-%s_hourInst.nc',
)

ncfile = cams50['forecast']%'20180606'
emep = xr.open_dataset(ncfile)
emep

<xarray.Dataset>
Dimensions:                (ilev: 9, lat: 369, lev: 8, lon: 301, time: 121)
Coordinates:
  * lon                    (lon) float64 -30.0 -29.75 -29.5 -29.25 -29.0 ...
  * lat                    (lat) float64 30.0 30.12 30.25 30.38 30.5 30.62 ...
  * lev                    (lev) float64 0.9946 0.9838 0.9703 0.9509 0.8932 ...
  * ilev                   (ilev) float64 0.9892 0.9784 0.9621 0.9396 0.8756 ...
  * time                   (time) datetime64[ns] 2018-06-05 ...
Data variables:
    P0                     float64 ...
    hyam                   (lev) float64 ...
    hybm                   (lev) float64 ...
    hyai                   (ilev) float64 ...
    hybi                   (ilev) float64 ...
    SURF_ug_O3             (time, lat, lon) float32 ...
    SURF_ug_NO2            (time, lat, lon) float32 ...
    SURF_ug_PM25_rh50      (time, lat, lon) float32 ...
    SURF_ug_PM10_rh50      (time, lat, lon) float32 ...
    SURF_ug_NO             (time, lat, lon) float32 

## CAST

### Discard ADM outside the forecast domain

In [6]:
domain = dict(
    time = emep.time[[0, -1]].values,
    lat = emep.lat[[0, -1]].values,
    lon = emep.lon[[0, -1]].values,
)
in_range = lambda x, k: np.logical_and(x >= domain[k][0], x <= domain[k][1])

domain

{'time': array(['2018-06-05T00:00:00.000000000', '2018-06-10T00:00:00.000000000'],
       dtype='datetime64[ns]'),
 'lat': array([30., 76.]),
 'lon': array([-30.,  45.])}

In [7]:
in_latlon = lambda x: np.logical_and(in_range(x.lat, 'lat'), in_range(x.lon, 'lon'))

'in_latlon: %6d'%in_latlon(adm).sum()

'in_latlon:   3979'

### Forecast time window
Skip time filtering for now. The example dataset is old and will not match any current forecast.

In [8]:
in_domain = lambda x: np.logical_and(in_latlon(x), in_range(x.time, 'time'))

'in_domain: %6d'%in_domain(adm).sum()

'in_domain:      0'

### Filtered ADM

In [9]:
ec550 = adm.where(in_latlon(adm), drop=True).ec550
ec550

<xarray.DataArray 'ec550' (time: 3979)>
array([0., 0., 0., ..., 0., 0., 0.])
Coordinates:
  * time     (time) datetime64[ns] 2007-01-01T20:39:02.746999979 ...
    lat      (time) float64 75.95 75.95 75.95 75.95 75.95 75.95 75.95 75.95 ...
    lon      (time) float64 -2.185 -2.185 -2.185 -2.185 -2.185 -2.185 -2.185 ...
    alt      (time) float64 3.057e+04 2.679e+04 2.427e+04 2.174e+04 ...
Attributes:
    long_name:  ec550
    units:      1

In [10]:
print('ADM total: %6d'%adm.ec550.count())
print('in_latlon: %6d'%ec550.count())

ADM total:  71397
in_latlon:   3979


### Collocated forecast

In [11]:
collocate = lambda model, obs: model.load().sel(
    lon=obs.lon, lat=obs.lat, time=obs.time, method='nearest'
)

In [12]:
%time aod550 = collocate(emep.AOD_550nm, ec550)

aod550

CPU times: user 1.04 s, sys: 60 ms, total: 1.1 s
Wall time: 1.1 s


<xarray.DataArray 'AOD_550nm' (time: 3979)>
array([0., 0., 0., ..., 0., 0., 0.], dtype=float32)
Coordinates:
    lon      (time) float64 -2.25 -2.25 -2.25 -2.25 -2.25 -2.25 -2.25 -2.25 ...
    lat      (time) float64 76.0 76.0 76.0 76.0 76.0 76.0 76.0 76.0 76.0 ...
  * time     (time) datetime64[ns] 2018-06-05 2018-06-05 2018-06-05 ...
    alt      (time) float64 3.057e+04 2.679e+04 2.427e+04 2.174e+04 ...
Attributes:
    long_name:           AOD_550nm
    units:               
    class:               AOD:GROUP
    current_date_first:  [2018    6    5    0]
    numberofrecords:     121
    current_date_last:   [2018    6   10    0]