# Higher Dimensional Data

In [1]:
import pathlib 
from collections import defaultdict

import h5py
import pandas as pd
import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
from matplotlib.colors import LogNorm,Normalize
import matplotlib.cm as cm

from bluesky_tutorial_utils import fetch

In [2]:
# Obtain the run catalog from the tutorial helper module; the cells below
# read from `cat`.
cat = fetch.rsoxs_catalog()

## Working with Multiple xArrays: Gathering Data

### Build Index Table (Table of Contents)

In [3]:
def build_pandas_index(cat):
    """Build an index table with one row per catalog entry.

    Parameters
    ----------
    cat : mapping-like
        Object whose ``items()`` yields ``(key, entry)`` pairs, where each
        ``entry`` exposes ``entry.metadata['start']``. The keys are ignored.

    Returns
    -------
    pandas.DataFrame
        One row per entry, built from that entry's ``metadata['start']``.
    """
    start_docs = []
    for _key, entry in cat.items():
        start_docs.append(entry.metadata['start'])
    return pd.DataFrame(start_docs)

In [4]:
# Table of contents: one row per catalog entry, taken from each entry's
# 'start' metadata.
toc = build_pandas_index(cat)

In [6]:
# Display the index table.
toc

In [7]:
# Summarize the table, keeping only the count/min/max rows of describe().
toc.describe().loc[['count','min','max']]

### Select subset of data From Index

In [8]:
# Keep only the runs that match one fixed configuration, ordered by Energy.
sdf = (
    toc
    .query('Radius==40.0 & EndAngle==360.0 & PhysSize==5 & NumX==512')
    .sort_values('Energy')
)
# Summarize the selection (count/min/max rows only).
sdf.describe().loc[['count', 'min', 'max']]

### Filter via search

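You can also filter by searching the catalog directly instead of querying the index table. Note that the search below constrains only `Radius` and `PhysSize`, so it is a looser filter than the pandas query above.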

In [9]:
# Search the catalog on run metadata. This query constrains only Radius and
# PhysSize (not EndAngle or NumX, which the pandas query above also fixes).
filtered_cat = cat.search({"Radius": 40.0, "PhysSize": 5})
# Build an index table for just the matching entries and summarize it.
fdf = build_pandas_index(filtered_cat)
fdf.describe().loc[['count','min','max']]

In [10]:
# Read the 'primary' stream of the entry at cat[-1] to see what a single run
# contains.
data = cat[-1]['primary'].read()

In [11]:
# Display what was read from that run.
data

### Gather Data

In [32]:
def gather(df, cat, drop_keys=('uid', 'time', 'input_file', 'source')):
    """Load the image of every run listed in ``df`` and collect its metadata.

    Parameters
    ----------
    df : pandas.DataFrame
        Index table with one row per run; must contain a ``'uid'`` column.
    cat : catalog
        Object indexable by uid; ``cat[uid]['primary'].read()`` must return
        something with ``'image'``, ``'Qy'`` and ``'Qx'`` entries.
    drop_keys : iterable of str, optional
        Row entries left out of the returned metadata table. Keys that are
        not present in a row are silently skipped.

    Returns
    -------
    data_arrays : list of xarray.DataArray
        One array per row of ``df``, in row order, with dims ``('Qy', 'Qx')``.
    coords : pandas.DataFrame
        One row per array, holding the row's values minus ``drop_keys``.
    """
    coords = []
    data_arrays = []
    for _, row in df.iterrows():
        data = cat[row['uid']]['primary'].read()
        # Convert the xr.Dataset to an xr.DataArray in a very brute-force way:
        # squeeze the image and both axes and relabel the result by Qy/Qx.
        nd = xr.DataArray(data['image'].squeeze(),
                          dims=['Qy', 'Qx'],
                          coords={'Qy': np.asarray(data['Qy']).squeeze(),
                                  'Qx': np.asarray(data['Qx']).squeeze()})
        data_arrays.append(nd)
        c = dict(row)
        for k in drop_keys:
            # pop() with a default so a missing column is not a KeyError.
            c.pop(k, None)
        coords.append(c)

    return data_arrays, pd.DataFrame(coords)

In [33]:
# Load one image per run listed in fdf, plus a table of each run's remaining
# metadata.
data_arrays,coords = gather(fdf, cat)

In [34]:
# Inspect the first gathered image.
data_arrays[0]

In [35]:
# Display the per-run metadata table returned by gather().
coords

In [17]:
# Plot the second gathered image with a logarithmic colour scale spanning
# 1e-9 to 1.
data_arrays[1].plot(norm=LogNorm(1e-9,1),aspect=1.2,size=5)

## Multiple xArrays: simple xr.concat

In [18]:
# Select the runs of one fixed configuration, ordered by Energy.
sdf = (
    toc
    .query('Radius==40.0 & EndAngle==360.0 & PhysSize==5 & NumX==512')
    .sort_values('Energy')
)
display(sdf.describe().loc[['count', 'min', 'max']])

# Load the image of each selected run.
data_arrays, coords = gather(sdf, cat)

# Stack the per-run images along a new dimension named 'Energy'.
da = xr.concat(data_arrays, dim='Energy')
da

In [19]:
# Label the Energy dimension with the Energy values of the selected runs
# (sdf was sorted by Energy before the images were gathered).
da = da.assign_coords(Energy=sdf.Energy.values)
da

In [20]:
# Take the slice nearest to Qy=0 and plot it with a log colour norm and a
# log-scaled y axis.
da.sel(Qy=0,method='nearest').plot(norm=LogNorm(1e-9,1),yscale='log')

## Building xArrays: Multi-Index

In [22]:
# Radius is no longer fixed here, so the selection varies in both Energy and
# Radius; order the rows by Energy first, then Radius.
sdf = (
    toc
    .query('EndAngle==360.0 & PhysSize==5 & NumX==512')
    .sort_values(['Energy', 'Radius'])
)
display(sdf.describe().loc[['count', 'min', 'max']])

In [23]:
data_arrays, coords = gather(sdf, cat)

# Naive attempt: pass both names as `dim`. This call is expected to fail and
# the failure is the demonstration, so catch the error and show it instead of
# letting it stop a "Restart & Run All".
try:
    da = xr.concat(data_arrays, dim=['Energy', 'Radius'])
except Exception as err:
    print(f"{type(err).__name__}: {err}")

Hmm... that didn't work: `xr.concat` concatenates along a single dimension, so a list of two dimension names is not accepted.


Let's try a multi-index instead: a single `pandas.MultiIndex` that carries both `Energy` and `Radius` as levels.

In [24]:
# Build a two-level (Energy, Radius) index with one entry per row of sdf.
keys = ['Energy', 'Radius']
tuples = list(map(tuple, sdf[keys].values))
index = pd.MultiIndex.from_tuples(tuples, names=keys)
# The index name is set to 'system' before it is used as the concat dimension.
index.name = 'system'

# Stack the gathered images along that multi-index.
da = xr.concat(data_arrays, dim=index)
da

In [25]:
# Energy is a level of the 'system' MultiIndex, which is selected by exact
# label: current xarray raises if `method=` is passed for a MultiIndex level.
da.sel(Energy=285.)

In [26]:
# Select Energy exactly (it is a MultiIndex level, which does not support
# `method=`), then pick the Qy value nearest to 0 on the plain Qy index.
(
    da.sel(Energy=285.)
    .sel(Qy=0, method='nearest')
    .plot.line(x='Qx', yscale='log', xscale='log')
)

In [27]:
# Exact-label selection on the Energy level (`method=` is not supported for
# MultiIndex levels), then one panel per Radius, three panels per row.
da.sel(Energy=285.).plot(col='Radius', col_wrap=3, norm=LogNorm(1e-9, 1))