In [None]:
#| default_exp callbacks

# Callbacks
> Callbacks used in handlers.

In [None]:
#| export
import copy
import fastcore.all as fc
from operator import attrgetter
from cftime import date2num
import numpy as np

from marisco.configs import cfg

In [None]:
import pandas as pd

In [None]:
#| export
class Callback():
    "Base class for callbacks; `order` is the sort key used to sequence them (ascending)."
    order = 0

In [None]:
#| export
def run_cbs(cbs, obj=None):
    """Call every callback in `cbs` on `obj`, in ascending `order`.

    Before a callback runs, its docstring (if it has one) is appended to
    `obj.logs` so the object keeps a trace of what was applied to it.

    Args:
        cbs: Iterable of callables exposing an `order` attribute. `None` is
            treated as "no callbacks".
        obj: Object handed to each callback. May be `None` (the default) or
            lack a `logs` attribute, in which case nothing is logged.
    """
    for cb in sorted([] if cbs is None else cbs, key=attrgetter('order')):
        # Look `logs` up on every pass: a callback is free to replace it.
        logs = getattr(obj, 'logs', None)
        if cb.__doc__ and logs is not None: logs.append(cb.__doc__)
        cb(obj)

In [None]:
#| export
class Transformer():
    "Hold a private copy of a dict of dataframes and run callbacks over it."
    def __init__(self, dfs, cbs=None):
        self.logs = []
        # Copy each frame so callbacks never touch the caller's originals.
        self.dfs = {grp: df.copy() for grp, df in dfs.items()}
        self.cbs = cbs

    def callback(self):
        "Run the registered callbacks on this transformer."
        run_cbs(self.cbs, self)

    def unique(self, col_name):
        "Sorted distinct values of column `col_name`, pooled over every group that has it."
        found = []
        for df in self.dfs.values():
            col = df.get(col_name)
            if col is not None: found.append(col)
        # With no matching column, fall back to the unique values of an empty list.
        if not found: return np.unique([])
        return np.unique(np.concatenate(found))

    def __call__(self):
        "Apply the callbacks (when any were given) and return the dict of dataframes."
        if self.cbs: self.callback()
        return self.dfs

Example:

In [None]:
# Two groups; only 'biota' has a 'species' column.
dfs = {'biota': pd.DataFrame({'id': [0, 1, 2], 'species': [0, 2, 0], 'depth': [2, 3, 4]}),
       'seawater': pd.DataFrame({'id': [0, 1, 2], 'depth': [3, 4, 5]})}
# No callbacks are passed, so calling the transformer just returns its copies of the frames.
tfm = Transformer(dfs); tfm()
# Groups lacking the column are skipped when collecting distinct values.
tfm.unique('species')

array([0, 2])

In [None]:
# A column found in no group yields an empty array.
tfm.unique('non_existing_var')

array([], dtype=float64)

## Generic

In [None]:
#| export
class EncodeTimeCB(Callback):
    "Encode time as `int` representing seconds since xxx"
    def __init__(self, cfg): fc.store_attr()

    def __call__(self, tfm):
        # Every group's `time` column is converted with `date2num`, using the
        # units string stored under cfg['units']['time'].
        for df in tfm.dfs.values():
            df['time'] = df['time'].apply(
                lambda t: date2num(t, units=self.cfg['units']['time']))

In [None]:
#| export
class SanitizeLonLatCB(Callback):
    "Drop row when both longitude & latitude equal 0."
    def __init__(self, verbose=False): fc.store_attr()

    def __call__(self, tfm):
        # Snapshot the group names up front since each group's frame is swapped below.
        for grp in list(tfm.dfs):
            frame = tfm.dfs[grp]
            at_origin = (frame.lon == 0) & (frame.lat == 0)
            n_origin = at_origin.sum()
            if n_origin and self.verbose:
                print(f'The "{grp}" group contains {n_origin} data points whose (lon, lat) = (0, 0)')
            # Keep only the rows away from (0, 0); the group is replaced, not edited in place.
            tfm.dfs[grp] = frame.loc[~at_origin]

In [None]:
# Rows 0 and 2 have lon == lat == 0 and are dropped; verbose=True prints how many were found.
dfs = {'biota': pd.DataFrame({'lon': [0, 1, 0], 'lat': [0, 2, 0]})}
tfm = Transformer(dfs, cbs=[SanitizeLonLatCB(verbose=True)])
tfm()['biota']

The "biota" group contains 2 data points whose (lon, lat) = (0, 0)


Unnamed: 0,lon,lat
1,1,2
