In [1]:
from copy import copy
import numpy as np
from pprint import pprint

from noworkflow.now.tagging.var_tagging import backward_deps, global_backward_deps, store_operations, resume_trials, trial_diff, trial_intersection_diff, var_tag_plot, var_tag_values

### Test case

This section sets up a small test case. The workflow runs through cells defining *module*, *mae*, *squared_diff*, *rmse* and *stress_func*; the last one adds some random behaviour to the test. *target_var* is our target variable, and it depends on values computed in the preceding cells.


In [2]:
def module(num):
    """Return the magnitude of ``num`` truncated to an ``int``.

    Non-negative inputs are converted directly; negative inputs are
    multiplied by -1 first, so the returned integer is never negative.
    """
    return int(num if num >= 0 else -1 * num)

def mae(x, y):
    """Mean absolute error: the mean of the absolute value of ``x - y``."""
    return np.absolute(x - y).mean()

def squared_diff(p, q):
    """Return the square of the difference ``p - q``.

    The local name ``diff`` is deliberately reused for both steps; it is
    the name that shows up in the granular ``backward_deps`` output
    displayed further down in this notebook.
    """
    diff = p - q
    # Square by self-multiplication (overwrites the raw difference).
    diff = diff*diff
    
    return diff

def rmse(x, y):
    """Root-mean-square error between ``x`` and ``y``.

    Computes the squared difference with ``squared_diff``, averages it
    with ``np.mean`` and returns the ``np.sqrt`` of that average.
    """
    
    sqr_diff = squared_diff(x, y)
    # NOTE: the local ``mean`` is only a variable name here; np.mean is
    # still reached through the ``np`` namespace.
    mean = np.mean(sqr_diff)
    squared_root = np.sqrt(mean)
    
    return squared_root

def stress_func(m, n):
    """Return a randomised value derived from ``rmse(m, n)``.

    The result is the maximum of three candidates: ``int(x + y)`` for two
    fresh random draws, ``int(temp2)`` where ``temp2`` is a random
    fraction of the RMSE, and ``mae(temp1, temp2)``. Because it calls
    ``np.random.rand`` without a fixed seed inside the function, repeated
    calls with the same arguments can return different values.
    """
    # Two independent random draws; they are only used in int(x+y) below.
    x = np.random.rand()
    y = np.random.rand()
    
    temp1 = rmse(m, n)
    # Scale the RMSE by another random factor.
    temp2 = np.random.rand()*temp1
    
    # np.random.rand() samples from [0, 1), so int(x+y) is either 0 or 1.
    final = np.max([int(x+y), int(temp2), mae(temp1, temp2)])
    
    return final

In [3]:
var_a = np.random.rand()

In [4]:
var_b = np.random.rand()

In [5]:
k = np.random.rand()
m = np.random.rand()

In [6]:
# NOTE(review): despite the "rme" prefix, this holds the mean absolute error
# (mae) between var_a and var_b, not a root-mean-square error.
rme_err = mae(var_a, var_b)

In [7]:
var_c = stress_func(k, m)

In [8]:
var_d = var_c * np.random.rand()

In [9]:
var_e = mae(var_c, var_b)

In [10]:
# Tag the value of rmse(var_c, var_e) under the name 'target_var'; this is the
# name passed to backward_deps / global_backward_deps in the cells below.
target_var = now_variable('target_var', rmse(var_c, var_e))

Evaluation(id=179, checkpoint=0.763895043, code_component_id=354, activation_id=172, repr=0.7512567487269266)


### backward_deps example
Here we collect the backward dependencies linked to the tagged variable *target_var*. Two options are available: with granularity on or off.

In [11]:
dict_ops = backward_deps('target_var', False)
dict_ops

{9: ('k', '0.659516540561134'),
 8: ('m', '0.8090699374308168'),
 7: ('stress_func(k, m)', '1.0'),
 6: ('var_c', '1.0'),
 5: ('var_b', '0.7512567487269266'),
 4: ('mae(var_c, var_b)', '0.2487432512730734'),
 3: ('var_e', '0.2487432512730734'),
 2: ('rmse(var_c, var_e)', '0.7512567487269266'),
 1: ("now_variable('target_var', rmse(var_c, var_e))", '0.7512567487269266'),
 0: ('target_var', '0.7512567487269266')}

When the flag is set to True, the steps executed inside the function calls are displayed as well.

In [12]:
backward_deps('target_var', glanularity_level=True)

{41: ('diff', '0.7512567487269266'),
 40: ('diff', '0.5643867025077526'),
 39: ('squared_diff(x, y)', '0.5643867025077526'),
 38: ('sqr_diff', '0.5643867025077526'),
 37: ('mean', '0.5643867025077526'),
 36: ('squared_root', '0.7512567487269266'),
 35: ('x', '0.25364802317643087'),
 34: ('y', '0.9694480364222919'),
 33: ('int(x+y)', '1'),
 32: ('int(temp2)', '0'),
 31: ('diff', '-0.14955339686968283'),
 30: ('diff', '0.02236621851526086'),
 29: ('squared_diff(x, y)', '0.02236621851526086'),
 28: ('sqr_diff', '0.02236621851526086'),
 27: ('mean', '0.02236621851526086'),
 26: ('squared_root', '0.14955339686968283'),
 25: ('rmse(m, n)', '0.14955339686968283'),
 24: ('temp1', '0.14955339686968283'),
 23: ('temp2', '0.14474647534479015'),
 22: ('mae(temp1, temp2)', '0.004806921524892677'),
 21: ('[int(x+y), int(temp2), mae(temp1, temp2)]', 'complex data type'),
 20: ('final', '1.0'),
 19: ('k', '0.659516540561134'),
 18: ('m', '0.8090699374308168'),
 17: ('stress_func(k, m)', '1.0'),
 16: (

### global_backward_deps example
Here we collect all the backward dependencies linked to the tagged variable *target_var*. If the user redefines a tagged variable in more than one cell, this option returns all operations on that tagged variable across the trial.
The same two options are available: with granularity on or off.

In [13]:
global_backward_deps('target_var', False)

{51: ('k', '0.659516540561134'),
 50: ('m', '0.8090699374308168'),
 49: ('stress_func(k, m)', '1.0'),
 48: ('var_c', '1.0'),
 47: ('var_b', '0.7512567487269266'),
 46: ('mae(var_c, var_b)', '0.2487432512730734'),
 45: ('var_e', '0.2487432512730734'),
 44: ('rmse(var_c, var_e)', '0.7512567487269266'),
 43: ("now_variable('target_var', rmse(var_c, var_e))", '0.7512567487269266'),
 42: ('target_var', '0.7512567487269266'),
 41: ('diff', '0.7512567487269266'),
 40: ('diff', '0.5643867025077526'),
 39: ('squared_diff(x, y)', '0.5643867025077526'),
 38: ('sqr_diff', '0.5643867025077526'),
 37: ('mean', '0.5643867025077526'),
 36: ('squared_root', '0.7512567487269266'),
 35: ('x', '0.25364802317643087'),
 34: ('y', '0.9694480364222919'),
 33: ('int(x+y)', '1'),
 32: ('int(temp2)', '0'),
 31: ('diff', '-0.14955339686968283'),
 30: ('diff', '0.02236621851526086'),
 29: ('squared_diff(x, y)', '0.02236621851526086'),
 28: ('sqr_diff', '0.02236621851526086'),
 27: ('mean', '0.02236621851526086'),


### trial storage example
Here we save the current trial so that it can later be compared with other experiments. The dictionaries are not stored in .noworkflow/db.sqlite, but in a shelve object named *ops.db* in the notebook's local folder. **Removing it means the experimental data is lost.**

In [14]:
# dict_ops is the dictionary returned by backward_deps('target_var', False)
# above; it is stored together with the id of the current trial.
store_operations(__noworkflow__.trial_id, dict_ops)

Dictionary stored in shelve.


### resume_trials

To support the management of experiments, the user can list the trial ids of all experiments stored in ops.db that are available for comparison and analysis.

In [15]:
list_id = resume_trials()
list_id

['e61e96ed-c2ae-4de7-aa66-e8bb3419ea08']

### trial_intersection_diff

Two types of comparison are currently available. This one compares the values returned by operations: each variable or function call present in both experiments (the intersection) has its values displayed.

There is a limitation when showing complex values: matrices and tensors cannot be displayed properly, so they are only flagged as a complex data type.

Here we display a diff table between two trials.

In [16]:
# Compares the last two ids returned by resume_trials(). list_id must hold at
# least two entries, otherwise list_id[-2] raises IndexError.
trial_intersection_diff(list_id[-2], list_id[-1])

Unnamed: 0,key,9ea08
0,var_c,1.0
1,"now_variable('target_var', rmse(var_c, var_e))",0.7512567487269266
2,k,0.659516540561134
3,"mae(var_c, var_b)",0.2487432512730734
4,target_var,0.7512567487269266
5,"stress_func(k, m)",1.0
6,"rmse(var_c, var_e)",0.7512567487269266
7,var_b,0.7512567487269266
8,var_e,0.2487432512730734
9,m,0.8090699374308168


### trial_diff

Here is the second way of displaying differences. The values of variables and function calls are shown in a diff-file format, emphasizing the order in which the operations were carried out. The goal is to reveal whether the order of operations differed between two experiments.

In [18]:
# Diffs the last two ids returned by resume_trials(). list_id must hold at
# least two entries, otherwise list_id[-2] raises IndexError.
trial_diff(list_id[-2], list_id[-1])

0,1,2,3,4,5
t,1,"k, 0.659516540561134",t,1,"k, 0.659516540561134"
,2,"m, 0.8090699374308168",,2,"m, 0.8090699374308168"
,3,"stress_func(k, m), 1.0",,3,"stress_func(k, m), 1.0"
,4,"var_c, 1.0",,4,"var_c, 1.0"
,5,"var_b, 0.7512567487269266",,5,"var_b, 0.7512567487269266"
,6,"mae(var_c, var_b), 0.2487432512730734",,6,"mae(var_c, var_b), 0.2487432512730734"
,7,"var_e, 0.2487432512730734",,7,"var_e, 0.2487432512730734"
,8,"rmse(var_c, var_e), 0.7512567487269266",,8,"rmse(var_c, var_e), 0.7512567487269266"
,9,"now_variable('target_var', rmse(var_c, var_e)), 0.7512567487269266",,9,"now_variable('target_var', rmse(var_c, var_e)), 0.7512567487269266"
,10,"target_var, 0.7512567487269266",,10,"target_var, 0.7512567487269266"


### var_tag_plot

Chart the evolution of a given variable across multiple trials in the database. In this case, the values of all experiments stored in ops.db and tagged as *target_var* are plotted.

In [None]:
var_tag_plot('target_var')

### var_tag_values

If the user wants to manipulate the results of all trials, they are also available as a pandas DataFrame.

In [None]:
var_tag_values('target_var')