# Input data validation


## Define basic variables first:
* Directory variables
* Modules to be loaded
* Default cut selections

The working directory is switched to the TPCwithDNN repository so that the necessary modules can be loaded. Consequently, the paths where the figures are stored must be absolute paths.

In [None]:
## directory variables
# NOTE: these are machine-specific absolute paths; adapt them before running
# the notebook on another machine.
import os

# Directory of the TPCwithDNN code; it becomes the current working directory below.
working_dir = "/home/maja/CERN_part/CERN/TPCML/TPCwithDNN/tpcwithdnn/"
# Base directory for the tree list and the plot output (currently identical to working_dir).
base_dir = "/home/maja/CERN_part/CERN/TPCML/TPCwithDNN/tpcwithdnn/"
# Absolute directory that the output_file() calls of the plotting cells write into.
plot_dir = base_dir + "plots/"
# model_dir = working_dir + "model_new_random/"
# input_dir = "/lustre/alice/users/mkleiner/NOTESData/JIRA/ATO-439/nobias/SC-33-33-180/"
os.chdir(working_dir)

# Create the figure directory up front: writing the HTML files fails if it is
# missing. Done after chdir so that a wrong working_dir is reported first and
# no stray directory tree is created.
os.makedirs(plot_dir, exist_ok=True)

## import modules
import numpy as np
import pandas as pd
# NOTE(review): LoadTrees, tree2Panda, bokehDrawSA and output_file are used by
# later cells without an explicit import, so they presumably come from the two
# wildcard imports below - confirm against RootInteractive before narrowing them.
from RootInteractive.InteractiveDrawing.bokeh.bokehDrawSA import *
from RootInteractive.Tools.aliTreePlayer import *
from bokeh.io import output_notebook
# Configure bokeh to display its output inside the notebook cells.
output_notebook()

## print outs
# Echo the configured locations, one per line, so a wrong path is visible
# before any data is loaded.
# print("Model directory: %s" % model_dir)
# print("Input directory: %s" % input_dir)
print(
    "Working directory: %s" % working_dir,
    "Storage directory: %s" % plot_dir,
    sep="\n",
)


## global plot parameters
# Hover tooltips: one (label, "(@column)") pair per coordinate column.
tooltips = [(axis, "(@%s)" % axis) for axis in ("r", "phi", "z")]

# Default figure layout: a single row holding figure 0, followed by the
# options dictionary.
figureLayout = [[0, dict(plot_height=150)], dict(sizing_mode='scale_width')]

# Default widget layout: widgets 0, 1 and 2 in one row, followed by the
# options dictionary.
widgetLayout = [list(range(3)), dict(sizing_mode='scale_width')]

## Load trees

In [None]:
# Text file in base_dir that lists the validation trees.
val_list_path = "".join([base_dir, "valtrees.list"])

# The first argument is a "cat <file>" command string, not a plain path.
# NOTE(review): the remaining positional arguments are presumably
# (tree-name pattern to accept, pattern to reject, file selection, verbosity)
# - confirm against aliTreePlayer.LoadTrees.
tree, treeList, fileList = LoadTrees(
    "cat {}".format(val_list_path),
    "validation",
    "xxx",
    "",
    0,
)

# Quick sanity check of what was loaded (presumably dumps entry 1 of a ROOT tree).
tree.Show(1)

## Validation plots

Default cuts:
* z < 5 cm
* r < 120

TODO: Do we also want 1-D distributions? (flucDist{R,RPhi,Z}, meanDist{R,RPhi,Z}, derRefMeanDist{R,RPhi,Z}).

In [None]:
## default cut selections - without them it is too big
selection = "z<5 && r<120"

## load quantities to plot
# Coordinates plus every branch matching the *flucSC / *meanSC / *derRefMeanSC
# patterns; the `exclude` patterns are dropped again.
# NOTE(review): columnMask presumably removes the "validation_" and "model_"
# prefixes from the resulting column names - confirm against tree2Panda.
df = tree2Panda(
    tree,
    ["phi", "r", "z", ".*flucSC", ".*meanSC", ".*derRefMeanSC"],
    selection,
    exclude=["validation.*", "meanSC", "flucSC", "derRefMeanSC"],
    columnMask=[["validation_", ""], ["model_", ""]],
)
# Show the available column names for the plotting cells below.
display(df.columns)

# print(df)

### 3D distributions vs r

In [None]:
# Target HTML file for this set of figures.
output_file(plot_dir + "figValidation_SC_r.html")

# One figure per quantity (meanSC, flucSC, derRefMeanSC), each drawn against r.
figureArraySec = [
    [['r'], y_columns, {"size": 4}]
    for y_columns in (
        ['mean0_9_meanSC', 'mean1_0_meanSC', 'mean1_1_meanSC'],
        ['mean0_9_flucSC', 'mean1_0_flucSC', 'mean1_1_flucSC'],
        ['mean0_9_derRefMeanSC', 'mean1_0_derRefMeanSC', 'mean1_1_derRefMeanSC'],
    )
]

# Figures 0-2 each get their own row of height 150; options dict comes last.
figureLayoutSec = [[row, {'plot_height': 150}] for row in range(3)]
figureLayoutSec.append({'sizing_mode': 'scale_width'})

# Range widgets on r and phi, placed side by side.
widgetParamsSec = [['range', [column]] for column in ('r', 'phi')]
widgetLayoutSec = [[0, 1], {'sizing_mode': 'scale_width'}]

# Same cuts as used when loading the data frame, in data-frame query syntax.
selectionSec = "z<5 & r<120"

bokehDrawSA.fromArray(
    df,
    selectionSec,
    figureArraySec,
    widgetParamsSec,
    layout=figureLayoutSec,
    tooltips=tooltips,
    widgetLayout=widgetLayoutSec,
    sizing_mode='scale_width',
)

### 3D distributions vs phi

In [None]:
# Target HTML file for this set of figures.
output_file(plot_dir + "figValidation_SC_phi.html")

# One figure per quantity (meanSC, flucSC, derRefMeanSC), each drawn against phi.
figureArraySec = [
    [['phi'], y_columns, {"size": 4}]
    for y_columns in (
        ['mean0_9_meanSC', 'mean1_0_meanSC', 'mean1_1_meanSC'],
        ['mean0_9_flucSC', 'mean1_0_flucSC', 'mean1_1_flucSC'],
        ['mean0_9_derRefMeanSC', 'mean1_0_derRefMeanSC', 'mean1_1_derRefMeanSC'],
    )
]

# Figures 0-2 each get their own row of height 150; options dict comes last.
figureLayoutSec = [[row, {'plot_height': 150}] for row in range(3)]
figureLayoutSec.append({'sizing_mode': 'scale_width'})

# Range widgets on r and phi, placed side by side.
widgetParamsSec = [['range', [column]] for column in ('r', 'phi')]
widgetLayoutSec = [[0, 1], {'sizing_mode': 'scale_width'}]

# Same cuts as used when loading the data frame, in data-frame query syntax.
selectionSec = "z<5 & r<120"

bokehDrawSA.fromArray(
    df,
    selectionSec,
    figureArraySec,
    widgetParamsSec,
    layout=figureLayoutSec,
    tooltips=tooltips,
    widgetLayout=widgetLayoutSec,
    sizing_mode='scale_width',
)

### 1D R distributions

In [None]:
## default cut selections - without them it is too big
selection = "z<5 && r<120"

## load quantities to plot
# Coordinates plus every branch matching the *flucDistR / *meanDistR /
# *derRefMeanDistR / *flucDistRPred patterns; the `exclude` patterns are
# dropped again.
# NOTE(review): columnMask presumably removes the "validation_" and "model_"
# prefixes from the resulting column names - confirm against tree2Panda.
dfr = tree2Panda(
    tree,
    ["phi", "r", "z", ".*flucDistR", ".*meanDistR", ".*derRefMeanDistR", ".*flucDistRPred"],
    selection,
    exclude=["validation.*", "meanDistR", "flucDistR", "derRefMeanDistR"],
    columnMask=[["validation_", ""], ["model_", ""]],
)
# Show the available column names for the plotting cells below.
display(dfr.columns)

# print(dfr)

In [None]:
# Target HTML file for this set of figures.
output_file(plot_dir + "figValidation_distr_r.html")

# One figure per quantity (meanDistR, flucDistR, derRefMeanDistR), each drawn against r.
figureArraySec = [
    [['r'], y_columns, {"size": 4}]
    for y_columns in (
        ['mean0_9_meanDistR', 'mean1_0_meanDistR', 'mean1_1_meanDistR'],
        ['mean0_9_flucDistR', 'mean1_0_flucDistR', 'mean1_1_flucDistR'],
        ['mean0_9_derRefMeanDistR', 'mean1_0_derRefMeanDistR', 'mean1_1_derRefMeanDistR'],
    )
]

# Figures 0-2 each get their own row of height 150; options dict comes last.
figureLayoutSec = [[row, {'plot_height': 150}] for row in range(3)]
figureLayoutSec.append({'sizing_mode': 'scale_width'})

# Range widgets on r and phi, placed side by side.
# A third widget, ['range', ['z']], is currently disabled.
widgetParamsSec = [['range', [column]] for column in ('r', 'phi')]
widgetLayoutSec = [[0, 1], {'sizing_mode': 'scale_width'}]

# Same cuts as used when loading the data frame, in data-frame query syntax.
selectionSec = "z<5 & r<120"

bokehDrawSA.fromArray(
    dfr,
    selectionSec,
    figureArraySec,
    widgetParamsSec,
    layout=figureLayoutSec,
    tooltips=tooltips,
    widgetLayout=widgetLayoutSec,
    sizing_mode='scale_width',
)

In [None]:
# Target HTML file for this set of figures.
output_file(plot_dir + "figValidation_distrfluc_distrflucpred.html")

# One figure per mean0_9 / mean1_0 / mean1_1 set, each overlaying flucDistR
# and flucDistRPred against r.
figureArraySec = [
    [['r'], y_columns, {"size": 4}]
    for y_columns in (
        ['mean0_9_flucDistR', 'mean0_9_flucDistRPred'],
        ['mean1_0_flucDistR', 'mean1_0_flucDistRPred'],
        ['mean1_1_flucDistR', 'mean1_1_flucDistRPred'],
    )
]

# Figures 0-2 each get their own row of height 150; options dict comes last.
figureLayoutSec = [[row, {'plot_height': 150}] for row in range(3)]
figureLayoutSec.append({'sizing_mode': 'scale_width'})

# Range widgets on r and phi, placed side by side.
# A third widget, ['range', ['z']], is currently disabled.
widgetParamsSec = [['range', [column]] for column in ('r', 'phi')]
widgetLayoutSec = [[0, 1], {'sizing_mode': 'scale_width'}]

# Same cuts as used when loading the data frame, in data-frame query syntax.
selectionSec = "z<5 & r<120"

bokehDrawSA.fromArray(
    dfr,
    selectionSec,
    figureArraySec,
    widgetParamsSec,
    layout=figureLayoutSec,
    tooltips=tooltips,
    widgetLayout=widgetLayoutSec,
    sizing_mode='scale_width',
)

### 1D phi distributions

In [None]:
## default cut selections - without them it is too big
selection = "z<5 && r<120"

## load quantities to plot
# Coordinates plus every branch matching the *flucDistRPhi / *meanDistRPhi /
# *derRefMeanDistRPhi patterns; the `exclude` patterns are dropped again.
# NOTE(review): columnMask presumably removes the "validation_" and "model_"
# prefixes from the resulting column names - confirm against tree2Panda.
dfphi = tree2Panda(
    tree,
    ["phi", "r", "z", ".*flucDistRPhi", ".*meanDistRPhi", ".*derRefMeanDistRPhi"],
    selection,
    exclude=["validation.*", "meanDistRPhi", "flucDistRPhi", "derRefMeanDistRPhi"],
    columnMask=[["validation_", ""], ["model_", ""]],
)
# Show the available column names for the plotting cell below.
display(dfphi.columns)

# print(dfphi)

In [None]:
# Target HTML file for this set of figures.
output_file(plot_dir + "figValidation_distphi_phi.html")

# One figure per quantity (meanDistRPhi, flucDistRPhi, derRefMeanDistRPhi),
# each drawn against phi.
figureArraySec = [
    [['phi'], y_columns, {"size": 4}]
    for y_columns in (
        ['mean0_9_meanDistRPhi', 'mean1_0_meanDistRPhi', 'mean1_1_meanDistRPhi'],
        ['mean0_9_flucDistRPhi', 'mean1_0_flucDistRPhi', 'mean1_1_flucDistRPhi'],
        ['mean0_9_derRefMeanDistRPhi', 'mean1_0_derRefMeanDistRPhi', 'mean1_1_derRefMeanDistRPhi'],
    )
]

# Figures 0-2 each get their own row of height 150; options dict comes last.
figureLayoutSec = [[row, {'plot_height': 150}] for row in range(3)]
figureLayoutSec.append({'sizing_mode': 'scale_width'})

# Range widgets on r and phi, placed side by side.
# A third widget, ['range', ['z']], is currently disabled.
widgetParamsSec = [['range', [column]] for column in ('r', 'phi')]
widgetLayoutSec = [[0, 1], {'sizing_mode': 'scale_width'}]

# Same cuts as used when loading the data frame, in data-frame query syntax.
selectionSec = "z<5 & r<120"

bokehDrawSA.fromArray(
    dfphi,
    selectionSec,
    figureArraySec,
    widgetParamsSec,
    layout=figureLayoutSec,
    tooltips=tooltips,
    widgetLayout=widgetLayoutSec,
    sizing_mode='scale_width',
)