# Calibration QA per pad and per time series - see AT0-611

* Input data are trees per run with aggregated per pad information
* Machine learning is used for the per pad interpolation


In [None]:
from bokeh.io import output_notebook
from RootInteractive.Tools.aliTreePlayer import *
from RootInteractive.InteractiveDrawing.bokeh.bokehDrawSA import *
from bokeh.io import curdoc
import numpy as np
import os
import sys
from ROOT import TFile, gSystem
import plotly.express as px
output_notebook()
from IPython.display import Image
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
import matplotlib.pyplot as plt


from scipy.stats import norm
import matplotlib.mlab as mlab
import scipy.stats as stats
import math

In [None]:
import pprint
from RootInteractive.InteractiveDrawing.bokeh.bokehInteractiveParameters import *
pp = pprint.PrettyPrinter(width=220, compact=True)
#from RootInteractive.MLpipeline.NDFunctionInterface import  *
from RootInteractive.MLpipeline.MIForestErrPDF import *

# Parametrize algorithm
* Random forest parameters
* compression parameter

### Random forest parameters for the per pad calibration

In [None]:
%%time
# Settings shared by the two random-forest regressors created below.
n_estimators = 200
n_jobs = 100
npoints = 1_000_000  # not referenced by any other visible cell
max_depthBase = 14   # depth limit, used for the base regressor only
max_samples = 0.1

# Base regressor: same forest settings plus an explicit max_depth.
regressorBase = RandomForestRegressor(
    n_estimators=n_estimators,
    n_jobs=n_jobs,
    max_depth=max_depthBase,
    max_samples=max_samples,
)
# Local regressor: no max_depth argument is passed.
regressorLocal = RandomForestRegressor(
    n_estimators=n_estimators,
    n_jobs=n_jobs,
    max_samples=max_samples,
)

### Data dashboard compression

In [None]:
# (column pattern, [(step, parameter), ...]) pairs; later handed to
# bokehDrawSA.fromArray as its arrayCompression argument.
arrayCompressionRelative10 = [
    (
        ".*",
        [
            ("relative", 10),
            ("code", 0),
            ("zip", 0),
            ("base64", 0),
        ],
    ),
]

In [None]:
# Defined by the user: path of the ROOT file that is opened in the next cell.
# NOTE(review): absolute path inside a personal home directory - has to be
# adapted before running the notebook on another machine.
inputPath="/home/berki/Software/TPCQCVis/TPCQCVis/macro/out_528471.root"


In [None]:
# Open the input file through the explicitly imported TFile class, so this cell
# does not depend on the name ROOT being provided by one of the star imports.
fIn = TFile.Open(inputPath)
# Fail early with a readable message instead of an obscure error further down.
if not fIn or fIn.IsZombie():
    raise OSError(f"Cannot open input file: {inputPath}")

tree = fIn.Get("tree")
if not tree:
    raise KeyError(f'Object "tree" not found in {inputPath}')

# Print the content of the first entry.
tree.Show(0)
# The alias list can be a null pointer when the tree defines no aliases.
aliases = tree.GetListOfAliases()
if aliases:
    aliases.Print()

## Import variables and aliases from the tree to pandas

In [None]:
%%time
# Names / patterns of the tree variables requested from tree2Panda.
# NOTE(review): "roc" appears twice in the list (position line and side line) -
# presumably harmless, confirm against tree2Panda.
varList=[".*Median.*",".*Mean.*", ".*RMS.*",                             ## Mean, Median and RMS properties     
        "roc","ly","lx","gy","gx","row","pad","padArea",  "padHeight",   ## position
        "isEdgePad","partition",                                         ## edge-pad flag and partition index
        #"traceLength",                                                   ## trace length (currently not requested)
        "sector","roc","A_Side","C_Side",                                ## switch A side C side
        ]
# Patterns handed to tree2Panda as its exclude argument.
exclude=[".*TS.*"]
# Read the requested variables with the selection "roc>=0"; the columnMask pair
# ("_fElements" -> "") is forwarded to tree2Panda unchanged.
dfScan=tree2Panda(tree,varList,"roc>=0",columnMask=[["_fElements",""]], exclude=exclude)
# Report the size of the resulting table.
print(dfScan.shape)

In [None]:
# Show the imported column names together with the shape of the column index.
print(dfScan.columns,dfScan.columns.shape)

In [None]:
#dfScan["dPedestals"]=dfScan["Pedestals"]-dfScan["Pedestals"].astype('int16')

# Fit base and local properties
* regressor with local X
* local filter regressor - mean,median,std filter

In [None]:
%%script false --no-raise-error  # disable for a moment
%%time
# Fit a base and a local random-forest regression for every variable listed in
# `vars` and store predictions, ratios and per-point statistics as new dfScan columns.
# Statistics requested from predictRFStatNew; the loop below reads back "median" and "std".
statDictionary={"mean":None,"median":None, "std":None}

# Input columns of the base regression.
# NOTE(review): "traceLength" is commented out in the import list of the cell
# above - this would presumably raise a KeyError if the cell is re-enabled; confirm.
varListG=["lx","traceLength","padArea"]
# Input columns of the local regression.
varListLocal=["lx","ly","roc"]
# Target variables to be fitted.
# NOTE(review): the name `vars` shadows the Python builtin of the same name.
vars=[
    "Noise","N_Digits","N_Clusters",
    "Q_Max", "Q_Tot", "GainMap","Q_Max_Digits",
    "fraction","expLambda"
]
for var in vars:
    # base regression with limited depth (regressorBase)
    regressorBase.fit(dfScan[varListG],dfScan[var])
    dfScan[f"{var}RF0"]= regressorBase.predict(dfScan[varListG])
    # ratio of the measured value to the base prediction
    dfScan[f"{var}RF0_Ratio"]=dfScan[f"{var}"]/dfScan[f"{var}RF0"]
    # local regression (regressorLocal)
    regressorLocal.fit(dfScan[varListLocal],dfScan[var])
    # statistics for the same inputs, passed as a float32 numpy array
    statDictionaryOut=predictRFStatNew(regressorLocal,dfScan[varListLocal].astype('float32').to_numpy(),statDictionary,n_jobs)
    dfScan[f"{var}RFL"]= regressorLocal.predict(dfScan[varListLocal])
    dfScan[f"{var}RFL_Ratio"]=dfScan[f"{var}"]/dfScan[f"{var}RFL"]
    dfScan[f"{var}RFL_Med"]=statDictionaryOut["median"]
    dfScan[f"{var}RFL_Std"]=statDictionaryOut["std"]
    # ratio of the measured value to the "median" statistic
    dfScan[f"{var}RFLMed_Ratio"]=dfScan[f"{var}"]/dfScan[f"{var}RFL_Med"]
    print(f"Fit {var}")



# Define metadata variables

In [None]:
# df is a second name for the same object as dfScan (plain assignment, no copy).
df=dfScan
# presumably attaches the `meta` accessor used on the next line - confirm in RootInteractive
initMetadata(df)
# Set the metadata dictionary to a single axis-title entry.
df.meta.metaData = {'qVector.AxisTitle': "Q(ADC)"}

In [None]:
# Row index shifted by 63 for entries with partition > 1.
# NOTE(review): 63 is presumably the number of rows before partition 2 - confirm.
df["padrow"] = df["row"] + 63 * (df["partition"] > 1)

In [None]:
# Display the column index of df as the cell output.
df.columns

In [None]:

# Derived variables can be defined here, e.g. to express invariances such as
# a cut on abs(XX_Mean/XX_Median). The list is currently empty.
aliasArray=[
#    ("","dNprimdx*padLength"),                                    # ionization over pad
]

# Column index of df, used as the "options" of the variable selectors.
variables=df.columns


# Dashboard parameters: variable selectors, bin counts, axis transforms,
# followed by the legend and marker parameters taken from figureParameters.
parameterArray = [
    # variable selectors - options are the columns of df
    {"name": "varX", "value": "padrow", "options": variables},
    {"name": "varY", "value": "padArea", "options": variables},
    {"name": "varYNorm", "value": "padArea", "options": variables},
    {"name": "varZ", "value": "partition", "options": variables},
    # number of bins per histogram axis
    {"name": "nbinsX", "value": 30, "range": [10, 200]},
    {"name": "nbinsY", "value": 30, "range": [10, 200]},
    {"name": "nbinsZ", "value": 5, "range": [1, 10]},
    # {"name": "sigmaNRel", "value": 3.35, "range": [1, 5]},
    # exponent referenced by the "lambda x: x**exponentX" transform option
    {"name": "exponentX", "value": 1, "range": [-5, 5]},
    {
        "name": "xAxisTransform",
        "value": None,
        "options": [
            None, "sqrt", "lambda x: log(1+x)", "lambda x: 1/sqrt(x)",
            "lambda x: x**exponentX", "lambda x,y: x/y",
        ],
    },
    {
        "name": "yAxisTransform",
        "value": None,
        "options": [
            None, "sqrt", "lambda x: log(1+x)", "lambda x: 1/sqrt(x)",
            "lambda x: x**exponentX", "lambda x,y: y/x",
        ],
    },
    {
        "name": "zAxisTransform",
        "value": None,
        "options": [
            None, "sqrt", "lambda x: log(1+x)", "lambda x: 1/sqrt(x)",
            "lambda x: x**exponentX",
        ],
    },
    # parameters shipped with RootInteractive's figureParameters
    *figureParameters["legend"]["parameterArray"],
    *figureParameters["markers"]["parameterArray"],
]

# Widget definitions as [widget type, [controlled name], {"name": widget name}],
# followed by the legend and marker widgets taken from figureParameters.
widgetParams = [
    # data selection
    *(["multiSelect", [column], {"name": column}]
      for column in ("sector", "partition", "isEdgePad", "A_Side")),
    *(["range", [column], {"name": column}]
      for column in ("padrow", "lx", "gx", "gy")),
    # ['spinnerRange',["logq2NPrimRatio"],{"name":"logq2NPrimRatio","range": [-2, 4]}],
    # ['spinnerRange',["lognTot2NPrimRatio"],{"name":"lognTot2NPrimRatio"}],
    # histogram variables and binning
    *(["select", [parameter], {"name": parameter}]
      for parameter in ("varX", "varY", "varYNorm", "varZ")),
    *(["slider", [parameter], {"name": parameter}]
      for parameter in ("nbinsY", "nbinsX", "nbinsZ")),
    # axis transforms
    ["spinner", ["exponentX"], {"name": "exponentX"}],
    # ['spinner', ['sigmaNRel'],{"name": "sigmaNRel"}],
    *(["select", [parameter], {"name": parameter}]
      for parameter in ("yAxisTransform", "xAxisTransform", "zAxisTransform")),
    # widgets shipped with RootInteractive's figureParameters
    *figureParameters["legend"]["widgets"],
    *figureParameters["markers"]["widgets"],
]

# Widget layout: one entry per group, listing the widget names row by row.
widgetLayoutDesc = {
    "Select": [
        ["sector", "A_Side", "partition", "isEdgePad"],
        ["padrow", "lx", "gx", "gy"],
    ],
    # "Select":[["region","SatOn"],["dNprimdx","nSecSatur","TransGEM"]],
    "Histograms": [
        ["nbinsX", "nbinsY", "nbinsZ", "varX", "varY", "varYNorm", "varZ"],
        {"sizing_mode": "scale_width"},
    ],
    "Transform": [
        ["exponentX", "xAxisTransform", "yAxisTransform", "zAxisTransform"],
        {"sizing_mode": "scale_width"},
    ],
    "Legend": figureParameters["legend"]["widgetLayout"],
    "Markers": ["markerSize"],
}

# Options appended as the last element of figureArray.
# Built as a fresh dictionary: the original code aliased
# figureParameters["legend"]["figureOptions"] and then wrote into it, which
# modified the shared figureParameters structure as a side effect. The leading
# `figureGlobalOption={}` was dead code (immediately overwritten) and is dropped.
figureGlobalOption = {
    **figureParameters["legend"]["figureOptions"],
    # names of the dashboard parameters that drive marker size and axis transforms
    "size": "markerSize",
    "x_transform": "xAxisTransform",
    "y_transform": "yAxisTransform",
    "z_transform": "zAxisTransform",
}

In [None]:
# Histogram definitions of the dashboard: 2D (X,Y) and 3D (X,Y,Z) histograms,
# each for varY and for the ratio varY/varYNorm. Entries differ only in name
# and variable list; the bin-count parameters follow the number of variables.
histoArray = [
    {
        "name": histoName,
        "variables": histoVariables,
        "nbins": ["nbinsX", "nbinsY", "nbinsZ"][:len(histoVariables)],
        "axis": [1],
        "quantiles": [0.35, 0.5],
        "unbinned_projections": True,
    }
    for histoName, histoVariables in (
        ("histoXYData", ["varX", "varY"]),
        ("histoXYNormData", ["varX", "varY/varYNorm"]),
        ("histoXYZData", ["varX", "varY", "varZ"]),
        ("histoXYNormZData", ["varX", "varY/varYNorm", "varZ"]),
    )
]

In [None]:
# Figure definitions as [x variables, y variables, options]. The position of an
# entry in this list is the index used by figureLayoutDesc, so the order matters.
# The last element is the dictionary of options shared by all figures.
# NOTE(review): sources named "<histogram>_1" are presumably the projections
# requested with "axis":[1] in histoArray - confirm in RootInteractive.
figureArray=[
    # figures 0-3: source histoXYData / histoXYData_1
    [[("bin_bottom_0", "bin_top_0")], [("bin_bottom_1", "bin_top_1")], {"colorZvar": "bin_count", "source":"histoXYData"}],
    [["bin_center_1"], ["bin_count"], { "source":"histoXYData", "colorZvar": "bin_center_0"}],
    [["bin_center_0"], ["mean","quantile_0",], { "source":"histoXYData_1","errY":"std/sqrt(entries)"}],
    [["bin_center_0"], ["std"], { "source":"histoXYData_1","errY":"std/sqrt(entries)"}],
    # figures 4-7: source histoXYNormData / histoXYNormData_1
    [[("bin_bottom_0", "bin_top_0")], [("bin_bottom_1", "bin_top_1")], {"colorZvar": "bin_count", "source":"histoXYNormData"}],
    [["bin_center_1"], ["bin_count"], { "source":"histoXYNormData", "colorZvar": "bin_center_0"}],
    [["bin_center_0"], ["mean","quantile_0",], { "source":"histoXYNormData_1","errY":"std/sqrt(entries)"}],
    [["bin_center_0"], ["std"], { "source":"histoXYNormData_1","errY":"std/sqrt(entries)"}],
    # figures 8-11: source histoXYZData_1, colored by bin_center_2
    [["bin_center_0"], ["mean"], { "source":"histoXYZData_1","colorZvar":"bin_center_2","errY":"std/sqrt(entries)"}],
    [["bin_center_0"], ["quantile_0"], { "source":"histoXYZData_1","colorZvar":"bin_center_2","errY":"2*std/sqrt(entries)"}],
    [["bin_center_0"], ["quantile_1"], { "source":"histoXYZData_1","colorZvar":"bin_center_2","errY":"3*std/sqrt(entries)"}],
    [["bin_center_0"], ["std"], { "source":"histoXYZData_1","colorZvar":"bin_center_2","errY":"std/sqrt(entries)"}],
    # figures 12-15: source histoXYNormZData_1, colored by bin_center_2, with explicit y-axis title
    [["bin_center_0"], ["mean"], { "source":"histoXYNormZData_1","colorZvar":"bin_center_2","errY":"std/sqrt(entries)","yAxisTitle":"{varY}/{varYNorm}"}],
    [["bin_center_0"], ["quantile_0"], { "source":"histoXYNormZData_1","colorZvar":"bin_center_2","errY":"2*std/sqrt(entries)","yAxisTitle":"{varY}/{varYNorm}"}],
    [["bin_center_0"], ["quantile_1"], { "source":"histoXYNormZData_1","colorZvar":"bin_center_2","errY":"3*std/sqrt(entries)","yAxisTitle":"{varY}/{varYNorm}"}],
    [["bin_center_0"], ["std"], { "source":"histoXYNormZData_1","colorZvar":"bin_center_2","errY":"std/sqrt(entries)","yAxisTitle":"{varY}/{varYNorm}"}],
    figureGlobalOption
]
# Figure layout: one entry per group, each showing four consecutive figures of
# figureArray as a 2x2 grid with a plot height of 200.
figureLayoutDesc = {
    tabName: [[first, first + 1], [first + 2, first + 3], {"plot_height": 200}]
    for first, tabName in zip(
        range(0, 16, 4),
        ("histoXY", "histoXYNorm", "histoXYZ", "histoXYNormZ"),
    )
}

In [None]:
%%time
# Direct the bokeh output to an HTML file and build the dashboard.
output_file("QAQCcalPadSummary.html")
arrayCompression = arrayCompressionRelative10
# The dashboard is built from the index-sorted table.
dfSample = df.sort_index()
fig = bokehDrawSA.fromArray(
    dfSample,
    None,
    figureArray,
    widgetParams,
    layout=figureLayoutDesc,
    sizing_mode='scale_width',
    nPointRender=50000,
    widgetLayout=widgetLayoutDesc,
    parameterArray=parameterArray,
    histogramArray=histoArray,
    rescaleColorMapper=True,
    arrayCompression=arrayCompression,
    aliasArray=aliasArray,
)



## Histogram array
* histogram user defined - X,Y
* number of bins user defined