# NDRegressionDemo + Error of regression
Example usage of the ML interface/wrappers to Random Forests, KNN and Keras
Goals of MLpipeline.NDFunctionInterface:
* Use registered predefined methods as standard regression functions
* Provide functionality to calculate error estimators

In this example we would like to demonstrate that the regression errors for the function of interest are not constant.
We choose a periodic function for the regression - to emulate the TPC sector structure and radial dependent occupancy ...
```
f(A,B,C,D) = A+exp(3B)*sin(6.28C)
```

### **!!! NDFunctionInterface classes are  experimental. !!!**
* Implementation and interface will change  - to be "pythonic"
* better integration with visualization
* integration with trees

In [None]:
import ROOT
import pandas as pd
import numpy as np
from root_pandas import *
from RootInteractive.InteractiveDrawing.bokeh.bokehDrawSA import *
from RootInteractive.MLpipeline.NDFunctionInterface import  DataContainer, Fitter
from bokeh.io import output_notebook
output_notebook()

# Generate random panda+tree
* generate random vectors A,B,C,D
* generate function value = A+exp(3*B)*sin(6.28*C)
* generate noise vector

In [None]:
# Toy data set: four uniform random inputs A-D in [0, 1), a Gaussian noise
# column, and the target function built from A, B and sin(6.28*C).
npoints = 10000
df = pd.DataFrame(
    np.random.random_sample(size=(npoints, 4)),
    columns=["A", "B", "C", "D"],
)
df["noise"] = np.random.normal(0, 0.1, npoints)

# Periodic term in C; kept as its own column because later plots and sliders use it.
df["csin"] = np.sin(6.28 * df["C"])

# Noise-free function value: A + exp(3*B) * sin(6.28*C).
df["valueOrig"] = df["A"] + np.exp(3 * df["B"]) * df["csin"]

# Value seen by the fitters: true function plus noise.
df["value"] = df["valueOrig"] + df["noise"]

# Persist the frame as a ROOT tree named "ABCD" and read it back.
df.to_root("ABCD.root", "ABCD")
f = ROOT.TFile("ABCD.root")
tree = f.Get("ABCD")
# Uncomment to print the first entry of the tree:
# tree.Show(0)

# Interactive visualization of input function
* function value = A+exp(3B)*sin(6.28C) as function of A, B, C and csin

In [None]:
# Hover tooltips: show the A, B and C columns of the point under the cursor.
tooltips = [(column, "@" + column) for column in ("A", "B", "C")]

# Two scatter plots of the noise-free value (versus A and versus B), both
# coloured by csin, followed by a table entry.
figureArray = [
    [[xColumn], ["valueOrig"], {"size": 2, "colorZvar": "csin"}]
    for xColumn in ("A", "B")
]
figureArray.append(["table"])

# Widget specification string: a custom query box plus one slider per column.
widgets = ", ".join(
    [
        "query.custom()",
        "slider.A(0,1,0.1,0,1)",
        "slider.B(0,1,0.1,0,1)",
        "slider.C(0,1,0.1,0,1)",
        "slider.csin(-1,1,0.1,-1,1)",
    ]
)

# Layout string: figures 0 and 1 in the first row, figure 2 (the table) below.
figureLayout: str = "((0,1),(2, plot_height=150),commonY=1, x_visible=1,y_visible=0,plot_height=300,plot_width=1200)"

fig = bokehDrawSA.fromArray(
    df,
    "A>0",
    figureArray,
    widgets,
    tooltips=tooltips,
    layout=figureLayout,
)

## Make fits
* Register fitters and make a fit
* Registration of fitters will be more pythonic soon

In [None]:
# Regress the noisy "value" column on the four input columns.
varFit = "value"
variableX = list("ABCD")

# The last argument is a pair of equal sizes, each half of the sample
# (presumably the train/test split - confirm against DataContainer).
halfSample = npoints // 2
dataContainer = DataContainer(df, variableX, varFit, [halfSample, halfSample])
fitter = Fitter(dataContainer)

# (method name, backend, backend-specific keyword arguments), registered in this order.
methodSetup = (
    ("KNN", "KNeighbors", {}),
    ("RF", "RandomForest", {"n_estimators": 100, "max_depth": 10}),
    ("RF200", "RandomForest", {"n_estimators": 200, "max_depth": 10}),
    ("KM", "KerasModel", {"layout": [50, 50, 50], "epochs": 100, "dropout": 0.2}),
)
for methodName, backend, options in methodSetup:
    fitter.Register_Method(methodName, backend, "Regressor", **options)

# Fit all registered methods, then take the container's test sample for evaluation.
fitter.Fit()
test = dataContainer.Test_sample

# Register outputs

In [None]:
# Optional step, disabled by default:
# fitter.Compress('KM')

# Append the output of every fitted method to the test sample. Later cells
# read these as columns named after the method (RF, KNN, RF200, KM).
for method in ['RF', 'KNN', 'RF200', 'KM']:
    test = fitter.AppendOtherPandas(method, test)

# Append the statistical estimators of the two random forests; later cells
# read RFRMS and RFMedian. The result is re-bound to `test` in the same way as
# for AppendOtherPandas above, so the columns are not lost if the call returns
# a new frame. A None return (pure in-place update) keeps the current frame.
for method in ['RF', 'RF200']:
    appended = fitter.AppendStatPandas(method, test)
    if appended is not None:
        test = appended

# Pull of the RF prediction: residual with respect to the noisy value,
# normalized by the RF RMS estimate.
test = SetAlias(test, "pullRF", "(RF-value)/RFRMS")

## Visualize residuals as function of true value
* comparing different regression methods

In [None]:
# Residual (prediction - noise-free value) versus the noise-free value, one
# panel per fitted method, coloured by B. The fourth panel shows RF200; it
# previously repeated the KM panel, which left RF200 without any plot.
figureArray = [
    [['valueOrig'], ['RF-valueOrig'],    {"size": 2, "colorZvar": "B"}],
    [['valueOrig'], ['KM-valueOrig'],    {"size": 2, "colorZvar": "B"}],
    [['valueOrig'], ['KNN-valueOrig'],   {"size": 2, "colorZvar": "B"}],
    [['valueOrig'], ['RF200-valueOrig'], {"size": 2, "colorZvar": "B"}],
    ['table']
]
# The periodic-term column is named "csin" and spans [-1, 1]; the previous
# "slider.sinC(0,1,...)" referred to a column that is never created.
widgets = "query.custom(), slider.A(0,1,0.1,0,1), slider.B(0,1,0.1,0,1), slider.csin(-1,1,0.1,-1,1)"
# Layout string: two rows of two figures, then the table (figure 4).
figureLayout: str = '((0,1),(2,3),(4, plot_height=100),commonY=1, commonX=0, x_visible=1,y_visible=0,plot_height=250,plot_width=1200)'
output_file("NDFunctionInterace_0.html")
# `tooltips` is reused from the input-function visualization cell.
fig = bokehDrawSA.fromArray(test, "A>0", figureArray, widgets, tooltips=tooltips, layout=figureLayout)

# Visualize residuals and correlation of residuals

In [None]:
# Hover tooltips: show the A, B and C columns of the point under the cursor.
tooltips = [(column, "@" + column) for column in ("A", "B", "C")]

# Figures 0-3: residual of each estimator versus csin.
# Figures 4-5: KNN and KM residuals versus the RF residual.
# Figure 6: table. All scatter plots are coloured by B.
figureArray = [
    [["csin"], [estimator + "-valueOrig"], {"size": 2, "colorZvar": "B"}]
    for estimator in ("RF", "RFMedian", "KM", "KNN")
]
figureArray.extend(
    [["RF-valueOrig"], [estimator + "-valueOrig"], {"size": 2, "colorZvar": "B"}]
    for estimator in ("KNN", "KM")
)
figureArray.append(["table"])

# Widget specification string: a custom query box plus sliders on A, B and C.
widgets = ", ".join(
    [
        "query.custom()",
        "slider.A(0,1,0.1,0,1)",
        "slider.B(0,1,0.1,0,1)",
        "slider.C(0,1,0.1,0,1)",
    ]
)

# Layout string: four figures in the first row, two in the second, table last.
figureLayout: str = "((0,1,2,3),(4,5, commonX=4),(6, plot_height=150),commonY=1, commonX=0, x_visible=1,y_visible=0,plot_height=250,plot_width=1400)"

output_file("NDFunctionInterace_1.html")
fig = bokehDrawSA.fromArray(
    test,
    "A>0",
    figureArray,
    widgets,
    tooltips=tooltips,
    layout=figureLayout,
)

In [None]:
# Visualize normalized residuals and correlation of residuals

In [None]:
# Hover tooltips: show the A, B and C columns of the point under the cursor.
tooltips = [(column, "@" + column) for column in ("A", "B", "C")]

# Figures 0-2: residual of each method divided by the RF RMS estimate, versus csin.
# Figures 3-4: KNN and KM residuals versus the RF residual.
# Figure 5: table.
figureArray = [
    [
        ["csin"],
        ["(" + method + "-valueOrig)/RFRMS"],
        {"color": "red", "size": 2, "colorZvar": "B"},
    ]
    for method in ("RF", "KM", "KNN")
]
figureArray.extend(
    [
        ["RF-valueOrig"],
        [method + "-valueOrig"],
        {"color": "red", "size": 2, "colorZvar": "B"},
    ]
    for method in ("KNN", "KM")
)
figureArray.append(["table"])

# Widget specification string: custom query box plus sliders, including one on the RF pull.
widgets = ", ".join(
    [
        "query.custom()",
        "slider.A(0,1,0.1,0,1)",
        "slider.B(0,1,0.1,0,1)",
        "slider.C(0,1,0.1,0,1)",
        "slider.pullRF(-6,6,0.1,-6,6)",
        "slider.csin(-1,1,0.1,-1,1)",
    ]
)

# Layout string: three figures in the first row, two in the second, table last.
figureLayout: str = "((0,1,2),(3,4, commonX=4),(5, plot_height=150),commonY=1, commonX=0, x_visible=1,y_visible=0,plot_height=250,plot_width=1400)"

output_file("NDFunctionInterace_2.html")
fig = bokehDrawSA.fromArray(
    test,
    "A>0",
    figureArray,
    widgets,
    tooltips=tooltips,
    layout=figureLayout,
)

In [None]:
# Histogram (100 bins) of the RF pull column.
test["pullRF"].plot(kind="hist", bins=100)

In [None]:
# Histogram (100 bins) of the RF RMS estimate column.
test["RFRMS"].plot(kind="hist", bins=100)

In [None]:
# Draw the test sample with the selection "A>0" using the RF, RFRMS and csin
# columns (presumably x, y and colour respectively - confirm against
# drawColzArray), with the tooltips defined above.
drawColzArray(
    test,
    "A>0",
    "RF",
    "RFRMS",
    "csin",
    None,
    doDraw=1,
    tooltips=tooltips,
)