# Regression Wavelet Forest:

In regression wavelet forest, training data is passed through a tree where every node has a small neural network that returns a probability that the sample 'belongs' to this node. The probability at each level is a multlipication of the node on that level and the nodes leading to this level: <br><br>
$\mu_l(x_i) = \Pi_m\mu_m(x_i)$
<br><br>
The valule in each node is determined by a 'vote' of the samples:
<br><br>
$\hat{y}=\sum_i\mu_l(x_i)y_i$

The prediction value is then determined by a 'vote' of the nodes: <br><br>
$\tilde{y_i} = \sum_l\mu_l(x_i)\hat{y}_l$

In [1]:
%load_ext autoreload
%autoreload 2

%matplotlib inline

In [2]:
from func_gen import *
import sklearn
import torch
from params import parameters
import model_conf
import dataset_loader
import trainer
import matplotlib.pyplot as plt

conf = parameters()

x,y = step_gen(range = (0,60),step=0.1)
tdl,vdl = dl_maker(x,y,conf)
data = dataset_loader.DataBunch(tdl,vdl,c=1,features4tree=1)

loss_func =  torch.nn.MSELoss()

#########################################################
conf.tree_depth = 10
conf.epochs = 300
#########################################################

learn = model_conf.Learner(*model_conf.get_model(conf,data), loss_func, data)

device = torch.device('cuda',0)
torch.cuda.set_device(device)

run = trainer.Runner(cb_funcs=conf.cbfs,conf=None)
run.fit(conf.epochs, learn)

AttributeError: 'NoneType' object has no attribute 'intervals'

In [None]:
xx = learn.data.valid_dl.ds.x.numpy()
yy = learn.data.valid_dl.ds.y.numpy()
zz = run.recorder.tot_pred.cpu().numpy()
plt.scatter(xx, yy, marker='^')
plt.scatter(xx, zz, marker='*')


In [None]:
for i in range(2,7):
    conf.tree_depth = 2*i
    learn = model_conf.Learner(*model_conf.get_model(conf,data), loss_func, data)
    run.fit(conf.epochs, learn)
    zz = run.recorder.tot_pred.cpu().numpy()
    plt.scatter(xx, zz, marker='*')

plt.show()

In [None]:
from func_gen import *
import sklearn
import torch
from params import parameters
import model_conf
import dataset_loader
import trainer

conf = parameters()
conf.epochs = 100
conf.tree_depth = 15
conf.one_batch = True
x,y = step_gen(range = (0,100),step=0.1)
tdl,vdl = dl_maker(x,y,conf)
data = dataset_loader.DataBunch(tdl,vdl,c=1,features4tree=1)

loss_func =  torch.nn.MSELoss()

learn = model_conf.Learner(*model_conf.get_model(conf,data), loss_func, data)

device = torch.device('cuda',0)
torch.cuda.set_device(device)

run = trainer.Runner(cb_funcs=conf.cbfs)
run.fit(conf.epochs, learn)

In [None]:
import matplotlib.pyplot as plt
mu = run.mu
y_hat = run.y_hat
arg_mu = torch.argmax(mu, dim=1).cpu().numpy()
uni_mu = np.unique(arg_mu)
yh = y_hat[arg_mu].detach().cpu().numpy()
yh_orig = y_hat.detach().cpu().numpy()
plt.hist(arg_mu,bins=30)

uni_mu_dist = mu[:,uni_mu].cpu().numpy()

xx = learn.data.valid_dl.ds.x.numpy()
yy = learn.data.valid_dl.ds.y.numpy()
zz = run.recorder.tot_pred.cpu().numpy()

plt.scatter(xx, yh, marker='^')
plt.scatter(xx, yy, marker='*')
plt.scatter(xx, zz, marker='o')
plt.show()

for i in range(6):
    plt.scatter(xx, uni_mu_dist.T[i], marker='^')

plt.show()

### Loss vs. No. of batch iterations:

In [None]:
run.recorder.plot_loss()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

xx = learn.data.valid_dl.ds.x.numpy()
yy = learn.data.valid_dl.ds.y.numpy()
zz = run.recorder.tot_pred.cpu().numpy()

plt.scatter(xx, yy, marker='^')
plt.scatter(xx, zz, marker='*')

plt.show()

In [None]:
conf.epochs = 100
conf.one_batch = True
tdl,vdl = dl_maker(x,y,conf)
data = dataset_loader.DataBunch(tdl,vdl,c=1,features4tree=1)

loss_func =  torch.nn.MSELoss()

learn = model_conf.Learner(*model_conf.get_model(conf,data), loss_func, data)

device = torch.device('cuda',0)
torch.cuda.set_device(device)

run = trainer.Runner(cb_funcs=conf.cbfs)
run.fit(conf.epochs, learn)

In [None]:
run.recorder.plot_loss()

In [None]:
import matplotlib.pyplot as plt
import numpy as np

xx = learn.data.valid_dl.ds.x.numpy()
yy = learn.data.valid_dl.ds.y.numpy()
zz = run.recorder.tot_pred.cpu().numpy()

plt.scatter(xx, yy, marker='^')
plt.scatter(xx, zz, marker='*')

plt.show()

In [None]:
# conf.epochs = 1000
# run.fit(conf.epochs, learn)

# import matplotlib.pyplot as plt
# import numpy as np

# xx = learn.data.valid_dl.ds.x.numpy()
# yy = learn.data.valid_dl.ds.y.numpy()
# zz = run.recorder.tot_pred.cpu().numpy()

# plt.scatter(xx, yy, marker='^')
# plt.scatter(xx, zz, marker='*')

# plt.show()

In [None]:
conf.epochs = 100

In [None]:
mu = run.mu_leaves
y_hat = run.y_hat_leaves

In [None]:
arg_mu = torch.argmax(mu, dim=1).cpu().numpy()
uni_mu = np.unique(arg_mu)
arg_mu

### Distribution of most popular leafs:

In [None]:
plt.hist(arg_mu,bins=30)

### values of most popular leafs:

In [None]:
yh_orig[uni_mu]

In [None]:
yh = y_hat[arg_mu].detach().cpu().numpy()
yh_orig = y_hat.detach().cpu().numpy()

### Value of samples by most popular leaf:

In [None]:
plt.scatter(xx, yh, marker='^')
# plt.scatter(xx, yy, marker='*')
# plt.scatter(xx, zz, marker='o')
plt.show()

In [None]:
conf.epochs = 100
conf.tree_depth = 20

learn = model_conf.Learner(*model_conf.get_model(conf,data), loss_func, data)

run.fit(conf.epochs, learn)

import matplotlib.pyplot as plt
import numpy as np

xx = learn.data.valid_dl.ds.x.numpy()
yy = learn.data.valid_dl.ds.y.numpy()
zz = run.recorder.tot_pred.cpu().numpy()

plt.scatter(xx, yy, marker='^')
plt.scatter(xx, zz, marker='*')

plt.show()

In [None]:
uni_mu

In [None]:
uni_mu_dist = mu[:,uni_mu].cpu().numpy()

In [None]:
uni_mu_dist.T

### Distribution of most popular nodes over all samples:

In [None]:
for i in range(4):
    plt.scatter(xx, uni_mu_dist.T[i], marker='^')

plt.show()

In [None]:
mu.size()

In [None]:
len(yh_orig)

### Distribution of arbitrary samples over nodes:

In [None]:
for i in range(10):
    j = i*33
    plt.scatter(j+100*mu[i*33,:].cpu().numpy(),np.arange(0,64), marker='*')

plt.show()

### Value of nodes of arbitrary samples vs. probability:

In [None]:
for i in range(10):
    j = i*33
    plt.scatter(mu[i*33,:].cpu().numpy(),yh_orig, marker='*')

plt.show()

In [None]:
plt.scatter(xx, yh, marker='^')
plt.scatter(xx, yy, marker='*')
plt.scatter(xx, zz, marker='o')
plt.show()