# Introduction to RBF networks

The objective of this notebook is to introduce the reader to radial basis function feedforward neural networks.

In [1]:
import sklearn
sklearn.__version__

'0.19.dev0'

In [2]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [3]:
import os
import sys
import inspect

# Make the sibling ``RBFClassifier`` directory importable from this notebook.
# The kernel's working directory is used as the anchor. Asking `inspect` for
# the current frame's file is unreliable inside a notebook: recent ipykernel
# versions compile cells under a temporary directory, so the derived path
# would end up next to that temp dir instead of next to the notebook.
currentdir = os.getcwd()
parent = os.path.join(os.path.dirname(currentdir), 'RBFClassifier')
if parent not in sys.path:  # keep re-runs of this cell from stacking duplicates
    sys.path.insert(0, parent)

import numpy as np

In [4]:
from sklearn import datasets

In [5]:
# Load the digits dataset bundled with scikit-learn, then unpack the feature
# matrix and the label vector in one step.
digits = datasets.load_digits()
X, y = digits.data, digits.target

### Parameters of the RBF network

In [6]:
import rbf_classifier_classic
from rbf_classifier_classic import RBFClassifier as RBFClassifierClassic

In [7]:
# Classic RBF classifier with 20 hidden basis functions; every other
# hyper-parameter keeps its default value.
rbf_classic = RBFClassifierClassic(n_hidden_basis=20)

In [8]:
rbf_classic.get_params()

{'KMeans_flag': True,
 'n_hidden_basis': 20,
 'random_state': 1234,
 'stratified_basis_selection': True}

## Train RBF network

In [9]:
X.shape, y.shape

((1797, 64), (1797,))

In [10]:
rbf_classic.fit(X, y)

RBFClassifier(KMeans_flag=True, n_hidden_basis=20, random_state=1234,
       stratified_basis_selection=True)

### Making predictions

In [11]:
rbf_classic.forward_propagation(X[0:20]).shape

(20, 10)

In [12]:
rbf_classic.predict(X[0:5])

array([0, 1, 1, 3, 4])

In [13]:
rbf_classic.forward_propagation(X[0:2])

array([[  4.42883191e-02,   1.70678140e-16,  -7.19906504e-15,
         -2.44531610e-17,   5.08643382e-17,  -5.14807425e-16,
         -6.38062659e-18,  -7.10508186e-17,  -3.99527099e-17,
         -1.72322259e-13],
       [ -5.07590911e-25,   1.19133978e-10,  -5.85741548e-25,
         -1.13565820e-23,   6.12177380e-27,  -4.19126122e-24,
         -1.60854356e-25,  -2.71797290e-24,  -8.51515975e-19,
         -1.22041590e-23]])

In [14]:
rbf_classic.predict_proba(X[0:2])

array([[  1.00000000e+00,   3.85379583e-15,  -1.62549972e-13,
         -5.52135676e-16,   1.14848202e-15,  -1.16240001e-14,
         -1.44070191e-16,  -1.60427896e-15,  -9.02104904e-16,
         -3.89091893e-12],
       [ -4.26067291e-15,   1.00000001e+00,  -4.91666239e-15,
         -9.53261381e-14,   5.13856241e-17,  -3.51810735e-14,
         -1.35019714e-15,  -2.28144225e-14,  -7.14754927e-09,
         -1.02440624e-13]])

In [15]:
rbf_classic.score(X,y)

0.9003895381190874

### Grid search over `n_hidden_basis`

In [16]:
from sklearn.model_selection import GridSearchCV

In [21]:
# Candidate hidden-layer sizes for the search: every even value from 10 to 98.
param_grid = dict(n_hidden_basis=[*range(10, 100, 2)])

In [22]:
# NOTE(review): the recorded output reports n_hidden_basis=50, although the
# instance built earlier in this notebook was created with n_hidden_basis=20.
# This cell was presumably executed after `rbf_classic` had been re-created
# with default arguments (the next code cell) -- restart the kernel and run
# all cells top to bottom to confirm and refresh the output.
rbf_classic.get_params()

{'KMeans_flag': True,
 'n_hidden_basis': 50,
 'random_state': 1234,
 'stratified_basis_selection': True}

In [23]:
# Fresh, unfitted estimator with default hyper-parameters to hand to the grid
# search. This rebinds `rbf_classic`, so the model fitted earlier is discarded.
rbf_classic = RBFClassifierClassic()

In [24]:
# Exhaustive search over every value in `param_grid`, using the estimator's
# own `score` method (no `scoring` argument is passed).
# n_jobs=-1 asks scikit-learn to use all available CPU cores.
# NOTE(review): the recorded repr of the fitted search shows n_jobs=1, so that
# output appears to come from an earlier version of this cell -- re-run to refresh.
grid_rbf_classic = GridSearchCV(estimator=rbf_classic, 
                               param_grid=param_grid,
                               n_jobs=-1)

In [25]:
grid_rbf_classic.fit(X,y)

GridSearchCV(cv=None, error_score='raise',
       estimator=RBFClassifier(KMeans_flag=True, n_hidden_basis=50, random_state=1234,
       stratified_basis_selection=True),
       fit_params={}, iid=True, n_jobs=1,
       param_grid={'n_hidden_basis': [10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98]},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring=None, verbose=0)

In [44]:
grid_rbf_classic.cv_results_.keys()

dict_keys(['mean_score_time', 'split1_test_score', 'split2_train_score', 'std_score_time', 'mean_test_score', 'split1_train_score', 'std_fit_time', 'split0_test_score', 'std_test_score', 'rank_test_score', 'param_n_hidden_basis', 'split0_train_score', 'mean_train_score', 'mean_fit_time', 'std_train_score', 'split2_test_score', 'params'])


## Supervised learning for all weights


Let us consider a mapping from vectors in a $d$-dimensional space to vectors in an $l$-dimensional space.
$$
F : \mathbb{R}^d \longrightarrow \mathbb{R}^l
$$

This function will be computed as a composition of functions. We will consider $M$ radial basis functions 
$$
F : \mathbb{R}^d  \longrightarrow  \mathbb{R}^M  \longrightarrow \mathbb{R}^l
$$

We define the $i$-th output component of our model as

$$
F_i (\textbf{x}) = \sum_{j =1}^M w_{ji} \cdot h( \| \textbf{x} -\textbf{c}_j  \|) + b_{i} \,\,\,\,\,\,\,\, i \in \{ 1, \dots, l\}
$$

#### Computing the gradient of the loss 




#### Updates of the parameters
Update of the weights and centroids of an RBF network

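$$
\Delta w_{ji} = \eta \sum_{\mu =1}^N h_j (\textbf{x}^\mu) (y_i^\mu - F_i (\textbf{x}^\mu) )
$$

Here $\mu$ runs over the $N$ training samples $(\textbf{x}^\mu, \textbf{y}^\mu)$, $h_j(\textbf{x}) = h( \| \textbf{x} - \textbf{c}_j \|)$ is the activation of the $j$-th basis function, and $\eta$ is the learning rate.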
