# Here I implement Radial Basis Function Networks

In [87]:
# This is taken from 
# http://www.rueckstiess.net/research/snippets/show/72d2363e
# and modified subsequently to allow for the Least Squares algorithm to build the hidden layer.

#from scipy import *
from scipy.linalg import norm, pinv
from matplotlib import pyplot as plt
import numpy as np

class RBF:
     
    def __init__(self, indim, centers, betas, outdim,):
        self.indim = indim
        self.outdim = outdim
        numCenters = len(centers)
        if numCenters != len(betas):
            print 'number of centers and betas must be the same.'
#        self.numCenters = numCenters
        self.numCenters = numCenters
#        self.centers = [random.uniform(-1, 1, indim) for i in xrange(numCenters)]
        self.centers = centers
        self.beta    = betas
#        self.beta = 8
        self.W = np.random.rand(self.numCenters, self.outdim)
         
    def _basisfunc(self, c, d, bval):
        assert len(d) == self.indim
        return np.exp(-1 * norm(bval*(c-d)**2))
      
     
    def _calcAct(self, X):
        # calculate activations of RBFs
#        G = np.zeros((np.array(X).shape[0], self.numCenters), float)
        G = np.zeros([len(X), self.numCenters])
        for ci, c in enumerate(self.centers):
            beta_ci = self.beta[ci]
            for xi, x in enumerate(X):
                G[xi,ci] = self._basisfunc(c, x, beta_ci)
        return G
     
    def train(self, X, Y):
        """ X: matrix of dimensions n x indim 
            y: column vector of dimension n x 1
            idx: indices of training set objects acting as prototypes """
         
        # choose random center vectors from training set
        #rnd_idx = random.permutation(X.shape[0])[:self.numCenters]
        #self.centers = [X[i,:] for i in rnd_idx]
        #self.centers = [X[i,:] for i in idx] 
        #print "center", self.centers
        # calculate activations of RBFs
        G = self._calcAct(X)
        print G
         
        # calculate output weights (pseudoinverse)
        self.W = np.dot(pinv(G), Y)
         
    def test(self, X):
        """ X: matrix of dimensions n x indim """
         
        G = self._calcAct(X)
        Y = np.dot(G, self.W)
        return Y
 
 


In [88]:
#load databases
#DataDir = './data/'
DataDir = '.'
import pickle
dset = pickle.load(open('%s/training_full.data'%DataDir))
print dset.keys()


featnames = ['dm_j0660','J0378','J0395','J0410','J0430','J0515',
             'J0660','J0861','uSDSS','gSDSS','rSDSS','iSDSS','zSDSS']

ntrain = len(dset['obj'])
feat_arr = []
err_arr = []
import itertools

colfeats = ['uSDSS','gSDSS','rSDSS','iSDSS','zSDSS']

terms = colfeats # create all colour combinations
nterms = len(terms)
ncomb = int(nterms*(nterms-1)/2.)
print 'All colours:', ncomb
comb = list(itertools.combinations(terms,2))
lcomb = list(comb)
colournames = ['%s - %s'%(x[0], x[1]) for x in list(comb)]

for x in range(ntrain):
    fx = []
    ex = []
    for y in featnames:
        fx.append(dset[y][x,0])
        ex.append(dset[y][x,1])
    for z in range(ncomb):
        fx.append(dset[lcomb[z][0]][x,0] - dset[lcomb[z][1]][x,0])
        ex.append(np.sqrt((dset[lcomb[z][0]][x,1])**2 + (dset[lcomb[z][1]][x,1])**2))
    
    feat_arr.append(fx)
    err_arr.append(ex)
class_arr = dset['class']
featnames += colournames
print featnames, len(featnames)
nfeat = len(featnames)



['rSDSS', 'iSDSS', 'obj', 'gSDSS', 'J0395', 'zSDSS', 'J0378', 'J0430', 'uSDSS', 'dm_j0660', 'J0660', 'J0410', 'J0515', 'J0861', 'class']
All colours: 10
['dm_j0660', 'J0378', 'J0395', 'J0410', 'J0430', 'J0515', 'J0660', 'J0861', 'uSDSS', 'gSDSS', 'rSDSS', 'iSDSS', 'zSDSS', 'uSDSS - gSDSS', 'uSDSS - rSDSS', 'uSDSS - iSDSS', 'uSDSS - zSDSS', 'gSDSS - rSDSS', 'gSDSS - iSDSS', 'gSDSS - zSDSS', 'rSDSS - iSDSS', 'rSDSS - zSDSS', 'iSDSS - zSDSS'] 23


In [89]:

Xt = feat_arr
y  = dset['class']
def class_to_int(istr):
    if istr == 'Halpha':
        return 0
    elif istr == 'OIII+Hbeta':
        return 1
    if istr == 'OII':
        return 2
    elif istr == 'contaminant':
        return 3
    else:
        print '%s not recognised'%istr
        return -99


y_train_int = [class_to_int(x) for x in y]

from sklearn.preprocessing import StandardScaler 
Scaledata = True
if Scaledata:
    print 'scaling data...',
    scaler = StandardScaler()
    scaler.fit(Xt)
    Xt = scaler.transform(Xt)
    #x_test  = scaler.transform(x_test)
    print 'done'



# Choose prototypes

idarr = np.random.permutation(np.arange(0,ntrain))[0:100]
mu_arr = np.array(Xt)[idarr]
beta_arr = np.array(err_arr)[idarr]
outdim = len(np.unique(y_train_int))


rbf = RBF(nfeat,mu_arr,beta_arr,outdim)
rbf.train(Xt,y_train_int)
z = rbf.test(Xt)
    
    

scaling data... done
[[5.25298794e-02 4.35289026e-01 3.27869612e-04 ... 1.60234123e-01
  9.03021032e-08 1.15942836e-04]
 [4.79513497e-01 4.37465120e-01 2.27866256e-02 ... 3.36155067e-01
  2.50722033e-03 3.64846613e-01]
 [2.21052855e-01 3.12859859e-01 1.25934180e-02 ... 2.60498658e-01
  3.42272847e-01 1.26128412e-01]
 ...
 [5.85889347e-01 2.91704683e-01 8.29542320e-02 ... 1.34150075e-02
  6.37380527e-13 2.12435927e-02]
 [6.23096252e-01 6.97661681e-01 1.54634346e-01 ... 2.40135532e-01
  1.58722580e-06 1.12129762e-01]
 [4.58814981e-01 4.21382797e-01 2.11972654e-01 ... 2.69718786e-01
  1.27934293e-04 5.07625907e-01]]


In [91]:
pred = [np.round(x) for x in z]
gt3 = pred >3
pred[gt3] = 3 # those that were rounded to 4 are set to 3

from sklearn.metrics import accuracy_score
err = 1 - accuracy_score(y_train_int, pred)
print 'Error rate', err



Error rate 0.13315579227696406


> [0;32m<ipython-input-55-d1536fa401d1>[0m(40)[0;36m_calcAct[0;34m()[0m
[0;32m     38 [0;31m        [0;32mfor[0m [0mci[0m[0;34m,[0m [0mc[0m [0;32min[0m [0menumerate[0m[0;34m([0m[0mself[0m[0;34m.[0m[0mcenters[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m     39 [0;31m            [0;32mfor[0m [0mxi[0m[0;34m,[0m [0mx[0m [0;32min[0m [0menumerate[0m[0;34m([0m[0mX[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m---> 40 [0;31m                [0mG[0m[0;34m[[0m[0mxi[0m[0;34m,[0m[0mci[0m[0;34m][0m [0;34m=[0m [0mself[0m[0;34m.[0m[0m_basisfunc[0m[0;34m([0m[0mc[0m[0;34m,[0m [0mx[0m[0;34m,[0m [0mci[0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m     41 [0;31m        [0;32mreturn[0m [0mG[0m[0;34m[0m[0m
[0m[0;32m     42 [0;31m[0;34m[0m[0m
[0m
ipdb> print c,x,ci
[ 0.4542976 22.5581036 21.820425  22.2046375 21.6205902 21.3017254
 20.7645779 20.5660629 22.07925   21.97476   21.28776   21.23176
 21.13188    