In [614]:
%matplotlib inline

from __future__ import absolute_import, print_function, unicode_literals, division
from sklearn.datasets import fetch_mldata
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import itertools
import random as rand
import copy

In [615]:
def mat_dbg(x):
    print(x.shape, ": \n", x)

In [684]:
class MLP(object):
    """
    NOTE: Matrix operations are modified from RBM file
    In particular, we have an an an input of n data points with
    dimension d as a d-by-n matrix
    """
    def __init__(self, data, num_hidden, learn_rate):
        # First row are the target values
        self.targets = data[0]
        self.data = data[1:,:]
        self.num_data = data.shape[1]
        self.num_visible = data.shape[0] - 1
        self.num_hidden = num_hidden
        self.learn_rate = learn_rate
        self.state_size = 2 * (np.amax(self.targets) - \
                               np.amin(self.targets)) + 4

        self.hid_wts = 2 * np.random.rand(self.num_visible + 1, \
                                          self.num_hidden) - 1
        self.out_wts = 2 * np.random.rand(self.num_hidden,1) - 1
        self.data = np.insert(self.data, 0, 1, axis = 0)
    
    def _sigmoid(self,x):
        return 1.0/(1.0+np.exp(-x))
    
    def _activate(self,mat1,mat2):
        activated = self._sigmoid(np.dot(mat1,mat2))
        return activated
        
    def _activated_hidden(self,mat):
        activated = self._activate(self.hid_wts.T,mat)
        return activated
    
    def _activated_out(self,mat):
        activated = self._activate(self.out_wts.T,mat)
        return activated
    
    def _gradient(self,mat):
        w = copy.deepcopy(mat)
        for elt in np.nditer(w,op_flags=['readwrite']):
            elt[...] = elt - elt * elt
        return w
    
    def train(self, iterations=3):
        data_copy = copy.deepcopy(self.data)
        for j in range(0,iterations):
            for i in range(0,self.num_data):

                """Feed-forward phase"""
                data_colT = data_copy[:,i]
                data_col = np.reshape(data_colT, (-1, 1))

                activated_hid = (self._activated_hidden(data_col))
                activated_hid = 2 * activated_hid - 1
                activated_guess = (self._activated_out(activated_hid))
                activated_guess = self.state_size * (2 * activated_guess - 1)
                print("Hid outputs: \n", activated_hid, \
                      "\n Guess: ", activated_guess, \
                      " and target: ", self.targets[i])

                """Error calculation phase"""
                error_out = self.targets[i] - activated_guess
                #print("error_out: ", error_out)

                """Back-propagation phase"""
                grad_out = (self._gradient(activated_guess))
                grad_hid = (self._gradient(activated_hid))
                #print("Grad out: \n", grad_out)
                #print("Grad hid: \n", grad_hid)

                print("PREVIOUS self.out_wts: \n",self.out_wts)
                print("PREVIOUS selt.hid_wts: \n",self.hid_wts)
                c_out = np.dot(error_out,self.learn_rate)
                #print("c_out:", c_out)
                wt_change_out = c_out * grad_out * activated_hid
                print("Wt_change_out: \n", wt_change_out)
                #print("Wt_change check: ",activated_guess * (1 - activated_guess) * \
                #     self.learn_rate * error_out * activated_hid)
                self.out_wts = wt_change_out + self.out_wts
                #print("NEW self.out_wts: \n",self.out_wts)
                
                ### works until here ###
                #print("OLD self.hid_wts: \n", self.hid_wts)
                #print("c_out: \n", c_out)
                error_hid = np.dot(self.out_wts,c_out)
                
                #print("Out errors: \n", error_hid)
                #print("grad_hid: ",grad_hid)
                coeffs = grad_hid * error_hid
                #print("Coeffs: \n", coeffs)
                #print("data_col: \n",data_col)
                data_tile = np.tile(data_col,(1,self.num_hidden))
                wt_changes_hid = np.multiply(coeffs.T,data_tile)
                #print("data tile: \n", data_tile)
                print("wt_changes_hid: \n", wt_changes_hid)
                self.hid_wts = wt_changes_hid + self.hid_wts
    
    def pred(self, test):
        test_copy = copy.deepcopy(test)
        test_copy = np.insert(test_copy, 0, 1, axis = 0)
        pred_p = self._activated_hidden(test_copy)
        pred_p = 2 * pred_p - 1
        pred = self._activated_out(pred_p)
        pred = pred
        return pred


In [685]:
# See www.cse.unsw.edu.au/~cs9417ml/MLP2/
# See https://www.hiit.fi/u/ahonkela/dippa/node41.html

# Generate training data (no noise)
dims = 4
data_pts = 5 * dims
sample_data = np.sign(2*np.random.rand(dims,data_pts)-1)
sample_data[0] = (4 * sample_data[1] ** 2 + 1 * sample_data[2]) 
# Need to write binary target values for data as well for MLP
# mat_dbg(sample_data)
MLP_1 = MLP(sample_data,5,0.1) #0.035 threshold
print("Weights^T: \n",MLP_1.hid_wts.T)
print("Targets: \n",MLP_1.targets)
print("Data sample: \n",MLP_1.data[1:5])

Weights^T: 
 [[ 0.91073838  0.66223424 -0.56572209  0.49993972]
 [-0.51753814 -0.57708492 -0.61650502  0.5224684 ]
 [ 0.94653373  0.44084231 -0.96403309 -0.27314992]
 [-0.47302018  0.23060709 -0.13275349 -0.59255667]
 [-0.33124172  0.23198089 -0.83630172  0.99613071]]
Targets: 
 [ 5.  3.  5.  3.  3.  5.  3.  5.  3.  5.  3.  3.  3.  5.  3.  5.  3.  5.
  3.  5.]
Data sample: 
 [[-1. -1.  1. -1. -1. -1. -1.  1.  1.  1.  1. -1.  1. -1. -1.  1. -1.  1.
  -1. -1.]
 [ 1. -1.  1. -1. -1.  1. -1.  1. -1.  1. -1. -1. -1.  1. -1.  1. -1.  1.
  -1.  1.]
 [ 1. -1.  1. -1.  1. -1.  1.  1. -1. -1.  1.  1. -1. -1.  1.  1.  1.  1.
  -1. -1.]]


In [686]:
MLP_1.train(1)
print("Weights: \n", MLP_1.out_wts)
print("Targets: \n", MLP_1.targets)

Hid outputs: 
 [[ 0.09110754]
 [-0.0172432 ]
 [-0.35026501]
 [-0.6134714 ]
 [-0.19900548]] 
 Guess:  [[ 1.26247906]]  and target:  5.0
PREVIOUS self.out_wts: 
 [[ 0.71843782]
 [ 0.18564213]
 [-0.40863517]
 [-0.4346778 ]
 [ 0.77267897]]
PREVIOUS selt.hid_wts: 
 [[ 0.91073838 -0.51753814  0.94653373 -0.47302018 -0.33124172]
 [ 0.66223424 -0.57708492  0.44084231  0.23060709  0.23198089]
 [-0.56572209 -0.61650502 -0.96403309 -0.13275349 -0.83630172]
 [ 0.49993972  0.5224684  -0.27314992 -0.59255667  0.99613071]]
Wt_change_out: 
 [[-0.01128384]
 [ 0.0021356 ]
 [ 0.04338097]
 [ 0.07597956]
 [ 0.0246472 ]]
wt_changes_hid: 
 [[ 0.0218859  -0.00123104  0.06456462  0.13269925 -0.07110594]
 [-0.0218859   0.00123104 -0.06456462 -0.13269925  0.07110594]
 [ 0.0218859  -0.00123104  0.06456462  0.13269925 -0.07110594]
 [ 0.0218859  -0.00123104  0.06456462  0.13269925 -0.07110594]]
Hid outputs: 
 [[ 0.1558624 ]
 [ 0.07664111]
 [ 0.70210361]
 [ 0.01084102]
 [-0.34655729]] 
 Guess:  [[-1.62524598]]  and 

In [677]:
dims = 3
test_pts = 3
test_data = np.sign(2*np.random.rand(dims,test_pts)-1)
print("Test data: \n",test_data)
print("Shape:",np.shape(a))
print("targets: \n", 4 * test_data[0] ** 2 + 1 * test_data[1] )
guesses = MLP_1.pred(test_data)
print("Guesses: \n", guesses)

Test data: 
 [[-1.  1.  1.]
 [-1. -1. -1.]
 [ 1. -1.  1.]]
Shape: (4, 1)
targets: 
 [ 3.  3.  3.]
Guesses: 
 [[ 0.  0.  0.]]


In [493]:
-1.78 * (2.78) * 1 / 10 * 6.78

-3.3550152

In [585]:
a = np.array([1,2,3,4])
np.tile(a,(5,1)).T

array([[1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4]])

In [619]:
a = np.array([[1],[2],[3],[4]])
b = np.array([0, 1, 2])
np.multiply(a,b)

array([[0, 1, 2],
       [0, 2, 4],
       [0, 3, 6],
       [0, 4, 8]])

In [638]:
np.tile(a,(1,5))

array([[1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4]])