In [1]:
import numpy as np
from scipy.special import expit as expit

In [2]:
id = np.identity(8)
np.random.seed(50)

In [3]:
id

array([[1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0., 0., 0., 0.],
       [0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 1., 0., 0., 0., 0.],
       [0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 1., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 0., 0., 0., 1.]])

In [4]:
def activation(x, dx = False):
    '''
    Sigmiod based activation function - as discussed by Mike Keiser in class
    Input: value to be 'activated' can be int, float or array, boolean of if we want the derivative or not
    Output: sigmoid activation of input, derivative if requested
    using expit from scipy to prevent runtime overflow errors
    '''
    sig = expit(x + 1) 
    if dx:
        return sig*(1-sig)
    else:
        return sig
    
    

In [5]:
seqs = id

In [6]:
num_hidden = 3
rounds = 100000
output_dim = 8
classes = id

num_seqs = len(seqs)
input_dim = len(seqs[0])

learn_rate = 0.01
reg_term = .1

In [20]:
def create_model(seqs, classes, num_hidden, rounds, input_dim, output_dim, learn_rate, reg_term):
    # initialize needed values
    weight1 = 2*np.random.randn(input_dim, num_hidden) - 1
    weight2 = 2*np.random.randn(num_hidden, output_dim) - 1
    model = dict()
    err = []
    
    for i in range(rounds):
        lay0 = seqs
        lay1 = activation(x=np.dot(lay0, weight1), dx = False) # hidden layer 
        lay2 = activation(x = np.dot(lay1, weight2), dx = False) # output layer
    
        l2_error = classes - lay2
        l2_change = l2_error*activation(x = lay2, dx = False)

        #backpropogation for gradient descent
        l1_error = l2_change.dot(weight2.T)
        l1_change = l1_error*activation(x = lay1, dx = True)

        # update weights

        weight1 += learn_rate * (lay0.T.dot(l1_change) + reg_term*weight1)
        weight2 += learn_rate * (lay1.T.dot(l2_change) + reg_term*weight2)

        if i%1000 == 0:
            e = np.mean(np.abs(l2_error))
            err.append(e)
        
    model['weight1'] = weight1
    model['weight2'] = weight2
    
    return model, lay2.round(4), err

In [27]:
m = create_model(seqs, classes, num_hidden, rounds, input_dim, output_dim, learn_rate = 0.01, reg_term = 0)

In [28]:
m

({'weight1': array([[   0.288697  ,   47.55133313, -109.27364242],
         [   0.4443881 , -139.98940266,   51.93564548],
         [ -45.96668091,   24.17869344,   27.75433592],
         [  57.05144575,   -5.50054465,   -1.4377844 ],
         [  -2.61998813,   -2.74337811,   -2.75052247],
         [  -2.39410477,   -1.58148938,   52.37299862],
         [  59.04533912,   -1.37319996,   -6.57323331],
         [  -2.44231327,   55.43207698,   -1.56619847]]),
  'weight2': array([[ -7.72150119,  -7.40310972, -32.19363437,   6.67586203,
           -3.28448318, -21.99106071,   6.53528064, -22.52094152],
         [  8.25010861, -31.83842327,   1.05179388, -28.14145855,
           -2.64776335, -17.61930536, -10.13430208,  12.717199  ],
         [-31.34426065,   8.25455559,   1.35303629, -10.4244516 ,
           -2.60895041,  12.79784816, -28.72763058, -17.49579587]])},
 array([[0.9607, 0.    , 0.    , 0.    , 0.0144, 0.    , 0.0178, 0.0191],
        [0.    , 0.9632, 0.    , 0.0176, 0.0138, 0.0