In [157]:
from scipy.io import loadmat
import numpy as np

In [None]:
def sigmoid(x):
    return 1.0/(1+np.exp(-x))

def sigmoid_derivative(x):
    return x*(1.0-x)

def softmax(x):
    ex = np.exp(x)
    return ex/np.sum(ex, axis=1, keepdims=True)

In [159]:
# Part (b)
ion_train = loadmat("Ion.trin.mat")
ion_test = loadmat("Ion.test.mat")

In [160]:
tst = np.zeros((4,1))
tst[2][0]

0.0

In [164]:
# Part (a)
class SingleLayerNN:
    def __init__(self, X, y, num_classes = 0, 
                 hidden = 0, decay = 0.0, epochs = 50, 
                 seed = 314152, l2 = 0.01, learning_rate = 0.1):
        self.X = X
        self.y = y
        self.epochs = epochs
        self.seed = seed
        self.l2 = l2
        self.lrate = learning_rate
        self.decay = 0.0
    
        d, n = X.shape
        if hidden == 0:
            self.hidden = n + 1
        else:
            self.hidden = hidden
            
        if num_classes == 0:
            self.k = len(np.unique(y))
        else:
            self.k = num_classes
            
        # Setting a seed to get consistent results when testing
        np.random.seed(self.seed)

        # Initializing random weights and biases
        self.w = np.random.randn(d, self.hidden)/np.sqrt(d)
        self.b = np.zeros((1, self.hidden))
        ## output layer weights/biases
        self.wy = np.random.randn(self.hidden, self.k)/np.sqrt(self.hidden)
        self.by = np.zeros((1,self.k))
        
        # Storing the activations
        self.hidden_activation = np.zeros((n, self.hidden))
        self.output = np.zeros((n, self.k))
        
    def feed_forward(self, input_set):
        hidden_layer = np.dot(input_set.T, self.w) + self.b
        self.hidden_activation = np.tanh(hidden_layer)
        output_layer = np.dot(self.hidden_activation, self.wy) + self.by
        output_activation = softmax(output_layer)
        return output_activation
    
    def fit(self):
        error_rates = [0] * self.epochs
        error = 0.0
        
        for i in range(self.epochs):
            # Shuffling the dataset
            d, n = self.X.shape
            dataset = np.append(self.X, self.y.T, axis = 0)
            np.random.shuffle(dataset.T)
            input_set = dataset[:d]
            targets = np.reshape(dataset[d], (n,1)).astype(int)
            self.output = self.feed_forward(input_set)
            
            # Backpropagation
            delta = self.output
            delta[range(n), targets] = 1 - delta[range(n), targets]
            hidden_gradient = sigmoid_derivative(self.hidden_activation)
            output_gradient = sigmoid_derivative(self.output)
            output_delta = np.dot(self.hidden_activation, delta)
            err_hidden = np.dot(output_delta, self.wy.T) 
            hidden_delta = err_hidden * hidden_gradient
            ## Regularization
            hidden_reg = self.l2 * self.w
            output_reg = self.l2 * self.wy
            ## Update weights
            self.wy -= self.lrate * (np.dot(self.hidden_activation.T, output_delta) +
                                     output_reg)
            self.w -= self.lrate * (np.dot(input_set, hidden_delta) +
                                    hidden_reg)
            ## Update biases
            self.by -= self.lrate * np.sum(output_delta, axis=0, keepdims=True) 
            self.b -= self.lrate * np.sum(hidden_delta, axis=0, keepdims=True)
            self.lrate *= 1.0 /(1 + self.decay*i)
            # Error/cost function
            error += sum(0.5 * ((targets - self.output)**2))
            error_rates[i] = error

        return error_rates
    
    def predict(self, data):
        probs = self.feed_forward(data)
        return np.argmax(probs, axis=1)

In [169]:
model = SingleLayerNN(ion_train['Xtrain'], ion_train['ytrain']
                      , epochs=10000, decay = 0.01, l2=0.01)
errors = model.fit()

In [170]:
np.unique(ion_test['ytest'], return_counts=True)

(array([0, 1], dtype=uint8), array([112,  63]))

In [171]:
ion_test['Xtest']

array([[ 1.     ,  0.     ,  1.     , ...,  1.     ,  1.     ,  1.     ],
       [ 1.     ,  0.     ,  0.96355, ...,  0.66667,  0.83508,  0.90608],
       [-0.45161,  0.     , -0.07198, ..., -0.01366,  0.08298, -0.01657],
       ..., 
       [ 1.     ,  1.     , -0.72779, ...,  0.2459 , -0.10714, -0.03757],
       [-0.32382,  0.     ,  0.38895, ...,  0.13934,  0.90546,  0.87403],
       [ 1.     ,  0.     , -0.7342 , ...,  0.48087, -0.04307, -0.16243]])

In [172]:
predictions = model.predict(ion_test['Xtest'])
predictions

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])