In [1]:
# Directory (relative to the notebook) that holds the dataset CSV files.
datapath = './mlrefined_datasets/nonlinear_superlearn_datasets/'
import numpy as np
# Import pyplot directly instead of going through the discouraged pylab module;
# `plt` is bound to the same module object either way.
import matplotlib.pyplot as plt
#import autograd.numpy as np

In [2]:
def feature_transforms(a, w):
    """Push `a` through a stack of tanh layers.

    Each entry of `w` is one layer's weight matrix: its first row is added
    as a bias and the remaining rows multiply the incoming activations.
    Because the product is `W[1:].T @ a`, the first axis of `a` must match
    the number of non-bias rows of the layer matrix.

    Returns the activations of the last layer (or `a` itself when `w` is
    empty), laid out with units on the first axis.
    """
    for layer in w:
        offset, slopes = layer[0], layer[1:]
        # Transpose so the per-unit bias broadcasts along the last axis.
        pre_activation = offset + np.dot(slopes.T, a).T
        # Transpose back so the next layer again sees units on axis 0.
        a = np.tanh(pre_activation).T
    return a

In [3]:
def model(x, theta):
    """Evaluate the network: tanh feature layers followed by a linear layer.

    `theta[0]` is handed to `feature_transforms` as the hidden-layer
    weights; `theta[1]` is the final weight matrix, whose first row is the
    bias and whose remaining rows multiply the transformed features.

    Returns the linear outputs transposed, i.e. output units on axis 0.
    """
    hidden_weights, final_weights = theta[0], theta[1]
    features = feature_transforms(x, hidden_weights)
    scores = final_weights[0] + np.dot(features.T, final_weights[1:])
    return scores.T

In [4]:
def network_initializer(layer_sizes, scale):
    """Draw random initial weights for a fully connected network.

    Parameters
    ----------
    layer_sizes : sequence of int
        Unit counts per layer, input first, output last.  Consecutive pairs
        (U_k, U_k+1) each produce one weight matrix of shape
        (U_k + 1, U_k+1); the extra row is the bias.
    scale : float
        Multiplier applied to the standard-normal draws.

    Returns
    -------
    numpy.ndarray of dtype object, shape (2,)
        Element 0 is the list of all weight matrices except the last,
        element 1 is the last weight matrix.

    Raises
    ------
    IndexError
        If `layer_sizes` has fewer than two entries (no matrix to return).
    """
    weights = [
        scale*np.random.randn(U_k + 1, U_k_plus_1)
        for U_k, U_k_plus_1 in zip(layer_sizes[:-1], layer_sizes[1:])
    ]

    # Fill the two slots explicitly.  The builtin `object` replaces the
    # `np.object` alias, which newer NumPy releases no longer provide, and
    # slot assignment keeps NumPy from trying to broadcast the nested
    # sequences into a 2-D object array when their lengths happen to match.
    theta_init = np.empty(2, dtype=object)
    theta_init[0] = weights[:-1]
    theta_init[1] = weights[-1]

    return theta_init

In [5]:
def loss_func(a, x, y):
    """Mean softmax cross-entropy of the scores `a` against one-hot `y`.

    Parameters
    ----------
    a : array
        Raw scores.  The softmax is taken over axis 0, so each column is
        one sample and each row one class.
    x : array
        Kept for interface compatibility; not used in the computation.
        The sample count is read from `a` because the softmax axis fixes
        its columns as the samples.
    y : array
        One-hot targets, either in the same layout as `a` or transposed
        (one row per sample); the transposed layout is detected by shape
        and flipped.  If `a` is square the two layouts cannot be told
        apart and `y` is assumed to already match `a`.

    Returns
    -------
    float
        -sum(y * log_softmax(a)) divided by the number of samples.
    """
    a_shape = np.shape(a)
    y_shape = np.shape(y)
    if y_shape != a_shape and y_shape[::-1] == a_shape:
        y = np.transpose(y)

    # Log-softmax computed directly, with a per-column shift, so large scores
    # neither overflow exp() nor produce log(0).
    shifted = a - np.max(a, axis=0, keepdims=True)
    log_softmax = shifted - np.log(np.sum(np.exp(shifted), axis=0, keepdims=True))

    N = a_shape[1] if len(a_shape) > 1 else 1

    # Cross-entropy weights the log-probabilities by the targets; it does not
    # subtract them from the targets.
    loss = -np.sum(y * log_softmax)
    loss = loss / N
    return loss
    

In [6]:
# Read the CSV: the first two rows become the input columns of x (one sample
# per row after the transpose) and the third row is used as the integer labels.
data = np.loadtxt(datapath + '2_eggs.csv', delimiter=',')
x = data[:2, :].T.astype(float)
y = data[2, :].astype(int)[np.newaxis, :]

# Map label -1 to 0 so the labels can serve as column indices below.
y[y == -1] = 0

# One-hot encode: one row per sample, one column per label value.
y1 = np.zeros((y.size, y.max() + 1), dtype=int)
y1[np.arange(y.size), y] = 1

y = y1

print(x.shape)
print(y.shape)
print(x[0], y[0])

(96, 2)
(96, 2)
[0.24539 0.81725] [1 0]


In [7]:
#plt.figure(figsize=(5,5))
#plt.scatter(x[:,0], x[:,1], c=y, s=20, cmap='jet')

In [8]:
from autograd import nn, utils
from autograd.engine import Scalar
from autograd.visualize import draw_graph

In [9]:
# Build the MLP: 2 inputs, four layers of 10 units, then a 2-unit output layer.
# NOTE(review): this rebinds the name `model`, which an earlier cell defined as
# a plain function; later cells that call model(x, w) will get this object.
model = nn.MLP(2, [10] * 4 + [2])
print(model)
print(f"\nNumber of trainable parameters: {len(model.parameters())}")

MLP([Layer([ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2),ReLUNeuron(2)]),Layer([ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10)]),Layer([ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10)]),Layer([ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10),ReLUNeuron(10)]),Layer([LinearNeuron(10),LinearNeuron(10)])])

Number of trainable parameters: 382


In [10]:
# define loss function
def calculate_loss(batch_size=None):
    """Run the global `model` on the data and return (loss, accuracy).

    With a falsy `batch_size` the whole of the global `x` / `y` is used;
    otherwise `batch_size` rows are picked from a random permutation.

    Returns
    -------
    tuple
        The value returned by `utils.softmax_loss(outputs, yb)` and the
        fraction of samples whose one-hot prediction (arg-max over the two
        outputs) equals the corresponding row of `yb`.
    """
    if batch_size:
        picked = np.random.permutation(x.shape[0])[:batch_size]
        Xb, yb = x[picked], y[picked]
    else:
        Xb, yb = x, y

    # Wrap every input coordinate in a Scalar node, then run the forward pass
    # one sample at a time.
    inputs = [[Scalar(value) for value in row] for row in Xb]
    outputs = [model(sample) for sample in inputs]

    # Data loss only; no regularisation term is added.
    total_loss = utils.softmax_loss(outputs, yb)

    # Accuracy: turn the arg-max of the two raw outputs into a one-hot row and
    # compare it entry by entry with the target row.
    raw_scores = [[out[0].value, out[1].value] for out in outputs]
    winners = np.argmax(raw_scores, axis=1)
    one_hot = np.zeros((winners.size, 2)).astype(int)
    one_hot[np.arange(winners.size), winners] = 1
    hits = [
        (target[0] == guess[0] and target[1] == guess[1])
        for target, guess in zip(yb, one_hot)
    ]
    return total_loss, sum(hits) / len(hits)

In [11]:
# training
def train(epochs=100, debug=False, learning_rate=0.1):
    """Train the global `model` with full-batch gradient descent.

    Every epoch calls `calculate_loss()` without a batch size (so the whole
    dataset is used), back-propagates from the returned loss and moves each
    parameter against its gradient with a constant step.

    Parameters
    ----------
    epochs : int
        Number of passes over the data.
    debug : bool
        When true, print every parameter's value and gradient before the
        update and its value afterwards.
    learning_rate : float
        Step size applied to every parameter update (constant; no decay).
    """
    for epoch in range(epochs):
        total_loss, accuracy = calculate_loss()

        # Clear gradients left over from the previous epoch before
        # back-propagating the new loss.
        model.zero_grad()
        total_loss.backward()

        # Plain gradient-descent update with a fixed learning rate.
        for parameter in model.parameters():
            if debug:
                print('before', parameter.value, 'grad', parameter.grad)
            parameter.value -= learning_rate * parameter.grad
            if debug:
                print('after', parameter.value)

        # Progress report every fifth epoch; the loss and accuracy shown were
        # measured before this epoch's update.
        if epoch % 5 == 0:
            print(f"Epoch: {epoch}, Loss: {total_loss.value}, Accuracy: {accuracy*100}%")

In [12]:
# Run the default 100 epochs with debug=True, which makes train() print each
# parameter's value and gradient before, and its value after, every update.
train(debug=True)

[[5.36919848 4.83709935]
 [5.41850789 4.84329011]
 [5.49134542 4.84830597]
 [5.63106373 4.85309595]
 [5.73353834 4.85463423]
 [5.81593904 4.83740619]
 [5.83052688 4.82501103]
 [5.68355795 4.82460602]
 [5.61722975 4.8395593 ]
 [5.51399565 4.83372221]
 [5.40236718 4.82698805]
 [5.37707376 4.83129925]
 [5.45745328 4.83733041]
 [5.58287066 4.84441845]
 [5.69987396 4.84171105]
 [5.56496464 4.84128037]
 [5.73881243 4.63048443]
 [5.83672629 4.57289574]
 [5.77565514 4.32376016]
 [5.4933471  4.21687433]
 [5.43070524 4.17187357]
 [5.48853873 4.20508494]
 [5.66590247 4.37990029]
 [5.67669845 4.3778605 ]
 [5.79524216 4.38509597]
 [5.91009182 4.58682892]
 [5.79871674 4.65523293]
 [5.70583622 4.69560109]
 [5.81497664 4.62135326]
 [5.7949183  4.35734499]
 [5.59065269 4.30383719]
 [5.57921042 4.30223162]
 [4.1633568  5.82155115]
 [4.28461234 5.83093892]
 [4.31970041 5.81891376]
 [4.27286196 5.81265209]
 [4.40201821 5.81496272]
 [4.26355154 5.82218329]
 [4.56955611 5.79548554]
 [4.51254964 5.81235273]


AssertionError: node value must be a scalar

In [None]:
# Layer widths for the NumPy network: 2 inputs, three layers of 10 units, 1 output.
# NOTE(review): the labels built earlier are one-hot with 2 columns and
# loss_func takes a softmax over the output units, so a single output unit
# looks inconsistent with the data — confirm the intended output width.
layer_sizes = [2, 10, 10, 10, 1]

# Draw the initial weights with scale 1 and show the final-layer matrix shape.
w = network_initializer(layer_sizes, 1)
print(w[1].shape)



In [None]:
from autograd import numpy as np
from autograd import elementwise_grad 


def fit(x, y,w, steps = 1000, lr = 0.001, comp_wise=False):
    """Minimise loss_func(model(x, w), x, y) over `w` by normalised gradient descent.

    Parameters
    ----------
    x, y :
        Passed unchanged to `model` and `loss_func`.
    w :
        Two-slot container (list or object array): `w[0]` is the sequence
        of hidden-layer weight matrices, `w[1]` the final weight matrix.
    steps : int
        Number of descent iterations.
    lr : float
        Step length applied to the normalised gradient.
    comp_wise : bool
        False: divide the gradient by its Euclidean norm.
        True: divide every gradient component by its own magnitude
        (plus 1e-8), leaving only its sign.

    Returns
    -------
    tuple
        `(weights, values)`: the final weights as `[hidden_list, final]`
        and the list of loss values recorded before each update.

    NOTE(review): in this notebook the name `model` is rebound to an
    `nn.MLP` instance by an earlier cell; this function needs the plain
    `model(x, theta)` function — confirm which one is in scope when it runs.
    """
    # Local imports so this cell does not depend on another cell's imports.
    from autograd import value_and_grad
    from autograd.misc.flatten import flatten

    # Work on one flat vector so a single norm and a single update cover
    # every layer; `unflatten` rebuilds the nested structure for the model.
    flat_w, unflatten = flatten([list(w[0]), w[1]])

    def cost(flat):
        theta = unflatten(flat)
        return loss_func(model(x, theta), x, y)

    # Differentiate the loss with respect to the weights (not the model
    # output with respect to x) and get the loss value in the same pass.
    cost_and_grad = value_and_grad(cost)

    values = []

    for _ in range(steps):

        fw, gw = cost_and_grad(flat_w)
        values.append(fw)

        if comp_wise == False:
            gw_norm = np.linalg.norm(gw)
            if gw_norm == 0:
                # Zero gradient: any non-zero divisor leaves it at zero.
                gw_norm = 10**-6
            gw = gw / gw_norm

        else:
            comp_norm = np.abs(gw) + 10**(-8)
            gw = gw / comp_norm

        # Step against the gradient, scaled by the learning rate rather than
        # by the iteration index.
        flat_w = flat_w - lr*gw

    return unflatten(flat_w), values
        
        
        
        
        

In [None]:
# Fit the NumPy network on the loaded data, starting from the initial weights w
# and using fit()'s default step count and learning rate.
# NOTE(review): x was printed earlier with shape (96, 2), while
# feature_transforms multiplies W[1:].T by its input and so needs the 2
# features on the first axis — confirm whether x.T should be passed here.
fit(x, y, w)