In [None]:
import numpy as np
import mlp
import matplotlib.pyplot as plt

In [None]:
# Load the pre-split arrays: *X files hold the inputs, *Y files hold the targets.
train_input, train_output = np.load('../data/trainX.npy'), np.load('../data/trainY.npy')
test_input, test_output = np.load('../data/testX.npy'), np.load('../data/testY.npy')

# Standardisation statistics are computed along axis 0 of the TRAINING inputs
# only and then reused for the test inputs.
train_mean = train_input.mean(axis=0)
train_std = train_input.std(axis=0)

# The tiny constant keeps the division finite where the std is exactly zero.
_scale = train_std + 1e-16
train_input = (train_input - train_mean) / _scale
test_input = (test_input - train_mean) / _scale

# Length of one training row; used below as the input width of every network.
indim = len(train_input[0])
print(indim)


In [None]:
# Column 0 of the target arrays is the label for task 1, column 1 for task 2.
train_label1, train_label2 = train_output[:, 0], train_output[:, 1]
test_label1, test_label2 = test_output[:, 0], test_output[:, 1]

In [None]:
def random_normal_weight_init(input, output):
    """Draw a weight matrix from a standard normal distribution.

    Args:
        input: number of columns of the returned matrix.
        output: number of rows of the returned matrix.

    Returns:
        ``(output, input)`` array sampled from N(0, 1) using NumPy's global
        random state.
    """
    weight_shape = (output, input)
    return np.random.normal(loc=0, scale=1, size=weight_shape)

def random_weight_init(input,output):
    """Draw a weight matrix uniformly from ``[-b, b)`` with ``b = sqrt(6) / sqrt(input + output)``.

    Args:
        input: number of columns of the returned matrix.
        output: number of rows of the returned matrix.

    Returns:
        ``(output, input)`` array sampled with NumPy's global random state.
    """
    fan_total = input + output
    limit = np.sqrt(6) / np.sqrt(fan_total)
    return np.random.uniform(low=-limit, high=limit, size=(output, input))

def zeros_bias_init(outd):
    """Return an all-zero column vector of shape ``(outd, 1)``."""
    column_shape = (outd, 1)
    return np.zeros(column_shape, dtype=float)

def labels2onehot(labels, num_classes=10):
    """One-hot encode a 1-D sequence of class labels.

    Args:
        labels: sequence or array of class labels. Row ``k`` of the result is
            True only in the column whose index equals ``labels[k]``; a label
            outside ``range(num_classes)`` yields an all-False row.
        num_classes: number of columns of the encoding. Defaults to 10, the
            value that used to be hard-coded.

    Returns:
        Boolean array of shape ``(len(labels), num_classes)``. Empty input
        gives shape ``(0, num_classes)``.
    """
    flat_labels = np.asarray(labels).ravel()
    # Broadcasting an (n, 1) column against the (num_classes,) class indices
    # builds the whole encoding in one vectorised comparison.
    return flat_labels[:, None] == np.arange(num_classes)


# One-hot encode both label columns of each split.
train_label1_onehot, train_label2_onehot = (
    labels2onehot(column) for column in (train_label1, train_label2)
)
test_label1_onehot, test_label2_onehot = (
    labels2onehot(column) for column in (test_label1, test_label2)
)

In [None]:
def create_batches(input_data,label1_onehot,label2_onehot,batch_size):
    """Split the data into shuffled, equally sized mini-batches.

    The rows are permuted once with NumPy's global random state and the same
    permutation is applied to the inputs and to both label arrays, so row ``k``
    of every batch still refers to the same sample. Previously the permutation
    was computed but never used, so the batches were plain contiguous slices.

    Args:
        input_data: array-like with one sample per row.
        label1_onehot: task-1 labels, row-aligned with ``input_data``.
        label2_onehot: task-2 labels, row-aligned with ``input_data``.
        batch_size: positive number of samples per batch.

    Returns:
        Three lists ``(input_batches, output1_batches, output2_batches)``, each
        holding ``len(input_data) // batch_size`` arrays. Samples that do not
        fill a complete final batch are dropped so all batches share one shape.

    Raises:
        ValueError: if ``batch_size`` is not positive.
    """
    if batch_size <= 0:
        raise ValueError('batch_size must be a positive integer, got {}'.format(batch_size))

    # Fancy indexing below needs real arrays; this is a no-op for ndarrays.
    input_data = np.asarray(input_data)
    label1_onehot = np.asarray(label1_onehot)
    label2_onehot = np.asarray(label2_onehot)

    indices = np.arange(len(input_data))
    np.random.shuffle(indices)

    input_batches = []
    output1_batches = []
    output2_batches = []
    for batch_no in range(len(indices) // batch_size):
        # Select the rows through the shuffled permutation, not by position.
        batch_idx = indices[batch_no * batch_size:(batch_no + 1) * batch_size]
        input_batches.append(input_data[batch_idx])
        output1_batches.append(label1_onehot[batch_idx])
        output2_batches.append(label2_onehot[batch_idx])

    return input_batches, output1_batches, output2_batches


# Build the mini-batches (32 samples each) for the training split first and the
# test split second, converting each list of batches to one stacked array.
input_batches, output1_batches, output2_batches = (
    np.array(batch_group)
    for batch_group in create_batches(train_input, train_label1_onehot, train_label2_onehot, 32)
)

test_input_batches, test_output1_batches, test_output2_batches = (
    np.array(batch_group)
    for batch_group in create_batches(test_input, test_label1_onehot, test_label2_onehot, 32)
)



In [None]:
# Hyper-parameters shared by all three networks.
_mlp_settings = dict(alpha=0, dropout_chance=0, lr=0.001)

SingleLayerSingleTaskMLP = mlp.SingleLayerSingleTaskMLP(
    inp=indim, outp=10, hiddenlayer=100, **_mlp_settings
)
TwoLayerSingleTaskMLP = mlp.TwoLayerSingleTaskMLP(
    inp=indim, outp=10, hiddenlayers=[100, 100], **_mlp_settings
)
TwoLayerTwoTaskMLP = mlp.TwoLayerTwoTaskMLP(
    inp=indim, outp=10, hiddenlayers=[100, 100], **_mlp_settings
)

# Width of a one-hot label row; used as the size of every output layer
# (including both heads of the two-task network).
_n_out = output1_batches[0].shape[1]

# loadparams receives (list of weight matrices, list of bias vectors); the
# weights are drawn in the order input -> hidden layer(s) -> output head(s).
_hidden = SingleLayerSingleTaskMLP.hiddenlayer
SingleLayerSingleTaskMLP.loadparams(
    [
        random_weight_init(indim, _hidden),
        random_weight_init(_hidden, _n_out),
    ],
    [
        zeros_bias_init(_hidden),
        zeros_bias_init(_n_out),
    ],
)

_hidden1 = TwoLayerSingleTaskMLP.hiddenlayers[0]
_hidden2 = TwoLayerSingleTaskMLP.hiddenlayers[1]
TwoLayerSingleTaskMLP.loadparams(
    [
        random_weight_init(indim, _hidden1),
        random_weight_init(_hidden1, _hidden2),
        random_weight_init(_hidden2, _n_out),
    ],
    [
        zeros_bias_init(_hidden1),
        zeros_bias_init(_hidden2),
        zeros_bias_init(_n_out),
    ],
)

_hidden1 = TwoLayerTwoTaskMLP.hiddenlayers[0]
_hidden2 = TwoLayerTwoTaskMLP.hiddenlayers[1]
TwoLayerTwoTaskMLP.loadparams(
    [
        random_weight_init(indim, _hidden1),
        random_weight_init(_hidden1, _hidden2),
        random_weight_init(_hidden2, _n_out),
        random_weight_init(_hidden2, _n_out),
    ],
    [
        zeros_bias_init(_hidden1),
        zeros_bias_init(_hidden2),
        zeros_bias_init(_n_out),
        zeros_bias_init(_n_out),
    ],
)

# One loss object for the single-task networks, plus one per head of the
# two-task network.
SoftmaxCrossEntropyLoss = mlp.SoftmaxCrossEntropyLoss()

SoftmaxCrossEntropyLoss_two_task1 = mlp.SoftmaxCrossEntropyLoss()
SoftmaxCrossEntropyLoss_two_task2 = mlp.SoftmaxCrossEntropyLoss()

In [None]:
def single_layer_train_nn(train_data,test_data,train_label1_onehot,test_label1_onehot,train_x,train_y,test_x,test_y,learning_rate,momentum,dropout_rate,epochs,batch_size):
    """Train the module-level ``SingleLayerSingleTaskMLP`` and record per-epoch metrics.

    Each epoch runs forward / loss / backward / step / zerograd on every batch
    of ``train_x`` / ``train_y`` and then evaluates loss and accuracy on the
    full ``train_data`` and ``test_data`` using the module-level
    ``SoftmaxCrossEntropyLoss``. Two progress lines and a separator are printed
    per epoch.

    ``test_x``, ``test_y``, ``learning_rate``, ``momentum``, ``dropout_rate``
    and ``batch_size`` are accepted but never read by this function.

    Returns:
        ``(train_losses, test_losses, train_accuracies, test_accuracies)``,
        four lists with one entry per epoch.
    """
    net = SingleLayerSingleTaskMLP
    criterion = SoftmaxCrossEntropyLoss

    def evaluate(features, onehot):
        # Inputs and targets are transposed before they reach the network and
        # the loss, so samples are indexed along axis 1 of ``scores``.
        scores = net.forward(features.T)
        loss = criterion.forward(scores, onehot.T)
        hits = np.argmax(scores, axis=0) == np.argmax(onehot.T, axis=0)
        return loss, hits.sum() / scores.shape[1]

    train_losses, test_losses = [], []
    train_accuracies, test_accuracies = [], []

    for epoch in range(1, epochs + 1):
        for batch_no, batch_inputs in enumerate(train_x):
            batch_scores = net.forward(batch_inputs.T)
            criterion.forward(batch_scores, train_y[batch_no].T)
            net.backward(criterion.backward())
            net.step()
            net.zerograd()

        train_loss, train_accuracy = evaluate(train_data, train_label1_onehot)
        test_loss, test_accuracy = evaluate(test_data, test_label1_onehot)

        train_losses.append(train_loss)
        test_losses.append(test_loss)
        train_accuracies.append(train_accuracy)
        test_accuracies.append(test_accuracy)

        print(f'epoch={epoch}', f'train_loss={train_loss}', f'accuracy_train={train_accuracy}')
        print(f'epoch={epoch}', f'test_loss={test_loss}', f'accuracy_test={test_accuracy}')
        print('.............................................................................................')

    return train_losses, test_losses, train_accuracies, test_accuracies


def two_layer_single_task_train_nn(train_data,test_data,train_label1_onehot,test_label1_onehot,train_x,train_y,test_x,test_y,learning_rate,momentum,dropout_rate,epochs):
    """Train the module-level ``TwoLayerSingleTaskMLP`` and record per-epoch metrics.

    Each epoch makes one pass over the batches in ``train_x`` / ``train_y``
    (forward, loss forward, loss backward, network backward, step, zerograd),
    then computes loss and accuracy on the complete ``train_data`` and
    ``test_data`` with the module-level ``SoftmaxCrossEntropyLoss`` and prints
    them.

    ``test_x``, ``test_y``, ``learning_rate``, ``momentum`` and
    ``dropout_rate`` are accepted but never read by this function.

    Returns:
        ``(train_losses, test_losses, train_accuracies, test_accuracies)``,
        four lists with one entry per epoch.
    """
    network = TwoLayerSingleTaskMLP
    loss_fn = SoftmaxCrossEntropyLoss

    def score_split(samples, targets_onehot):
        # Samples are laid out along axis 1 after the transpose, so the
        # predicted / true class is the argmax over axis 0.
        outputs = network.forward(samples.T)
        split_loss = loss_fn.forward(outputs, targets_onehot.T)
        predicted = np.argmax(outputs, axis=0)
        actual = np.argmax(targets_onehot.T, axis=0)
        return split_loss, (predicted == actual).sum() / outputs.shape[1]

    history = {'train_loss': [], 'test_loss': [], 'train_acc': [], 'test_acc': []}

    for epoch_no in range(1, epochs + 1):
        for position, minibatch in enumerate(train_x):
            outputs = network.forward(minibatch.T)
            loss_fn.forward(outputs, train_y[position].T)
            gradient = loss_fn.backward()
            network.backward(gradient)
            network.step()
            network.zerograd()

        loss_train, acc_train = score_split(train_data, train_label1_onehot)
        loss_test, acc_test = score_split(test_data, test_label1_onehot)

        history['train_loss'].append(loss_train)
        history['test_loss'].append(loss_test)
        history['train_acc'].append(acc_train)
        history['test_acc'].append(acc_test)

        print(f'epoch={epoch_no}', f'train_loss={loss_train}', f'accuracy_train={acc_train}')
        print(f'epoch={epoch_no}', f'test_loss={loss_test}', f'accuracy_test={acc_test}')
        print('.............................................................................................')

    return history['train_loss'], history['test_loss'], history['train_acc'], history['test_acc']

def two_layer_two_task_train_nn(train_data,test_data,train_label1_onehot,train_label2_onehot,test_label1_onehot,test_label2_onehot,train_x,train_y1,train_y2,test_x,test_y1,test_y2,learning_rate,momentum,dropout_rate,epochs):
    """Train the module-level ``TwoLayerTwoTaskMLP`` on both tasks and record per-epoch metrics.

    For every batch the network's forward pass yields one output per task; each
    output goes through its own loss object
    (``SoftmaxCrossEntropyLoss_two_task1`` / ``..._two_task2``), and the two
    loss gradients are handed to the network's ``backward`` as a two-element
    list before ``step`` and ``zerograd``. After each epoch, loss and accuracy
    of both tasks are computed on the full ``train_data`` and ``test_data``.

    ``test_x``, ``test_y1``, ``test_y2``, ``learning_rate``, ``momentum`` and
    ``dropout_rate`` are accepted but never read by this function.

    Returns:
        Eight lists with one entry per epoch, in this order: train loss task 1,
        train loss task 2, test loss task 1, test loss task 2, train accuracy
        task 1, train accuracy task 2, test accuracy task 1, test accuracy
        task 2.
    """
    net = TwoLayerTwoTaskMLP
    criterion1 = SoftmaxCrossEntropyLoss_two_task1
    criterion2 = SoftmaxCrossEntropyLoss_two_task2

    def accuracy(scores, onehot):
        # Samples lie along axis 1 once the labels are transposed.
        matches = np.argmax(scores, axis=0) == np.argmax(onehot.T, axis=0)
        return matches.sum() / scores.shape[1]

    def evaluate(features, onehot1, onehot2):
        scores1, scores2 = net.forward(features.T)
        loss1 = criterion1.forward(scores1, onehot1.T)
        loss2 = criterion2.forward(scores2, onehot2.T)
        return loss1, loss2, accuracy(scores1, onehot1), accuracy(scores2, onehot2)

    train_loss1_log, train_loss2_log = [], []
    test_loss1_log, test_loss2_log = [], []
    train_acc1_log, train_acc2_log = [], []
    test_acc1_log, test_acc2_log = [], []

    for epoch in range(1, epochs + 1):
        for batch_no, batch_inputs in enumerate(train_x):
            scores1, scores2 = net.forward(batch_inputs.T)
            criterion1.forward(scores1, train_y1[batch_no].T)
            criterion2.forward(scores2, train_y2[batch_no].T)
            grad1 = criterion1.backward()
            grad2 = criterion2.backward()
            net.backward([grad1, grad2])
            net.step()
            net.zerograd()

        tr_loss1, tr_loss2, tr_acc1, tr_acc2 = evaluate(train_data, train_label1_onehot, train_label2_onehot)
        te_loss1, te_loss2, te_acc1, te_acc2 = evaluate(test_data, test_label1_onehot, test_label2_onehot)

        train_loss1_log.append(tr_loss1)
        train_loss2_log.append(tr_loss2)
        test_loss1_log.append(te_loss1)
        test_loss2_log.append(te_loss2)
        train_acc1_log.append(tr_acc1)
        train_acc2_log.append(tr_acc2)
        test_acc1_log.append(te_acc1)
        test_acc2_log.append(te_acc2)

        # NOTE(review): only the task-2 metrics are echoed here; the task-1
        # values are still recorded in the returned histories.
        print(f'epoch={epoch}', f'train_loss={tr_loss2}', f'accuracy_train={tr_acc2}')
        print(f'epoch={epoch}', f'test_loss={te_loss2}', f'accuracy_test={te_acc2}')
        print('.............................................................................................')

    return (train_loss1_log, train_loss2_log, test_loss1_log, test_loss2_log,
            train_acc1_log, train_acc2_log, test_acc1_log, test_acc2_log)


In [None]:
# Train the single-hidden-layer network on task 1 for 100 epochs.
# learning_rate, momentum, dropout_rate and batch_size are forwarded, but the
# function body defined in this notebook never reads them.
(
    single_layer_train_loss_array,
    single_layer_test_loss_array,
    single_layer_train_accuracy_array,
    single_layer_test_accuracy_array,
) = single_layer_train_nn(
    train_data=train_input,
    test_data=test_input,
    train_label1_onehot=train_label1_onehot,
    test_label1_onehot=test_label1_onehot,
    train_x=input_batches,
    train_y=output1_batches,
    test_x=test_input_batches,
    test_y=test_output1_batches,
    learning_rate=0.001,
    momentum=0,
    dropout_rate=0,
    epochs=100,
    batch_size=32,
)

In [None]:
# Train the two-hidden-layer network on task 1 for 100 epochs.
# learning_rate, momentum and dropout_rate are forwarded, but the function body
# defined in this notebook never reads them.
(
    two_layer_train_loss_array,
    two_layer_test_loss_array,
    two_layer_train_accuracy_array,
    two_layer_test_accuracy_array,
) = two_layer_single_task_train_nn(
    train_data=train_input,
    test_data=test_input,
    train_label1_onehot=train_label1_onehot,
    test_label1_onehot=test_label1_onehot,
    train_x=input_batches,
    train_y=output1_batches,
    test_x=test_input_batches,
    test_y=test_output1_batches,
    learning_rate=0.001,
    momentum=0,
    dropout_rate=0,
    epochs=100,
)

In [None]:
# Train the two-task network on both label columns for 100 epochs.
# learning_rate, momentum and dropout_rate are forwarded, but the function body
# defined in this notebook never reads them.
(
    two_task_train_loss_array1,
    two_task_train_loss_array2,
    two_task_test_loss_array1,
    two_task_test_loss_array2,
    two_task_train_accuracy_array1,
    two_task_train_accuracy_array2,
    two_task_test_accuracy_array1,
    two_task_test_accuracy_array2,
) = two_layer_two_task_train_nn(
    train_data=train_input,
    test_data=test_input,
    train_label1_onehot=train_label1_onehot,
    train_label2_onehot=train_label2_onehot,
    test_label1_onehot=test_label1_onehot,
    test_label2_onehot=test_label2_onehot,
    train_x=input_batches,
    train_y1=output1_batches,
    train_y2=output2_batches,
    test_x=test_input_batches,
    test_y1=test_output1_batches,
    test_y2=test_output2_batches,
    learning_rate=0.001,
    momentum=0,
    dropout_rate=0,
    epochs=100,
)

In [None]:
## !! LOSS AND ACCURACY CURVES !! ##
file_name = '../{}.png'


def _save_curves(curves, title, ylabel, stem):
    """Draw per-epoch curves on one figure and save it to ``file_name.format(stem)``.

    Args:
        curves: iterable of ``(values, label)`` pairs; ``values`` holds one
            entry per epoch and ``label`` is the legend text.
        title: figure title.
        ylabel: y-axis label.
        stem: file name without directory or extension.
    """
    fig, ax = plt.subplots()
    for values, label in curves:
        # The x-axis follows the length of the recorded history, so the plot
        # keeps working when the number of training epochs is not 100.
        ax.plot(np.arange(len(values)), values, label=label)
    ax.legend(loc='best')
    ax.set_title(title)
    ax.set_xlabel('Epochs')
    ax.set_ylabel(ylabel)
    fig.savefig(file_name.format(stem))
    # Close explicitly so figures do not pile up in memory.
    plt.close(fig)


## !! LOSS CURVES !! ##

######### SINGLE LAYER SINGLE TASK #################
_save_curves(
    [(single_layer_train_loss_array, 'Training Loss'),
     (single_layer_test_loss_array, 'Test Loss')],
    'SingleLayerSingleTask: Train and Test Loss Curves', 'Loss',
    'single_layer_loss_task1',
)

########## TWO LAYER SINGLE TASK #################
_save_curves(
    [(two_layer_train_loss_array, 'Training Loss'),
     (two_layer_test_loss_array, 'Test Loss')],
    'TwoLayerSingleTask: Train and Test Loss Curves', 'Loss',
    'two_layer_loss_task1',
)

######### TWO LAYER TWO TASK #################
_save_curves(
    [(two_task_train_loss_array1, 'Training Loss 1'),
     (two_task_test_loss_array1, 'Test Loss 1'),
     (two_task_train_loss_array2, 'Training Loss 2'),
     (two_task_test_loss_array2, 'Test Loss 2')],
    'TwoLayerTwoTask: Train and Test Loss Curves', 'Loss',
    'two_task_loss_task1',
)


## !! ACCURACY CURVES !! ##

######### SINGLE LAYER SINGLE TASK #################
_save_curves(
    [(single_layer_train_accuracy_array, 'Training accuracy'),
     (single_layer_test_accuracy_array, 'Test accuracy')],
    'SingleLayerSingleTask: Train and Test Accuracy Curves', 'Accuracy',
    'single_layer_accuracy_task1',
)

########## TWO LAYER SINGLE TASK #################
_save_curves(
    [(two_layer_train_accuracy_array, 'Training accuracy'),
     (two_layer_test_accuracy_array, 'Test accuracy')],
    'TwoLayerSingleTask: Train and Test Accuracy Curves', 'Accuracy',
    'two_layer_accuracy_task1',
)

########## TWO LAYER TWO TASK #################
# The task-2 training curve used to carry the legend text 'Test accuracy 2',
# which duplicated the real test entry.
_save_curves(
    [(two_task_train_accuracy_array1, 'Training accuracy 1'),
     (two_task_test_accuracy_array1, 'Test accuracy 1'),
     (two_task_train_accuracy_array2, 'Training accuracy 2'),
     (two_task_test_accuracy_array2, 'Test accuracy 2')],
    'TwoLayerTwoTask: Train and Test Accuracy Curves', 'Accuracy',
    'two_task_accuracy_task1',
)

