# Assignment 1: Exercise three - Low-level functions

In [31]:
import numpy as np
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt 
from functools import reduce
%matplotlib inline

### Question 20:
Write a simple function that achieves the convolution operation efficiently for two-dimensional and three-dimensional inputs. This should allow you to input a set of convolutional filters (‘kernels’ in Keras’s terminology) and an input layer (or image) as inputs. The input layer should have a third dimension, representing a stack of feature maps, and each filter should have a third dimension of corresponding size. The function should output a number of two-dimensional feature maps corresponding to the number of input filters, though these can be stacked into a third dimension like the input layer. After agreeing on a common function with your group members, show this to your teacher.

In [32]:
class Conv2D:
    '''A convolution layer using ``kernal_size`` x ``kernal_size`` filters.

    Applies a bank of ``num_filters`` square filters to a two-dimensional
    image or to a three-dimensional stack of feature maps (height, width,
    channels), with padding = 0 and stride = 1.

    The filters are two-dimensional: for a three-dimensional input the same
    filter is applied to every channel and the per-channel responses are
    summed. The window is multiplied element-wise with the filter without
    flipping it (i.e. cross-correlation).

    TODO: filters with their own third (channel) dimension
    TODO: padding
    '''

    def __init__(self, num_filters, kernal_size):
        '''Create ``num_filters`` random filters.

        ::num_filters: number of output feature maps
        ::kernal_size: side length of each square filter

        ``self.filters`` is a 3 dimensions array
        (num_filters, kernal_size, kernal_size).
        '''
        self.num_filters = num_filters
        self.kernal_size = kernal_size
        # Filter size follows kernal_size instead of being fixed at 3x3.
        self.filters = np.random.randn(num_filters, kernal_size, kernal_size)

    def iterate_regions(self, image):
        '''Generates every kernal_size x kernal_size region of a 2d image.

        Yields ``(im_region, i, j)`` where (i, j) is the top-left corner of
        the region, which is also its position in the output feature map.
        '''
        h, w = image.shape
        k = self.kernal_size

        # Number of valid window positions along an axis is size - k + 1.
        for i in range(h - k + 1):
            for j in range(w - k + 1):
                yield image[i:(i + k), j:(j + k)], i, j

    def sub_forward(self, inputs):
        '''Convolve one 2d feature map with every filter.

        ::inputs: 2d array (h, w), e.g. 28x28
        ::outputs: 3d array (h - k + 1, w - k + 1, num_filters), e.g. 26x26x8
        '''
        h, w = inputs.shape
        k = self.kernal_size

        # for now, padding = 0 and stride = 1
        outputs = np.zeros((h - k + 1, w - k + 1, self.num_filters))

        for im_region, i, j in self.iterate_regions(inputs):
            # Broadcast the region against all filters, then reduce each
            # filter's window to a single response.
            outputs[i, j] = np.sum(im_region * self.filters, axis=(1, 2))
        return outputs

    def forward(self, inputs):
        '''Apply the layer to a 2d image or a 3d (h, w, c) stack.

        Returns a 3d array (h - k + 1, w - k + 1, num_filters).
        Raises AttributeError when the input is neither 2d nor 3d.
        '''
        if inputs.ndim == 2:
            return self.sub_forward(inputs)

        if inputs.ndim == 3:
            # Axis order is (height, width, channels); unpacking it in that
            # order keeps non-square inputs consistent with sub_forward.
            h, w, c = inputs.shape
            k = self.kernal_size
            container = np.zeros((h - k + 1, w - k + 1, self.num_filters))

            for channel in range(c):
                container += self.sub_forward(inputs[:, :, channel])
            return container

        raise AttributeError(
            'Conv2D.forward expects a 2d or 3d array, got %d dimensions' % inputs.ndim
        )

### Question 21:
Write a simple function that achieves rectified linear (relu) activation over a whole feature map, with a threshold at zero. After agreeing on a common function with your group members, show this to your teacher.

Answer 21:

In [33]:
class Activation:
    '''Collection of element-wise activation functions (relu, sigmoid,
    leakyRelu) plus softmax.
    '''
    def __init__(self):
        pass

    def relu(self, in_features):
        '''A simple function that achieves rectified linear (relu) activation over a whole feature map, with a threshold at zero.

        in_features can be numpy array, scalar, vector, or matrix
        '''
        return np.maximum(0, in_features)

    def sigmoid(self, in_features):
        '''Apply sigmoid activation function

        in_features can be numpy array, scalar, vector, or matrix
        '''
        return 1/(1+np.exp(-in_features))

    def leakyRelu(self, in_features, alpha=0.1):
        '''Apply leakyRelu activation function

        Positive values pass through unchanged; the rest are scaled by alpha.

        in_features can be numpy array, scalar, vector, or matrix
        '''
        return np.where(in_features > 0, in_features, in_features * alpha)

    def softmax(self, in_features):
        '''A function that converts the activation of a 1-dimensional matrix (such as the output of a fully-connected layer)
        into a set of probabilities that each matrix element is the most likely classification.

        This should include the algorithmic expression of a softmax (normalised exponential) function.
        The normalisation runs over all elements of in_features.

        in_features can be numpy array, scalar, vector, or matrix
        '''
        in_features = np.asarray(in_features)
        # Subtracting the maximum leaves the result unchanged mathematically
        # but keeps np.exp from overflowing to inf (which would give nan).
        expo = np.exp(in_features - np.max(in_features))
        return expo / np.sum(expo)

    @staticmethod
    def softmax_derive():
        '''Placeholder for the softmax derivative (not implemented yet).'''
        pass




### Question 22:
Write a simple function that achieves max pooling. This should allow you to specify the spatial extent of the pooling, with the size of the output feature map changing accordingly. After agreeing on a common function with your group members, show this to your teacher.

Answer 22:

In [34]:
class MaxPooling:
    '''Max pooling over a stack of feature maps.

    Specify the spatial extent of the pooling (``pool``) and the step between
    windows (``stride``), with the size of the output feature map changing
    accordingly.
    '''
    def __init__(self, pool=2, stride=2):
        '''
        ::pool: side length of the square pooling window
        ::stride: step between the top-left corners of consecutive windows

        Raises ValueError when pool or stride is not positive.
        '''
        if pool <= 0 or stride <= 0:
            raise ValueError('pool and stride must be positive')
        self.pool = pool
        self.stride = stride

    def _output_size(self, length):
        '''Number of complete pooling windows that fit along one axis.'''
        return max(0, (length - self.pool) // self.stride + 1)

    def iterate_regions(self, image):
        '''Generates the pool x pool image regions to pool over.

        Windows start every ``stride`` pixels; they do not overlap when
        stride == pool. Yields ``(im_region, i, j)`` with (i, j) the position
        in the output feature map.
        '''
        h, w, _ = image.shape

        for i in range(self._output_size(h)):
            for j in range(self._output_size(w)):
                # stride sets where the window starts, pool sets how big it is
                top = i * self.stride
                left = j * self.stride
                im_region = image[top:(top + self.pool), left:(left + self.pool)]
                yield im_region, i, j

    def forward(self, inputs):
        '''Apply a forward for the maxpooling layer

        ::input is a 3d numpy array with dimensions (h, w, num_filters)
        ::output is a 3d numpy array with dimensions
          ((h - pool) // stride + 1, (w - pool) // stride + 1, num_filters),
          which is (floor(h/2), floor(w/2), num_filters) for the defaults.
        '''
        h, w, num_filters = inputs.shape

        output = np.zeros((self._output_size(h), self._output_size(w), num_filters))

        for im_region, i, j in self.iterate_regions(inputs):
            # Maximum over the two spatial axes, kept separately per feature map.
            output[i, j] = np.amax(im_region, axis=(0, 1))

        return output

### Question 23:
Write a simple function that achieves normalisation within each feature map,
modifying the feature map so that its mean value is zero and its standard deviation is one. After agreeing on a common function with your group members, show this to your teacher.

Answer 23:

In [35]:
class Normalization():
    '''Normalisation helpers (zero-mean/unit-std, min-max, log transforms).

    Every method takes an optional ``axis`` (default 0, the original
    behaviour) selecting the axis or axes the statistics are computed over.
    For a (h, w, num_filters) stack, ``axis=(0, 1)`` normalises within each
    feature map; ``axis=None`` normalises over the whole array.
    '''
    def __init__(self):
        # Small constant added to denominators to avoid division by zero.
        self.epsilon = np.finfo(float).eps

    def zeromean(self, in_features, axis=0):
        '''Normalisation within each feature map, modifying the feature map
        so that its mean value is zero and its standard deviation is one.
        '''
        mean = np.mean(in_features, axis=axis, keepdims=True)
        std = np.std(in_features, axis=axis, keepdims=True)
        return (in_features - mean) / (std + self.epsilon)

    def minmax(self, in_features, axis=0):
        '''min-max normalization: rescales values into the range [0, 1].
        '''
        low = np.amin(in_features, axis=axis, keepdims=True)
        high = np.amax(in_features, axis=axis, keepdims=True)
        return (in_features - low) / (high - low + self.epsilon)

    def loge(self, in_features, axis=0):
        '''log transform normalization

        Divides the natural log of the values by the natural log of their
        maximum.

        note: np.log is ln, whereas np.log10 is standard base 10 log.
        '''
        peak = np.amax(in_features, axis=axis, keepdims=True)
        return np.log(in_features) / np.log(peak)

    def log10(self, in_features, axis=0):
        '''log transform normalization

        Divides the base 10 log of the values by the base 10 log of their
        maximum.

        note: np.log is ln, whereas np.log10 is standard base 10 log.
        '''
        peak = np.amax(in_features, axis=axis, keepdims=True)
        return np.log10(in_features) / np.log10(peak)

### Question 24: 
Write a function that produces a fully-connected layer. This should allow you to specify the number of output nodes, and link each of these to every node in a stack of feature maps. The stack of feature maps will typically be flattened into a 1-dimensional matrix first. After agreeing on a common function with your group members, show this to your teacher. 

Answer 24:


In [36]:
class FC:
    '''fully-connected layer
    specify the number of output nodes, and link each of these to every node a stack of feature maps.
    the stack of feature maps will typically be flattened into a 1-dimensional matrix first.
    '''
    def __init__(self, in_dim, out_dim):
        '''Divide by in_dim to reduce the variance of our initial values

        in_dim = Inputs Numbers of Neuron
        out_dim = Outputs Numbers of Neuron
        '''
        self.weights = np.random.randn(in_dim, out_dim) / in_dim
        self.biases = np.zeros(out_dim)

    def forward(self, inputs):
        '''Flatten the inputs and return ``inputs . weights + biases``.

        ::inputs: array (or array-like) of any shape holding in_dim values
        ::outputs: 1d numpy array of length out_dim

        Raises ValueError when the number of input values does not match
        the first dimension of the weights.
        '''
        flat = np.asarray(inputs).ravel()

        in_dim = self.weights.shape[0]
        if flat.size != in_dim:
            raise ValueError(
                'FC.forward expected %d input values, got %d' % (in_dim, flat.size)
            )

        return np.dot(flat, self.weights) + self.biases

    def clac(in_dim, out_dim=10):
        # NOTE(review): this helper takes no `self` and reads a name `output`
        # that is not defined in this class, so it only works if a global
        # `output` happens to exist. It looks like leftover code; confirm the
        # intent before relying on it (behaviour left unchanged).
        return in_dim.count_dimension(output)

In [37]:
class clac:
    '''Small collection of utility functions.
    '''
    @staticmethod
    def count_dimension(inputs):
        '''Return the total number of elements in ``inputs``.

        This is the product of the entries of ``inputs.shape``. A 0-d input
        (empty shape) counts as one element. Declared as a staticmethod so it
        can be called on the class or on an instance.
        '''
        # The initial value 1 makes the product well defined for an empty shape.
        return reduce(lambda x, y: x * y, inputs.shape, 1)
  

### Question 25:
Write a function that converts the activation of a 1-dimensional matrix (such as the output of a fully-connected layer) into a set of probabilities that each matrix element is the most likely classification. This should include the algorithmic expression of a softmax (normalised exponential) function. After agreeing on a common function with your group members, show this to your teacher.

Answer 25:

In [38]:
class Softmax:
    '''A standard fully-connected layer with softmax activation.

    Refer https://deepai.org/machine-learning-glossary-and-terms/softmax-layer
    '''

    def __init__(self, in_dim, out_dim):
        '''Divide by in_dim to reduce the variance of our initial values

        in_dim = Inputs Numbers of Neuron
        out_dim = Outputs Numbers of Neuron
        '''
        self.weights = np.random.randn(in_dim, out_dim) / in_dim
        self.biases = np.zeros(out_dim)

    def forward(self, inputs):
        '''
        Performs a forward pass of the softmax layer using the given input.
        Returns a 1d numpy array containing the respective probability values.
        - input can be any array with any dimensions; it is flattened first
          and must hold in_dim values.
        '''
        flat = np.asarray(inputs).ravel()

        feature = np.dot(flat, self.weights) + self.biases

        # softmax function; shifting by the maximum does not change the
        # result but prevents np.exp from overflowing to inf (and then nan).
        expo = np.exp(feature - np.max(feature))
        return expo / np.sum(expo, axis=0)

# Import Test data

In [39]:
# Load the CIFAR-10 train/test split through Keras.
(x_train, y_train), (x_test, y_test) = keras.datasets.cifar10.load_data()

# Rescale the pixel values of both image sets to the [0, 1] range.
x_train, x_test = (
    pixels.astype("float32") / 255 for pixels in (x_train, x_test)
)

# The images already carry a trailing colour-channel dimension.
print("x train shape", x_train.shape)
print("x test shape", x_test.shape)

# Turn the integer class labels into one-hot (binary class) matrices.
num_classes = 10
y_train, y_test = (
    keras.utils.to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

print("y train shape:", y_train.shape)
print("y test shape:", y_test.shape)

x train shape (50000, 32, 32, 3)
x test shape (10000, 32, 32, 3)
y train shape: (50000, 10)
y test shape: (10000, 10)


## Check test image size

In [40]:
# Take the first training image as the sample used by the checks below.
image = x_train[0]
print(image.shape)

(32, 32, 3)


## Check Conv

In [41]:
# Run the 3-channel sample image through a convolution layer with 8 filters.
conv = Conv2D(num_filters=8, kernal_size=3)
output = conv.forward(image)
print(output.shape)

(30, 30, 8)


In [42]:
# Same layer applied to a single channel, exercising the 2d input path.
image_reduce = image[:,:,0]
output_2d = conv.forward(image_reduce)
print(output_2d.shape)

(30, 30, 8)


## Check MaxPooling


In [43]:
# Pool the convolution output with the default pool and stride.
# Note: `output` is overwritten, so re-running this cell alone pools again.
maxpool = MaxPooling()
output = maxpool.forward(output)
print(output.shape)

(15, 15, 8)


## Check second Conv

In [44]:
# Second convolution, applied to the pooled feature maps, with 32 filters.
conv = Conv2D(num_filters=32, kernal_size=3)
output = conv.forward(output)
print(output.shape)

(13, 13, 32)


## Check second MaxPooling

In [45]:
# Second pooling step, applied to the output of the second convolution.
maxpool = MaxPooling()
output = maxpool.forward(output)
# Shape print left disabled.
#print(output.shape)

## Check FC

In [46]:
# fully connected layer
# in_dim is the total number of values in the pooled stack; FC.forward flattens it.

output = FC(in_dim=clac.count_dimension(output), out_dim=10).forward(output)
print(output.shape)

(10,)


In [47]:
# Number of values in the fully-connected output.
clac.count_dimension(output)

10

## Check SoftMax

In [48]:
# Turn the fully-connected output into class probabilities.
output = Softmax(in_dim=clac.count_dimension(output), out_dim=10).forward(output)
print(output.shape)

(10,)


In [49]:
# Total number of values in the sample image (height * width * channels).
clac.count_dimension(image)

3072

In [50]:
# Sanity check: the softmax probabilities should add up to 1.
sum(output)

1.0

In [51]:
# Display the ten class probabilities.
output

array([0.08359444, 0.12243041, 0.09749358, 0.13145643, 0.09691764,
       0.13286025, 0.10252805, 0.07351275, 0.07899229, 0.08021418])

In [52]:
#Create a class | It is needed to store the weights of hidden layers and outputs layers
class NeuralNetwork:
    '''Two-layer sigmoid network that stores its own weights.

    ``wij`` holds the input-to-hidden weights and ``wjk`` the
    hidden-to-output weights.

    NOTE(review): ``__init__`` gives ``wij`` the same 3-d shape as the
    ``hidden`` argument. ``gradient_descent`` relies on ``np.dot(x, wij)``,
    which needs the last axis of ``x`` to match the first (for 2-d weights)
    or second-to-last axis of ``wij``. Passing an image as both ``hidden``
    and ``x`` therefore fails with a shape error.
    '''
    def __init__(self, hidden, output):
        '''
        ::hidden: 3d array whose shape is used for the input-to-hidden weights
        ::output: array whose first dimension sets the number of rows of the
                  hidden-to-output weights
        '''
        #np.random.seed(10) # for generating the same results
        self.wij   = np.random.rand(hidden.shape[0], hidden.shape[1], hidden.shape[2]) # input to hidden layer weights
        self.wjk   = np.random.rand(output.shape[0],1) # hidden layer to output weights

    def sigmoid(self, x, w):
        '''Sigmoid of the weighted input ``np.dot(x, w)``.'''
        z = np.dot(x, w)
        return 1/(1 + np.exp(-z))

    @staticmethod
    def conv2d():
        '''Placeholder (not implemented).'''
        pass

    @staticmethod
    def maxPool():
        '''Placeholder (not implemented).'''
        pass

    def sigmoid_derivative(self, x, w):
        '''Derivative of the sigmoid evaluated at ``np.dot(x, w)``.'''
        activation = self.sigmoid(x, w)  # computed once instead of twice
        return activation * (1 - activation)

    def gradient_descent(self, x, y, iterations):
        '''Run ``iterations`` weight updates on inputs ``x`` and targets ``y``.

        Both weight arrays are updated in place; nothing is returned. No
        learning rate is applied to the updates.
        '''
        for i in range(iterations):
            Xi = x
            Xj = self.sigmoid(Xi, self.wij)
            yhat = self.sigmoid(Xj, self.wjk)
            # output-layer error term, shared by both gradients
            delta_out = (y - yhat) * self.sigmoid_derivative(Xj, self.wjk)
            # gradients for hidden to output weights
            g_wjk = np.dot(Xj.T, delta_out)
            # gradients for input to hidden weights
            g_wij = np.dot(Xi.T, np.dot(delta_out, self.wjk.T) * self.sigmoid_derivative(Xi, self.wij))
            # update weights (both gradients were computed from the old weights)
            self.wij += g_wij
            self.wjk += g_wjk

In [53]:
# Use a single training example: the first image and its one-hot label
# (class 6 according to the original note).
x = x_train[0]
y = y_train[0]

# Forward pass through the layers: convolution -> max pooling -> fully connected
conv = Conv2D(num_filters=32, kernal_size=3)
conv_array = conv.forward(x)
maxpool = MaxPooling(pool=2, stride=2)
maxpool_array = maxpool.forward(conv_array)
# The fully-connected layer consumes the pooled maps; feeding it conv_array
# would bypass the pooling step computed just above.
fc = FC(in_dim=clac.count_dimension(maxpool_array), out_dim=10)
fc_array = fc.forward(maxpool_array)

#create the network
nn = NeuralNetwork(x, y.T)

#print('Initital weights hidden', nn.wij)
#print('\n Initital weights output', nn.wjk)



In [54]:
# Shape of the pooled feature maps.
maxpool_array.shape

(15, 15, 32)

In [55]:
# Shape of the convolution filter bank (num_filters, kernel height, kernel width)
conv.filters.shape

(32, 3, 3)

In [56]:
# Shape of the NeuralNetwork input-to-hidden weights (copied from the input image shape)
nn.wij.shape

(32, 32, 3)

In [57]:
# Shape of the convolution output (feature maps stacked on the last axis)
conv_array.shape

(30, 30, 32)

In [58]:
# Shape of the fully-connected output
fc_array.shape

(10,)

In [59]:
# Shape of the NeuralNetwork hidden-to-output weights
nn.wjk.shape

(10, 1)

In [60]:
#run the model

#number of epochs -> how many loops the network will run 
epochs = 10
#learning rate -> how fast the parameters are updated
# NOTE(review): alpha is defined here but not passed to gradient_descent.
alpha = 0.1 

# NOTE(review): this call raises the ValueError shown in the cell output.
# nn.wij was created with the same 3-d shape as x, so np.dot(x, nn.wij)
# cannot align the two arrays.
nn.gradient_descent(x, y.T, epochs)

ValueError: shapes (32,32,3) and (32,32,3) not aligned: 3 (dim 2) != 32 (dim 1)

In [None]:
# Shape of the (transposed) label vector passed to the network.
y.T.shape

# Build Model

In [None]:

# Number of training samples the loop below iterates over.
select = 80

#need to receive softmax
def model(inputs, soft=0, first=True):
    '''Forward pass: convolution -> max pooling -> fully connected -> softmax.

    ::inputs: image (2d) or stack of feature maps (3d) fed to the convolution
    ::soft: Softmax layer to reuse; only read when ``first`` is False
    ::first: when True, build new randomly initialised layers and remember
             the convolution and fully-connected layers for later calls;
             when False, reuse those layers together with ``soft``

    Returns ``(soft, output)``: the Softmax layer that was used and the 1d
    array of class probabilities.

    Raises RuntimeError when called with ``first=False`` before any call
    with ``first=True``.
    '''
    if first:
        # Build the layers once (avoid random weights every time).
        conv = Conv2D(num_filters=32, kernal_size=3)
        pooled = MaxPooling(pool=2, stride=2).forward(conv.forward(inputs))
        fc = FC(in_dim=clac.count_dimension(pooled), out_dim=10)
        logits = fc.forward(pooled)

        # A Softmax object is returned so the caller can update its weights.
        soft = Softmax(in_dim=clac.count_dimension(logits), out_dim=10)

        # Remember the layers on the function itself so the reuse branch does
        # not depend on a global `conv` or on freshly randomised FC weights.
        model._layers = (conv, fc)
        return soft, soft.forward(logits)

    layers = getattr(model, '_layers', None)
    if layers is None:
        raise RuntimeError('model(first=False) called before model(first=True)')
    conv, fc = layers

    pooled = MaxPooling(pool=2, stride=2).forward(conv.forward(inputs))
    logits = fc.forward(pooled)

    # `soft` is a Softmax instance: run its forward pass rather than calling
    # it like a class.
    return soft, soft.forward(logits)
    
#number of epochs -> how many loops the network will run 
epochs = 10
#learning rate -> how fast the parameters are updated
alpha = 0.1 
# True and predicted class indices, recorded when a prediction matches its label.
real_values = []
predict_values = []

for j in range(0,select):
    x = x_train[j]
    y = y_train[j] # one-hot label of sample j
    for i in range(0,epochs):    
        if i == 0: #first time :(
            #forward phase
            soft, output = model(inputs=x)
        else:
            # NOTE(review): `first` is not passed, so it keeps its default of
            # True and model() builds new random layers on this call as well;
            # the `soft` passed in (and any update made to it) is replaced.
            soft, output = model(inputs=x, soft=soft)
        # print("output array \n", output)
        # print('real array', y)
        # print('Number predicted', np.argmax(output)) #returns the index of the maximum number

        #backward phase 
        #evaluate error
        # Element-wise difference between predicted probabilities and the one-hot label.
        error = (output - y)
        # NOTE(review): `error` is non-positive wherever output <= y, so
        # np.log gives nan/-inf there; error_cross is not read again in this cell.
        error_cross = -np.log(error)
        # Difference between predicted and true class index; 0 means a correct prediction.
        error_class = np.argmax(output) - np.argmax(y)
        # print("Error classification", error_class)
        # print('total error', np.sum(error))
        # print("\n")

        #update weights error
        # partial derivatives
        #need to implement

        # average for total gradients (need a function to evaluate error by position like cross entropy)

        #evaluate error by prediction and classification!
        if error_class != 0:
            #store cross_entropy error to update weights
            # NOTE(review): `error` is indexed with the class-index
            # *difference*, which can be negative (indexing from the end);
            # confirm which position was intended.
            total_output = -np.log(error[error_class])
            
            # update weights of the OBJECT softmax
            # Scales every weight by the same scalar; the biases array is
            # replaced by the scalar 0.
            soft.weights = soft.weights*total_output*-alpha
            soft.biases = 0
            
            #update convolution layer filters as well
            #conv.filters = conv.filters*total_output*-alpha

            
        else:
            #stop finding
            #print('local optimal')
            real_values.append(np.argmax(y))
            predict_values.append(np.argmax(output))
            break
      

#final prediction        
# These values come from the last sample and epoch processed by the loop above.
print("output array \n", output)
print('real array', y)
print('Number predicted', np.argmax(output)) #returns the index of the maximum number  
print("Real Number", np.argmax(y))
print('Cross error', -np.log(error[error_class]))



In [None]:
# Inspect the second filter of the module-level convolution layer.
conv.filters[1]

In [None]:
# Shape of a single training image.
x_train[0].shape

In [None]:
# NOTE(review): this rebinds `output` (previously an array of probabilities)
# to a Conv2D layer object; later cells that read `output` see the layer.
output = Conv2D(num_filters=32, kernal_size=3)
output.filters.shape

In [None]:
# Print each recorded prediction next to its true class; both lists are
# appended to together in the training loop, so they have equal length.
for predicted, real in zip(predict_values, real_values):
    print(predicted, real)
                 


In [None]:
# Scratch: build an array of ones with the same shape as the softmax weights.
new = soft.weights * 0
new = new + 1
print(new.shape)
new

In [None]:
# Scratch: a dummy per-class loss vector with one entry per class.
loss = np.array([0,1,2,3,4,5,6,7,8,9])
print(loss.shape)
loss

In [None]:
# Scratch: reshape the loss to a column so it broadcasts across the rows of `new`.
new * loss.reshape(10,1)

In [None]:
# NOTE(review): error_cross was assigned -np.log(error) in the training cell,
# so this indexes `error` with an array of floats rather than a class index.
# Possibly error[error_class] was intended; confirm.
error_cross = -np.log(error[error_cross])
error_cross

In [None]:
# Shape of the softmax layer weights.
soft.weights.shape