## neural net workspace

In [2]:
import numpy as np

In [None]:
'''
NN functions
'''
# def nonlinear g(x) for transformation into 0 to 1 scale
# is there a way to do this with ReLU? look at ISLR notes 
def sig(x):
    return 1 / (1 + np.exp(-x))
    
def dsig(x):
    return x * (1 - x)

# def training function(s)
def nxt_layer(inputs, synap_weights):
    # obtain next layer from multiplying the inputs by the synaptic weights
    return sig(np.dot(inputs, synap_weights)) # returns 

# n is the number of iterations
def training(synap_weights, inputs, outputs, n):
    
    for n in range(n):
        # find the output your network produces
        nn_output = nxt_layer(inputs, synap_weights) # 1x5 matrix
        
        # cost = (pred - actual)**2, where pred is product of sigmoid(x)
        # in this network, the a fn is just sigmoid(net_output)
        cost = (outputs - nn_output)**2 # 1x5 matrix
        
        # compute gradient to find the change in the weights (result: 2Lx1 matrix where L is layer number)
        dcost = (2*outputs - nn_output)
        
        # here, our simplified dC/dw and dC/db are the same function
        grad_cost = np.dot(inputs.T, dcost * dsig(nn_output)) # 4x5 dot 1x5 = 4x1 matrix
        synap_weights += grad_cost
        
    return synap_weights

In [None]:
'''
CNN functions 
'''
# convolution functions from the internet... tensorflow for computer vision, towardsdatascience
# calculate dim of img after convolution (assumes img and filter are square)
def calculate_target_size(img_size: int, kernel_size: int) -> int:
    num_pixels = 0
    
    # From 0 up to img size (if img size = 224, then up to 223)
    for i in range(img_size):
        # Add the kernel size (let's say 3) to the current i
        added = i + kernel_size
        # It must be lower than the image size
        if added <= img_size:
            # Increment if so
            num_pixels += 1
            
    return num_pixels


# the actual convolution ... where kernel = filter
def convolve(img: np.array, kernel: np.array) -> np.array:
    # Assuming a rectangular image
    tgt_size = calculate_target_size(
        img_size=img.shape[0],
        kernel_size=kernel.shape[0]
    )
    # To simplify things
    k = kernel.shape[0]
    
    # 2D array of zeros
    convolved_img = np.zeros(shape=(tgt_size, tgt_size))
    
    # Iterate over the rows
    for i in range(tgt_size):
        # Iterate over the columns
        for j in range(tgt_size):
            # img[i, j] = individual pixel value
            # Get the current matrix
            mat = img[i:i+k, j:j+k]
            
            # Apply the convolution - element-wise multiplication and summation of the result
            # Store the result to i-th row and j-th column of our convolved_img array
            convolved_img[i, j] = np.sum(np.multiply(mat, kernel))
            
    return convolved_img

# stride size is the amount you move to get to the next pool
def get_pools(img: np.array, pool_size: int, stride: int) -> np.array:
    # To store individual pools
    pools = []
    
    # Iterate over all row blocks (single block has `stride` rows)
    for i in np.arange(img.shape[0], step=stride):
        # Iterate over all column blocks (single block has `stride` columns)
        for j in np.arange(img.shape[0], step=stride):
            
            # Extract the current pool
            mat = img[i:i+pool_size, j:j+pool_size]
            
            # Make sure it's rectangular - has the shape identical to the pool size
            if mat.shape == (pool_size, pool_size):
                # Append to the list of pools
                pools.append(mat)
                
    # Return all pools as a Numpy array
    return np.array(pools)


### giving it a shot

In [174]:
'''
baby heart: 3x3
[[0, 0, 0],
 [1, 0, 1],
 [0, 1, 0]]
2x2 filter -> 2x2 output
2x2 filter -> binary output

the heart: 5x5 matrix
[[0, 1, 0, 1, 0],
 [1, 0, 1, 0, 1],
 [1, 0, 0, 0, 1], 
 [0, 1, 0, 1, 0],
 [0, 0, 1, 0, 0]]
 try using a 3x3 filter -> 3x3 resulting image
 
then use a 2x2 filter on the 3x3 -> 2x2 image
 
another 2x2 filter on the 2x2 -> binary output
 '''

baby_heart = np.array([[0, 0, 0],
                       [1, 0, 1],
                       [0, 1, 0]])
filter1 = np.random.random((2, 2))
filter2 = np.random.random((2, 2))
correct_output = 1
test_output = sig(convolve(convolve(baby_heart, filter1), filter2))
print("Before CNN: ")
print("filter1: \n", filter1)
print("filter2: \n", filter2)
print("Output: ", test_output)
print("\n")

for n in range(10):
    # find the output your network produces
    layer1 = convolve(baby_heart, filter1)
    #print(layer1)

    test_output = sig(convolve(layer1, filter2))
    #print(test_output)

    # dC for the connection between the layer and the output
    dcost = (2*test_output - correct_output)
    #print(dcost)

    gradcost1 = dsig(dcost) * layer1 # sus
    new_filter1 = filter1 + gradcost1
    #print(new_filter1)

    # dC for the connection between the input and the layer
    dcost = (2*filter1 - new_filter1)
    #print(dcost)

    gradcost2 = dsig(dcost) * new_filter1 # my grad functions are sus
    new_filter2 = filter2 + gradcost2
    #print(new_filter2)

    filter1 = new_filter1
    filter2 = new_filter2
    
print("After CNN: ")
test_output = sig(convolve(convolve(baby_heart, filter1), filter2))
print("filter1: \n", filter1)
print("filter2: \n", filter2)
print("Output: ", test_output)
    
# manually collected stats: 75% accuracy

Before CNN: 
filter1: 
 [[0.14675589 0.09233859]
 [0.18626021 0.34556073]]
filter2: 
 [[0.39676747 0.53881673]
 [0.41919451 0.6852195 ]]
Output:  [[0.65867017]]


After CNN: 
filter1: 
 [[0.52104937 0.56175707]
 [0.8871335  0.94355107]]
filter2: 
 [[1.33800068 1.52446515]
 [1.52324451 1.77478802]]
Output:  [[0.99940587]]


In [173]:
'''
initiating a neural net
note that all matrix notes are in row column order
referenced amatullah's NN code
'''

# trying it out: single layer NN
# initiate synaptic weights
np.random.seed(1)
test_weights = 2 * np.random.random((4, 1)) - 1 # initialize random synaptic weights in 4x1 matrix (rand * 2 - 1)

test_inputs = np.array([[0, 0, 1, 0], [1, 0, 1, 1], [1, 0, 1, 0], [0, 1, 1, 0], [1, 0, 1, 1]])
test_outputs = np.array([[0, 1, 0, 0, 1]]).T # 2D, 1x5 matrix
print("Before training:")
print("Test weights: \n", test_weights)
print("Network output: \n", nxt_layer(test_inputs, test_weights), "\n")

new_test_weights = training(test_weights, test_inputs, test_outputs, 1000)
print("After training: ")
print("New test weights in network: \n", new_test_weights)

# see whether these new weights -> correct output
print("New network output: \n", nxt_layer(test_inputs, new_test_weights))

Before training:
Test weights: 
 [[-0.16595599]
 [ 0.44064899]
 [-0.99977125]
 [-0.39533485]]
Network output: 
 [[0.2689864 ]
 [0.1734943 ]
 [0.23762817]
 [0.36375058]
 [0.1734943 ]] 

After training: 
New test weights in network: 
 [[ 0.78385909]
 [-0.89871616]
 [-3.92642712]
 [10.2851889 ]]
New network output: 
 [[0.01933285]
 [0.99920995]
 [0.04138512]
 [0.00796151]
 [0.99920995]]


In [110]:
# another test run, but generating 1000 different samples rather than running training fn 1000 times

test_weights = np.random.random((4, 1))
test_inputs = np.array([[0, 0, 1, 0], [1, 0, 1, 1], [1, 0, 1, 0], [0, 1, 1, 0], [1, 0, 1, 1]])
test_outputs = np.array([[0, 1, 0, 0, 1]]).T # 2D, 1x5 matrix
print("Before training:")
print("Test weights: \n", test_weights)
print("Network output: \n", nxt_layer(test_inputs, test_weights), "\n")

for i in range(100):
    training_inputs = np.random.randint(0, 2, size = (5, 4)) # generate random test inputs between 0, 1
    #print(training_inputs)
    training_outputs = np.array([training_inputs[:, 0]]).T
    #print(training_outputs)
    
    new_test_weights = training(test_weights, training_inputs, training_outputs, 1)
    #print(new_test_weights)

print("After training: ")
print("New test weights in network: \n", new_test_weights)

# see whether these new weights -> correct output
# we expect each output to be around 0.5, since the assignment of the first column is random... confused
print("New network output: \n", nxt_layer(test_inputs, new_test_weights))

Before training:
Test weights: 
 [[0.34832992]
 [0.80896286]
 [0.45963719]
 [0.6261243 ]]
Network output: 
 [[0.6129281 ]
 [0.807538  ]
 [0.69167614]
 [0.78050301]
 [0.807538  ]] 

After training: 
New test weights in network: 
 [[ 7.10539303]
 [-1.06798136]
 [-0.95600473]
 [-1.12219649]]
New network output: 
 [[0.27767883]
 [0.99348552]
 [0.99786976]
 [0.11670745]
 [0.99348552]]


In [119]:
# CNN processing != matrix multiplication
# with matrix multiplication, rows of 1 need to equal the cols of 2 or vv
# CNN process is doing dot product of subsections

A = np.array([[0, 1, 2],
              [0, 1, 2], 
              [0, 1, 2]])
B = np.array([[0, 2],
              [0, 2]])

print(convolve(A, B))

[[4. 8.]
 [4. 8.]]
