In [28]:
import numpy as np
import pandas as pd
import struct as st
from scipy.signal import convolve2d

def convert_to_binary(path_images, path_labels):
    """Load an image file and a label file stored in the IDX-style binary layout.

    The image file is read as: 4 magic bytes, then three big-endian unsigned
    32-bit integers (image count, rows, columns), then one unsigned byte per
    pixel. The label file is read as: 4 magic bytes, one big-endian unsigned
    32-bit label count, then one unsigned byte per label.

    Parameters
    ----------
    path_images : str or path-like
        Path of the image file.
    path_labels : str or path-like
        Path of the label file.

    Returns
    -------
    images_array : numpy.ndarray
        Integer array of shape (nImg, nR, nC).
    labels_array : numpy.ndarray
        Integer array of shape (nLabels,).

    Raises
    ------
    struct.error
        If a header or payload is shorter than the sizes announced in the file.
    """
    # `with` guarantees both handles are closed, even if parsing fails.
    with open(path_images, 'rb') as images_file:
        # The 4 magic bytes are consumed but not validated.
        images_file.read(4)
        nImg, nR, nC = st.unpack('>3I', images_file.read(12))
        nBytesTotal = nImg * nR * nC * 1  # since each pixel data is 1 byte
        pixel_bytes = images_file.read(nBytesTotal)
    if len(pixel_bytes) != nBytesTotal:
        raise st.error(
            'image file truncated: expected %d pixel bytes, got %d'
            % (nBytesTotal, len(pixel_bytes))
        )
    # frombuffer decodes the raw bytes directly instead of building a format
    # string and a tuple with one entry per pixel; astype(int) yields a
    # writable array with numpy's default integer dtype.
    images_array = (
        np.frombuffer(pixel_bytes, dtype=np.uint8).astype(int).reshape((nImg, nR, nC))
    )

    with open(path_labels, 'rb') as labels_file:
        # Label metadata: magic bytes (unused) followed by the label count.
        labels_file.read(4)
        nLabels = st.unpack('>I', labels_file.read(4))[0]
        label_bytes = labels_file.read(nLabels)
    if len(label_bytes) != nLabels:
        raise st.error(
            'label file truncated: expected %d label bytes, got %d'
            % (nLabels, len(label_bytes))
        )
    labels_array = np.frombuffer(label_bytes, dtype=np.uint8).astype(int)

    return images_array, labels_array

def normalize(X):
    """Min-max scale X into the range [0, 1].

    Parameters
    ----------
    X : array-like
        Values to rescale; the global minimum maps to 0 and the global
        maximum maps to 1.

    Returns
    -------
    Rescaled values, computed as (X - min) / (max - min). When every element
    of X is identical the range is zero; zeros of the same shape are returned
    instead of dividing by zero (which would produce NaN).
    """
    lowest = np.min(X)
    value_range = np.max(X) - lowest
    # Only guard the scalar case; non-scalar reductions (e.g. per-column
    # results from pandas objects) keep the plain formula.
    if np.ndim(value_range) == 0 and value_range == 0:
        return np.zeros_like(X, dtype=float)
    X_normalize = (X - lowest) / value_range
    return X_normalize


def one_hot(a,num_classes):
    """Convert an array of integer class ids into one-hot rows.

    `a` is flattened, each id selects the matching row of a
    `num_classes` x `num_classes` identity matrix, and any length-1 axes of
    the result are squeezed away (so a single id yields a 1-D vector).
    """
    identity = np.eye(num_classes)
    flat_ids = a.reshape(-1)
    encoded = identity[flat_ids]
    return np.squeeze(encoded)

In [2]:
images, labels = convert_to_binary('train-images.idx3-ubyte', 'train-labels.idx1-ubyte')

# First image as a DataFrame and a zero-filled Series with one entry per
# pixel of a 28x28 image.
df = pd.DataFrame(images[0])
vector_features = pd.Series([0] * (28 * 28))

# Split at a single boundary so that no sample is dropped: the first N_TRAIN
# samples are used for training and every remaining sample for testing.
# (Slicing the test set from N_TRAIN + 1 would silently skip sample N_TRAIN.)
N_TRAIN = 50000
X_train = images[:N_TRAIN]
y_train = labels[:N_TRAIN]
X_test = images[N_TRAIN:]
y_test = labels[N_TRAIN:]

In [54]:
# Make an arbitrary 30x40 weight matrix from a seeded generator, then reverse
# it along both axes before handing it to convolve2d.
rng = np.random.RandomState(42)
weight_mat = rng.standard_normal(size=(30, 40))[::-1, ::-1]

# Use the first training image as the single input example.
single_example = np.array(X_train[0])

# NOTE: may implement this manually later on
convolution = convolve2d(
    single_example,
    weight_mat,
    mode='valid',
)

In [55]:
# Report the shapes of the convolution result, the kernel and the input, in that order.
for shaped_array in (convolution, weight_mat, single_example):
    print(shaped_array.shape)

(3, 13)
(30, 40)
(28, 28)


In [57]:
# Small worked example: a 3x3 input holding 0..8 and a 2x2 kernel holding 0..3.
input_1 = np.arange(9).reshape(3, 3)
kernel = np.arange(4).reshape(2, 2)
# Reverse the kernel along both axes before calling convolve2d.
kernel = np.flip(kernel)
convoled = convolve2d(kernel, input_1, mode='valid')
print(convoled)

[[19 25]
 [37 43]]


In [48]:
#define convolution layer
def convolution_forward(kernel, input):
    """Forward pass of the convolution layer.

    The kernel is reversed along both of its axes and then combined with
    `input` through scipy's `convolve2d` in 'valid' mode; the resulting
    array is returned.
    """
    reversed_kernel = kernel[::-1, ::-1]
    result = convolve2d(reversed_kernel, input, mode='valid')
    return result

[[5]]


In [None]:
#define ReLU layer functions
def ReLU_forward(input):
    """Apply the rectified linear unit: element-wise maximum of 0 and `input`."""
    floor = 0
    activated = np.maximum(floor, input)
    return activated

def ReLU_backward(input, grad_output):
    """Backward pass of ReLU.

    Passes `grad_output` through wherever `input` is strictly positive and
    multiplies it by zero everywhere else.
    """
    positive_mask = input > 0
    return positive_mask * grad_output

In [64]:
#define max-pooling layer to make the NN more robust (approximately invariant) to small shifts in the input
def two_x_two_max_pooling(filter, stride = 1):
    """Max-pool `filter` with 2x2 windows.

    Windows are anchored every `stride + 1` rows and columns. The maximum of
    each window (truncated at the array edges) is written at the window's
    top-left position in an output that has the same number of rows and
    columns as `filter`; every other position is left as NaN, so callers are
    expected to drop the all-NaN rows and columns afterwards.
    """
    n_rows, n_cols = filter.shape[0], filter.shape[1]
    pooled = np.full((n_rows, n_cols), np.nan)
    step = stride + 1
    for top in range(0, n_rows, step):
        for left in range(0, n_cols, step):
            window = filter[top:top + 2, left:left + 2]
            pooled[top, left] = window.max()
    return pooled
            

In [80]:
# 3x5 example input holding the values 1..15 row by row.
filter = np.arange(1, 16).reshape(3, 5)
final_mat = two_x_two_max_pooling(filter)

# Keep only the rows / columns that contain at least one non-NaN entry.
has_value = ~np.isnan(final_mat)
rows = has_value.any(axis=1)
cols = has_value.any(axis=0)
print(final_mat[np.ix_(rows, cols)])

[[ 7.  9. 10.]
 [12. 14. 15.]]


In [None]:
#for final layer we want to pass into a fully connected layer to map it to a C-dimensional vector (10 dimensions) and pass into a softmax output.


def dense_initialize(input_size, output_size = 10, learning_rate = 0.1, random_state = None):
    """Create the initial parameters of a fully connected layer.

    Parameters
    ----------
    input_size : int
        Number of input features.
    output_size : int, default 10
        Number of neurons in the layer.
    learning_rate : float, default 0.1
        Learning rate returned alongside the parameters.
    random_state : object with a `standard_normal(size=...)` method, optional
        Source of randomness for the weights. When omitted, the notebook's
        module-level `rng` is used, which must already be defined.

    Returns
    -------
    weights : numpy.ndarray
        Array of shape (input_size, output_size) drawn from a standard normal.
    LR : float
        The learning rate.
    bias : numpy.ndarray
        Zero vector of length output_size.
    """
    # Fall back to the global generator only when no explicit one is supplied,
    # so the function can be used (and tested) without hidden notebook state.
    generator = rng if random_state is None else random_state
    weights = generator.standard_normal(size = (input_size, output_size))
    LR = learning_rate
    bias = np.zeros(output_size)
    return weights, LR, bias

def dense_forward(input, weights, bias):
    """Forward pass of the fully connected layer: dot(input, weights) + bias."""
    projected = np.dot(input, weights)
    return projected + bias

def dense_backward(weights, bias, gradient_output, input, LR):
    """Backward pass and parameter update of the fully connected layer.

    Computes the gradient with respect to the layer input using the weights
    as they were before the update, then applies one gradient step of size
    `LR` to `weights` and `bias`. Both are updated in place and also returned.

    Returns
    -------
    (weights, bias, gradient_input)
    """
    # Must be computed before the weights are modified below.
    gradient_input = np.dot(gradient_output, weights.T)

    weight_step = np.array(np.dot(input.T, gradient_output))
    # Mean over the first axis scaled back by the number of input rows.
    bias_step = gradient_output.mean(axis = 0) * input.shape[0]

    weights -= LR * weight_step
    bias -= LR * bias_step
    return weights, bias, gradient_input

In [None]:
#pass through softmax for predictions

#loss and loss gradient functions
def softmax_crossentropy_with_logits(logits, reference_answers):
    """Compute per-sample cross-entropy from logits and correct class ids.

    Parameters
    ----------
    logits : numpy.ndarray
        Array indexed as [batch, n_classes].
    reference_answers : array of int
        Id of the correct class for each row of `logits`.

    Returns
    -------
    numpy.ndarray
        One loss value per row: -logit[answer] + log(sum(exp(logits))).
    """
    # Subtracting the row maximum leaves the loss unchanged mathematically but
    # keeps exp() from overflowing to inf when logits are large.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    logits_for_answers = shifted[np.arange(len(logits)), reference_answers]
    xentropy = - logits_for_answers + np.log(np.sum(np.exp(shifted), axis=-1))
    return xentropy


def grad_softmax_crossentropy_with_logits(logits, y_labels):
    """Compute the cross-entropy gradient with respect to the logits.

    Parameters
    ----------
    logits : numpy.ndarray
        Array indexed as [batch, n_classes].
    y_labels : array of int
        Id of the correct class for each row of `logits`.

    Returns
    -------
    numpy.ndarray
        (softmax(logits) - one_hot(y_labels)) divided by the batch size,
        with the same shape as `logits`.
    """
    # Shift by the row maximum so exp() cannot overflow (inf / inf -> NaN);
    # softmax is invariant to this shift. exp() is evaluated only once.
    shifted = logits - np.max(logits, axis=-1, keepdims=True)
    exp_shifted = np.exp(shifted)
    softmax = exp_shifted / exp_shifted.sum(axis=-1, keepdims=True)

    ones_for_answers = np.zeros_like(softmax)
    ones_for_answers[np.arange(len(logits)), y_labels] = 1
    return (- ones_for_answers + softmax) / logits.shape[0]