### Assignment #1 Yuntong Zhu

In [38]:
import numpy as np

In [39]:
# Load the CSV (header row skipped); the last column holds the integer class
# label and every preceding column is a feature.
fname = '/Users/zoezhu/Desktop/assign1_data.csv'
data = np.genfromtxt(fname, dtype='float', delimiter=',', skip_header=1)
X = data[:, :-1]
y = data[:, -1].astype(int)

# The first 400 rows are used for training, the remaining rows for testing.
n_train = 400
X_train, X_test = X[:n_train], X[n_train:]
y_train, y_test = y[:n_train], y[n_train:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape


((400, 3), (400,), (200, 3), (200,))

## Layer

#### DenseLayer

In [40]:
class DenseLayer:
    """
    Fully connected layer computing z = inputs . weights + biases.
    """
    def __init__(self, n_inputs, n_neurons):
        """
        Create the layer parameters.
        weights: (n_inputs, n_neurons) standard-normal samples scaled by 0.01.
        biases: (1, n_neurons) array of zeros.
        """
        gaussian = np.random.randn(n_inputs, n_neurons)
        self.weights = 0.01 * gaussian
        self.biases = np.zeros((1, n_neurons))

    def forward(self, inputs):
        """
        Forward pass.
        Remembers the inputs (needed by backward) and stores the
        pre-activation output in self.z. Nothing is returned.
        """
        self.inputs = inputs
        weighted_sum = np.dot(inputs, self.weights)
        self.z = weighted_sum + self.biases

    def backward(self, dz):
        """
        Backward pass.
        Given dz, the gradient with respect to this layer's output z, stores
        the gradients in self.dweights, self.dbiases and self.dinputs.
        """
        inputs_t = self.inputs.T
        weights_t = self.weights.T
        # Weight gradient: inputs transposed times upstream gradient
        self.dweights = np.dot(inputs_t, dz)
        # Bias gradient: upstream gradient summed over the batch axis
        self.dbiases = np.sum(dz, axis=0, keepdims=True)
        # Gradient handed to the previous layer
        self.dinputs = np.dot(dz, weights_t)


## Activations

#### ReLU

In [41]:
class ReLu:
    """
    Rectified linear unit: keeps positive values and zeroes the rest.
    """
    def forward(self, z):
        """
        Store the pre-activation z and set self.activity = max(0, z)
        element-wise.
        """
        self.z = z
        rectified = np.maximum(0, z)
        self.activity = rectified

    def backward(self, dactivity):
        """
        Set self.dz to a copy of dactivity with the entries zeroed wherever
        the stored z was <= 0. dactivity itself is not modified.
        """
        blocked = self.z <= 0
        grad = dactivity.copy()
        grad[blocked] = 0.0
        self.dz = grad


#### Softmax

In [42]:
class Softmax:
    """
    Row-wise softmax activation.
    """
    def forward(self, z):
        """
        Softmax forward pass.
        z is a 2-D array with one row of scores per example. The row maximum
        is subtracted before exponentiating so np.exp cannot overflow; this
        does not change the result. The probabilities are stored in
        self.probs and also returned.
        """
        shifted = z - np.max(z, axis=1, keepdims=True)
        e_z = np.exp(shifted)
        self.probs = e_z / e_z.sum(axis=1, keepdims=True)
        return self.probs

    def backward(self, dprobs):
        """
        Softmax backward pass.
        dprobs is the gradient of the loss with respect to the probabilities
        from the last forward call (same shape as self.probs). The result is
        stored in self.dz.

        For one example the Jacobian is diag(p) - p p^T, so
        J . d = p * (d - sum(p * d)). Evaluating that closed form for all
        rows at once avoids building one Jacobian per example in a Python
        loop, and the result is always floating point even when dprobs is an
        integer array.
        """
        dprobs = np.asarray(dprobs)
        # Per-row dot product p . d, kept as a column for broadcasting
        weighted = np.sum(self.probs * dprobs, axis=1, keepdims=True)
        self.dz = self.probs * (dprobs - weighted)


## Loss function

#### Cross-entropy loss

In [43]:
class CrossEntropyLoss:
    """
    Categorical cross-entropy between predicted probabilities and one-hot
    encoded targets.
    """
    # Probabilities are clipped to [EPS, 1 - EPS] before log / division.
    EPS = 1e-7

    def _clip(self, probs):
        """
        Clip probabilities away from 0 and 1.
        Both sides are clipped so the values are not biased upwards.
        """
        return np.clip(probs, self.EPS, 1 - self.EPS)

    def forward(self, probs, oh_y_true):
        """
        Use one-hot encoded y_true.
        Returns the mean negative log likelihood over the batch.
        """
        probs_clipped = self._clip(probs)
        # Negative log likelihoods, one per example
        loss = -np.sum(oh_y_true * np.log(probs_clipped), axis=1)
        return loss.mean(axis=0)

    def backward(self, probs, oh_y_true):
        """
        Use one-hot encoded y_true.
        Stores the gradient of the mean loss with respect to probs in
        self.dprobs.
        """
        # Number of examples in the batch
        batch_sz = probs.shape[0]
        # Use the same clipping as forward: dividing by a probability that is
        # exactly 0 would otherwise produce inf / NaN gradients.
        probs_clipped = self._clip(probs)
        # Gradient of the per-example loss, normalized by the batch size
        self.dprobs = (-oh_y_true / probs_clipped) / batch_sz


## Optimizer

#### Stochastic Gradient Descent

In [44]:
class SGD:
    """
    Plain (vanilla) stochastic gradient descent optimizer.
    """
    def __init__(self, learning_rate=1.0):
        # Step size applied to every gradient
        self.learning_rate = learning_rate

    def update_params(self, layer):
        """
        Move the layer's weights and biases one step against their gradients.
        Reads layer.dweights / layer.dbiases and updates layer.weights /
        layer.biases in place.
        """
        step_size = self.learning_rate
        layer.weights -= step_size * layer.dweights
        layer.biases -= step_size * layer.dbiases


## Helper functions

#### Convert probabilities to predictions

In [45]:
def predictions(probs):
    """
    Convert per-class probabilities to predicted class indices:
    the index of the largest value in each row.
    """
    return np.argmax(probs, axis=1)


#### Accuracy

In [46]:
def accuracy(y_preds, y_true):
    """
    Fraction of predictions that equal the true labels.
    """
    hits = y_preds == y_true
    return np.mean(hits)

#### One-hot encoding

In [47]:
def one_hot_encode(y_true, n_class):
    """
    One-hot encode integer class labels by selecting rows of the
    n_class x n_class identity matrix.
    """
    identity = np.eye(n_class)
    return identity[y_true]

## Training

In [48]:
# Network architecture: 
# Forward pass
def forward_pass(X, y_true, oh_y_true):
    """
    Run one forward pass through the module-level network
    (dense1 -> activation1 -> dense2 -> activation2 -> dense3 -> softmax)
    and return the class probabilities.

    X: batch of input features, one row per example.
    y_true, oh_y_true: not used here; kept so existing callers keep working.
        The loss is computed by the caller from the returned probabilities
        (previously it was also computed here and thrown away).
    """
    dense1.forward(X)
    activation1.forward(dense1.z)

    dense2.forward(activation1.activity)
    activation2.forward(dense2.z)

    dense3.forward(activation2.activity)
    return output_activation.forward(dense3.z)



In [49]:
# A single backward pass through the entire network
def backward_pass(probs, y_true, oh_y_true):
    """
    Backpropagate through the module-level network, from the loss down to
    dense1. Each layer stores its own gradients as attributes; nothing is
    returned. y_true is not used.
    """
    # Gradient of the loss with respect to the softmax output
    crossentropy.backward(probs, oh_y_true)
    output_activation.backward(crossentropy.dprobs)

    # Walk the hidden stack from the output side towards the input
    upstream = output_activation.dz
    for dense, activation in ((dense3, activation2), (dense2, activation1)):
        dense.backward(upstream)
        activation.backward(dense.dinputs)
        upstream = activation.dz

    dense1.backward(upstream)

## Training loop

In [50]:
# Hyperparameters
# Optimization settings
learning_rate, epochs, batch_sz = 0.1, 10, 32

# Width of the first layer's input and of the final (softmax) layer
n_inputs = n_class = 3

# Network: dense(4) -> ReLu -> dense(8) -> ReLu -> dense(n_class) -> Softmax
dense1, activation1 = DenseLayer(n_inputs, 4), ReLu()
dense2, activation2 = DenseLayer(4, 8), ReLu()
dense3, output_activation = DenseLayer(8, n_class), Softmax()

# Optimizer and loss
optimizer = SGD(learning_rate)
crossentropy = CrossEntropyLoss()


In [51]:
def get_random_mini_batch(X, y, batch_sz):
    """
    Draw batch_sz distinct random row indices (sampling without replacement)
    and return the matching rows of X and entries of y.
    """
    n_samples = X.shape[0]
    picked = np.random.choice(n_samples, batch_sz, replace=False)
    X_batch = X[picked]
    y_batch = y[picked]
    return X_batch, y_batch

In [52]:
max_epochs = 100
epoch = 0

# These do not change between epochs, so compute them once up front.
n_batch = X_train.shape[0] // batch_sz
oh_y_test = one_hot_encode(y_test, n_class)

while epoch < max_epochs:
    print('epoch:', epoch)

    for batch_i in range(n_batch):
        # Random mini-batch and its one-hot encoded labels
        X_batch, y_batch = get_random_mini_batch(X_train, y_train, batch_sz)
        oh_y_batch = one_hot_encode(y_batch, n_class)

        # Forward pass and loss
        probs = forward_pass(X_batch, y_batch, oh_y_batch)
        loss = crossentropy.forward(probs, oh_y_batch)

        # Report mini-batch loss and accuracy
        y_preds = predictions(probs)
        acc = accuracy(y_preds, y_batch)
        print(f"Loss: {loss:.4f}, Accuracy: {acc:.4f}")

        # Backward pass, then one SGD step on every dense layer
        backward_pass(probs, y_batch, oh_y_batch)
        for layer in (dense1, dense2, dense3):
            optimizer.update_params(layer)

    # Evaluate on the test set after each epoch
    test_probs = forward_pass(X_test, y_test, oh_y_test)
    y_test_preds = predictions(test_probs)
    current_accuracy = accuracy(y_test_preds, y_test)
    print(f"Test Accuracy: {current_accuracy:.4f}")

    # Stop as soon as the accuracy target is reached
    if current_accuracy >= 0.9:
        print("Achieved desired accuracy! Stopping training.")
        break

    epoch += 1


epoch: 0
Loss: 1.0986, Accuracy: 0.3125
Loss: 1.0980, Accuracy: 0.3750
Loss: 1.0984, Accuracy: 0.3750
Loss: 1.1017, Accuracy: 0.1562
Loss: 1.0967, Accuracy: 0.4062
Loss: 1.0955, Accuracy: 0.4375
Loss: 1.0979, Accuracy: 0.3438
Loss: 1.1025, Accuracy: 0.2812
Loss: 1.0932, Accuracy: 0.4688
Loss: 1.0968, Accuracy: 0.3438
Loss: 1.0979, Accuracy: 0.3438
Loss: 1.1026, Accuracy: 0.3438
Test Accuracy: 0.3400
epoch: 1
Loss: 1.1018, Accuracy: 0.2812
Loss: 1.1045, Accuracy: 0.2500
Loss: 1.0984, Accuracy: 0.3438
Loss: 1.0972, Accuracy: 0.3750
Loss: 1.0940, Accuracy: 0.4375
Loss: 1.0982, Accuracy: 0.3750
Loss: 1.1065, Accuracy: 0.2188
Loss: 1.1049, Accuracy: 0.1875
Loss: 1.0970, Accuracy: 0.4688
Loss: 1.0994, Accuracy: 0.3438
Loss: 1.0966, Accuracy: 0.3438
Loss: 1.1088, Accuracy: 0.2188
Test Accuracy: 0.3400
epoch: 2
Loss: 1.0991, Accuracy: 0.2812
Loss: 1.1019, Accuracy: 0.3438
Loss: 1.0974, Accuracy: 0.4375
Loss: 1.1007, Accuracy: 0.3125
Loss: 1.1015, Accuracy: 0.2188
Loss: 1.0988, Accuracy: 0.2812

Loss: 1.0997, Accuracy: 0.3750
Loss: 1.1002, Accuracy: 0.2500
Loss: 1.0983, Accuracy: 0.3438
Loss: 1.0986, Accuracy: 0.4062
Loss: 1.0993, Accuracy: 0.3125
Loss: 1.0989, Accuracy: 0.3125
Loss: 1.0987, Accuracy: 0.3438
Test Accuracy: 0.3400
epoch: 37
Loss: 1.0995, Accuracy: 0.2188
Loss: 1.0988, Accuracy: 0.3438
Loss: 1.0986, Accuracy: 0.3438
Loss: 1.1007, Accuracy: 0.2500
Loss: 1.0987, Accuracy: 0.3125
Loss: 1.0984, Accuracy: 0.5000
Loss: 1.0993, Accuracy: 0.3125
Loss: 1.0994, Accuracy: 0.3125
Loss: 1.0996, Accuracy: 0.2812
Loss: 1.0993, Accuracy: 0.3438
Loss: 1.0988, Accuracy: 0.2812
Loss: 1.0984, Accuracy: 0.3125
Test Accuracy: 0.5000
epoch: 38
Loss: 1.0982, Accuracy: 0.5000
Loss: 1.0996, Accuracy: 0.4375
Loss: 1.0978, Accuracy: 0.3438
Loss: 1.1006, Accuracy: 0.2188
Loss: 1.0997, Accuracy: 0.2812
Loss: 1.0990, Accuracy: 0.2812
Loss: 1.0986, Accuracy: 0.3438
Loss: 1.0983, Accuracy: 0.3750
Loss: 1.0988, Accuracy: 0.3125
Loss: 1.0974, Accuracy: 0.4062
Loss: 1.1025, Accuracy: 0.2188
Loss: 

In [55]:
current_accuracy = 0

# These do not change between epochs, so compute them once up front.
n_batch = X_train.shape[0] // batch_sz
oh_y_test = one_hot_encode(y_test, n_class)

for epoch in range(epochs):
    print('epoch:', epoch)

    for batch_i in range(n_batch):
        # Random mini-batch and its one-hot encoded labels
        X_batch, y_batch = get_random_mini_batch(X_train, y_train, batch_sz)
        Oh_y_batch = one_hot_encode(y_batch, n_class)

        # Forward pass and loss
        probs = forward_pass(X_batch, y_batch, Oh_y_batch)
        loss = crossentropy.forward(probs, Oh_y_batch)

        # Report mini-batch loss and accuracy
        y_preds = predictions(probs)
        acc = accuracy(y_preds, y_batch)
        print(f"Loss: {loss:.4f}, Accuracy: {acc:.2f}")

        # Backward pass, then one SGD step on every dense layer
        backward_pass(probs, y_batch, Oh_y_batch)
        for layer in (dense1, dense2, dense3):
            optimizer.update_params(layer)

    # Evaluate on the test set after each epoch
    test_probs = forward_pass(X_test, y_test, oh_y_test)
    y_test_preds = predictions(test_probs)
    test_accuracy = accuracy(y_test_preds, y_test)
    print(f"Test Accuracy: {test_accuracy:.4f}")

    # Stop as soon as the accuracy target is reached
    if test_accuracy >= 0.9:
        print("Achieved desired accuracy. Stopping training.")
        break


epoch: 0
Loss: 0.5045, Accuracy: 0.88
Loss: 0.5578, Accuracy: 0.88
Loss: 0.4070, Accuracy: 1.00
Loss: 0.4509, Accuracy: 0.88
Loss: 0.3794, Accuracy: 0.88
Loss: 0.4683, Accuracy: 0.97
Loss: 0.4141, Accuracy: 0.91
Loss: 0.5129, Accuracy: 0.88
Loss: 0.4735, Accuracy: 0.81
Loss: 0.5128, Accuracy: 0.88
Loss: 0.4132, Accuracy: 0.94
Loss: 0.4739, Accuracy: 0.91
Test Accuracy: 0.8600
epoch: 1
Loss: 0.4697, Accuracy: 0.91
Loss: 0.4230, Accuracy: 0.91
Loss: 0.5195, Accuracy: 0.91
Loss: 0.4001, Accuracy: 0.97
Loss: 0.3453, Accuracy: 0.91
Loss: 0.4260, Accuracy: 0.97
Loss: 0.5353, Accuracy: 0.91
Loss: 0.3876, Accuracy: 0.94
Loss: 0.4964, Accuracy: 0.81
Loss: 0.4466, Accuracy: 0.94
Loss: 0.4676, Accuracy: 0.94
Loss: 0.3047, Accuracy: 0.97
Test Accuracy: 0.9150
Achieved desired accuracy. Stopping training.


In [54]:
# Evaluate the current network once on the test set.
# No weights are updated here, so every evaluation gives the same result:
# looping until the accuracy reaches 0.9 would never terminate if the first
# evaluation fell short.
test_probs = forward_pass(X_test, y_test, np.eye(n_class)[y_test])
y_test_preds = predictions(test_probs)
test_accuracy = accuracy(y_test_preds, y_test)
print(f"Test Accuracy: {test_accuracy:.4f}")

Test Accuracy: 0.9050
