In [1]:
import torch, torchvision
transform = torchvision.transforms.Compose( [torchvision.transforms.ToTensor(),torchvision.transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,download=True, transform=transform)
testset = torchvision.datasets.CIFAR10(root='./data', train=False,download=True, transform=transform)

Files already downloaded and verified
Files already downloaded and verified


In [1]:
import numpy as np

class NeuralNetwork:
    def __init__(self, layer_dimensions):
        self.layer_dimensions = layer_dimensions
        self.parameters = self.initialize_parameters()

    def initialize_parameters(self):
        parameters = {}
        dim_length = len(self.layer_dimensions)
        for layer_ind in range(1, dim_length):
            distribution = np.sqrt(6 / (self.layer_dimensions[layer_ind] + self.layer_dimensions[layer_ind - 1]))
            parameters[f'W{layer_ind}'] = np.random.uniform(-distribution ,distribution ,(self.layer_dimensions[layer_ind], self.layer_dimensions[layer_ind - 1]))
            parameters[f'b{layer_ind}'] = np.zeros((self.layer_dimensions[layer_ind], 1))
        return parameters

    def affine_forward(self, A, W, b):
        Z = np.dot(W, A) + b
        aff_fwd_cache = (A, W, b)
        return Z, aff_fwd_cache

    def activation_forward(self, A):
        Z = np.maximum(0, A)
        atv_fwd_cache = A
        return Z, atv_fwd_cache

    def forward_propagation(self, X):
        fwd_ppg_caches = dict()
        A = X
        dim_length = len(self.layer_dimensions)
        for layer_ind in range(1, dim_length):
            Z, fwd_ppg_caches [f"affine{layer_ind}"] = self.affine_forward(A, self.parameters[f'W{layer_ind}'], self.parameters[f'b{layer_ind}'])
            A, fwd_ppg_caches [f"activation{layer_ind}"] = self.activation_forward(Z)
        return A, fwd_ppg_caches

    def cost_function(self, AL, y):
        # m = y.shape[1]
        # cost = -(1/m) * np.sum(y * np.log(AL + 1e-15) + (1-y) * np.log(1 - AL + 1e-15))
        # return cost
        m = y.shape[1]
        epsilon = 1e-15
        AL = np.maximum(epsilon, AL)
        AL = np.minimum(1 - epsilon, AL)
        loss = -1/m * np.sum(y * np.log(AL) + (1 - y) * np.log(1 - AL))
        return loss

    def relu_backward(self, derivative_actvn, activation_cache):
        Z = activation_cache
        dZ = np.array(derivative_actvn, copy=True)
        dZ[Z <= 0] = 0
        return dZ

    def affine_backward(self, dZ, cache):
        A_prev, W, b = cache
        size = A_prev.shape[1]
        derivative_Weight = np.dot(dZ, A_prev.T) / size
        derivative_bias = np.sum(dZ, axis=1, keepdims=True) / size
        derivative_actvn = np.dot(W.T, dZ)
        return derivative_actvn, derivative_Weight, derivative_bias

    def activation_backward(self, derivative_actvn, cache):
        activation_cache = cache
        dZ = self.relu_backward(derivative_actvn, activation_cache)
        return dZ

    def backward_propagation(self, lastlayer_opt, Y, caches):
        gradients = {}
        size = lastlayer_opt.shape[1]
        derivative_actvn = lastlayer_opt
        dim_length = len(self.layer_dimensions)
        for layer_ind in reversed(range(1,dim_length )):
          current_cache = caches[f'affine{layer_ind}']
          derivative_actvn,gradients[f'dW{layer_ind}'],gradients[f'db{layer_ind}'] = self.affine_backward(derivative_actvn, current_cache)
          if layer_ind>1:
            current_cache = caches[f'activation{layer_ind-1}']
            derivative_actvn = self.activation_backward(derivative_actvn, current_cache)
        return gradients

    def update_parameters(self, gradients, alpha):
        dim_length = len(self.layer_dimensions)
        for layer_ind in range(1, dim_length ):
            self.parameters[f'W{layer_ind}'] -= alpha * gradients[f'dW{layer_ind}']
            self.parameters[f'b{layer_ind}'] -= alpha * gradients[f'db{layer_ind}']

    def train(self, X_train, X_val, y_train, y_val, epochs, alpha, batch_size):
        size_xtrain = X_train.shape[1]
        for epoch in range(epochs):
            for x_point in range(0, size_xtrain, batch_size):
                X_batch = X_train[:, x_point:x_point + batch_size]
                y_batch = y_train[:, x_point:x_point + batch_size]
                lastlayer_opt, caches = self.forward_propagation(X_batch)
                cost = self.cost_function(lastlayer_opt, y_batch)
                gradients = self.backward_propagation(lastlayer_opt - y_batch, y_batch, caches)
                self.update_parameters(gradients, alpha)
            train_predictions= self.predict(X_train)
            val_predictions = self.predict(X_val)
            train_accuracy = self.calculate_accuracy(train_predictions, y_train,X_train.shape[1])
            val_accuracy = self.calculate_accuracy(val_predictions, y_val,X_val.shape[1])
            print(f"Epoch {epoch + 1}/{epochs}:")
            print(f"  - Training Loss: {cost:.4f}")
            print(f"  - Training Accuracy: {100*train_accuracy:.2f}%")
            print(f"  - Validation Accuracy: {100*val_accuracy:.2f}%")

    def calculate_accuracy(self, lastlayer_opt, Y , size):
        accuracy = np.sum(lastlayer_opt==np.argmax(Y,axis = 0)) / (size)
        return accuracy


    def predict(self, X_new):
        lastlayer_opt, _ = self.forward_propagation(X_new)
        return np.argmax(lastlayer_opt,axis = 0)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def imshow(img):
  img = img / 2 + 0.5
  npimg = img.numpy()
  plt.figure(figsize=(2,2))
  plt.imshow(np.transpose(npimg, (1, 2, 0)))

def changing_to_numpy(dataset):
    x = np.array([np.array(x[0].numpy().reshape(3*32*32), dtype=np.float64) for x in dataset])
    y = np.array([x[1] for x in dataset])
    return x,y

import numpy as np

def custom_train_val_split(dataset, validation_size=0.1, random_seed=34):
    np.random.seed(random_seed)
    dataset_length = len(dataset)
    indices = np.arange(dataset_length)
    np.random.shuffle(indices)
    validation_limit = int(validation_size * dataset_length)
    validation_indices = indices[:validation_limit]
    train_indices = indices[validation_limit:]
    train_data = [dataset[i] for i in train_indices]
    validation_data = [dataset[i] for i in validation_indices]
    return train_data, validation_data






layer_dimensions = [
    3072,
    1024,
    512,
    256,
    128,
    64,
    32,
    10
]

def one_hot_encode(y, num_classes):
    one_hot_encoded = np.zeros((y.shape[0], num_classes))
    for i_point in range(len(y)):
        one_hot_encoded[i_point, y[i_point]] = 1
    return one_hot_encoded



nn_model = NeuralNetwork(layer_dimensions)

import numpy as np



train_data, val_data = custom_train_val_split(trainset)
X_train, y_train = changing_to_numpy(train_data)
X_val, y_val = changing_to_numpy(val_data)
X_test,Y_test = changing_to_numpy(testset)

y_train = one_hot_encode(y_train,10)
y_val = one_hot_encode(y_val,10)
y_test = one_hot_encode(Y_test,10)

epochs = 15
learning_rate = 0.001
batch_size = 64

nn_model.train(X_train.T, X_val.T, y_train.T, y_val.T, epochs, learning_rate, batch_size)
p = nn_model.predict(X_test.T)
testing_accuracy = nn_model.calculate_accuracy( p, y_test.T  , X_test.T.shape[1])
testing_cost = nn_model.cost_function(p,y_test.T)

print(f"  - Testing Accuracy: {100*testing_accuracy:.2f}%")


for i in range(5):
  imshow(testset[i][0])

Epoch 1/15:
  - Training Loss: 2.9674
  - Training Accuracy: 18.07%
  - Validation Accuracy: 17.62%
