## Imports

In [263]:
import os
import numpy as np
import random
import matplotlib.pyplot as plt
from scipy.io import loadmat
from sklearn.preprocessing import PolynomialFeatures
%matplotlib


Using matplotlib backend: Qt5Agg


## Loading data

In [264]:
def load_data(path: str, add_bias: bool = True) -> tuple:
    """
    Read the feature matrix and the labels from a MATLAB .mat file.

    Args:
        path: location of the .mat file; it must contain the variables "X" and "y"
        add_bias: when True, x is returned with a column of ones in first position
    Returns:
        x: data features
        y: data labels, exactly as stored in the file (no remapping is applied)
    """
    mat_contents = loadmat(path)
    features = np.array(mat_contents["X"])
    labels = np.array(mat_contents["y"])

    if add_bias:
        # A degree-1 polynomial expansion is simply [1, x], i.e. the bias column plus the features.
        features = PolynomialFeatures(1).fit_transform(features)

    return features, labels

In [265]:
# Importing data: read the features and labels from the exercise .mat file.
# NOTE(review): the path below is absolute and machine-specific; adjust it before running elsewhere.
path = r'C:\Users\student\Hafifa\ML_intro\ex3\data\ex3data1.mat'
# add_bias=False keeps x without a leading column of ones; forward() inserts the bias row itself.
x, y = load_data(path, add_bias=False)

## Visualizing data:

In [266]:
def show_data(x, n_images=100, image_shape=(20, 20)):
    """
    Plot a square grid of randomly chosen rows of x, each rendered as a grayscale image.

    Args:
        x: 2-D array holding one flattened image per row
        n_images: number of rows to draw; rows are sampled with replacement
        image_shape: shape used to un-flatten a row before it is transposed and displayed
    Raises:
        ValueError: if x contains no rows
    """
    n_rows = x.shape[0]
    if n_rows == 0:
        raise ValueError("show_data needs at least one image row in x")

    # Sample from the rows that are actually present instead of assuming 5000 of them.
    random_index = np.random.randint(0, n_rows, n_images)
    images = x[random_index, :]

    # Smallest square grid that fits every sampled image (10 x 10 for the default 100).
    grid_side = int(np.ceil(np.sqrt(n_images)))
    plt.figure(figsize=(9, 9))

    for i in range(images.shape[0]):
        # The transpose presumably undoes a column-major (MATLAB-style) flattening -- TODO confirm.
        image = np.reshape(images[i, :], image_shape).T
        plt.subplot(grid_side, grid_side, i + 1)
        plt.imshow(image, cmap='gray')
        plt.axis('off')

    plt.show()

In [267]:
# Plot a random sample of the loaded images as a visual sanity check.
show_data(x)

In [268]:
def sigmoid(z):
    """Logistic function 1 / (1 + e^(-z)), applied element-wise to z."""
    denominator = 1 + np.exp(-z)
    return 1 / denominator

def compute_cost(theta, x, y):
    """
    Summed (not averaged) cross-entropy cost of a logistic model.

    Args:
        theta: model parameters, multiplied as x @ theta
        x: data features
        y: binary targets (0 or 1), one per row of x
    Returns:
        cost: -y . log(h) - (1 - y) . log(1 - h), where h = sigmoid(x @ theta)
    """
    # Labels read from .mat files may arrive as unsigned integers; unary minus on such an
    # array wraps around (uint8 1 becomes 255), so work on a float copy and negate the product.
    targets = np.asarray(y, dtype=float)
    h = sigmoid(x @ theta)
    cost = -(targets @ np.log(h)) - (1 - targets) @ np.log(1 - h)
    return cost

def forward(weights, x):
    """
    Propagate x through a stack of sigmoid layers.

    Args:
        weights: iterable of layer weight matrices, applied in order; each one is
            left-multiplied onto the activations after a row of ones has been
            inserted at position 0 (the bias)
        x: input data; it is transposed first, so rows of x become columns of the activations
    Returns:
        activation: output of the last layer (x.T when weights is empty)
        gradients: list holding sigmoid_gradient(z) for every layer, in layer order
    """
    activation = x.T
    layer_gradients = []

    for theta in weights:
        with_bias = np.insert(activation, 0, 1, axis=0)
        pre_activation = theta @ with_bias
        layer_gradients.append(sigmoid_gradient(pre_activation))
        activation = sigmoid(pre_activation)

    return activation, layer_gradients

In [269]:
# loading the pretrained model:

def load_nn_weights(path: str) -> list:
    """
    Read the two weight matrices of a pre-trained NN from a .mat file.

    Args:
        path: path to the .mat file; it must contain "Theta1" and "Theta2"

    Returns:
        A two-element list [theta1, theta2]: the weights of the 1st and 2nd layer.
    """
    mat_contents = loadmat(path)
    return [mat_contents[key] for key in ("Theta1", "Theta2")]


In [270]:
def convert_to_one_hot(y, n_classes=10):
    """
    Encode 1-based integer class labels as one-hot rows.

    Label k switches on column k - 1, so the valid labels are 1..n_classes.

    Args:
        y: labels, one entry per sample; shape (m,) or (m, 1), a plain list works too
        n_classes: number of columns of the encoding
    Returns:
        y_one_hot: float array of shape (len(y), n_classes) with ones at the label positions
    Raises:
        ValueError: if a label is not integer-valued or lies outside 1..n_classes
    """
    n_samples = len(y)
    y_one_hot = np.zeros((n_samples, n_classes))

    raw_labels = np.asarray(y)
    if raw_labels.size == 0:
        return y_one_hot

    # Work in a signed type: with unsigned labels (e.g. uint8) "label - 1" would underflow
    # for 0, and with signed labels a 0 would silently wrap to the last column.
    labels = raw_labels.astype(np.int64)
    if not np.array_equal(labels, raw_labels):
        raise ValueError("labels must be integer-valued")
    if labels.min() < 1 or labels.max() > n_classes:
        raise ValueError("labels must lie in 1..{}".format(n_classes))

    # One row index per sample, broadcast against that sample's label column(s).
    row_index = np.arange(n_samples)[:, None]
    y_one_hot[row_index, labels.reshape(n_samples, -1) - 1] = 1
    return y_one_hot

In [271]:
class NeuralNet:
    """Feed-forward sigmoid network whose layer shapes come from a stored weight file."""

    # File used when the constructor is called without an explicit path.
    DEFAULT_WEIGHTS_PATH = r"C:\Users\student\Hafifa\ML_intro\ex3\data\ex3weights.mat"
    # Half-width of the uniform interval used for the random initial weights.
    DEFAULT_EPSILON = 0.12

    def __init__(self, weights_path: str = DEFAULT_WEIGHTS_PATH, epsilon: float = DEFAULT_EPSILON):
        """
        The constructor of the NN-model object

        Args:
            weights_path: .mat file with the stored layer matrices
            epsilon: forwarded to init_nn_weights as the initialisation range
        """
        # The stored values themselves are not kept: init_nn_weights only reads the shapes
        # of the loaded matrices and returns random matrices of the same size.
        self.thetas = init_nn_weights(load_nn_weights(weights_path), epsilon=epsilon)

    def predict(self, x: np.ndarray) -> np.ndarray:
        """
        Applies a prediction of the neural-net over x
        Args:
            x: input data

        Returns:
            prediction: for each data-point, the zero-based index of the strongest output unit

        NOTE(review): convert_to_one_hot maps label k to column k - 1, so these indices are
        one less than the labels used by compute_cost -- confirm what callers expect.
        """
        pred, _ = forward(self.thetas, x)
        prediction = np.argmax(pred, axis=0).astype(int)
        return prediction

    def compute_cost(self, x, y, llambda=0.):
        """
        Regularised cross-entropy cost of the network over a data set.

        Args:
            x: input data
            y: labels, one per data-point, as accepted by convert_to_one_hot
            llambda: regularisation strength; 0 disables the penalty
        Returns:
            cost: cross-entropy summed over outputs and samples and divided by len(y), plus
                llambda / (2 * len(y)) times the sum of squared non-bias weights
        """
        h, _ = forward(self.thetas, x)
        n_samples = len(y)

        # Size the encoding from the network output so both operands always share a shape.
        y_one_hot = convert_to_one_hot(y, n_classes=h.shape[0])
        classification_term = -y_one_hot.T*np.log(h) - (1-y_one_hot).T*np.log(1-h)
        cost = np.sum(classification_term)/n_samples

        # Column 0 of every layer multiplies the bias row and is left out of the penalty.
        squared_weights = 0
        for layer in self.thetas:
            layer_without_bias = layer[:, 1:]
            squared_weights += np.sum(layer_without_bias**2)

        regularization_term = squared_weights * (llambda / (2*n_samples))
        cost += regularization_term
        return cost

In [286]:
# Build the network and evaluate the unregularised cost (llambda=0) on the loaded data.
# NOTE(review): NeuralNet() calls init_nn_weights and forward() calls sigmoid_gradient, both of
# which are defined in cells further down; on a fresh kernel those cells must be run first.
nn = NeuralNet()
nn.compute_cost(x, y, llambda=0)

(25, 401)
(10, 26)


6.9010629452224865

In [273]:
def sigmoid_gradient(z):
    """
    Derivative of the sigmoid evaluated at z: sigmoid(z) * (1 - sigmoid(z)).

    Args:
        z: scalar or array of pre-activation values
    Returns:
        sig_grad: element-wise derivative, same shape as sigmoid(z)
    """
    # Evaluate the sigmoid once and reuse it instead of computing it twice.
    sig = sigmoid(z)
    sig_grad = sig * (1 - sig)
    return sig_grad

In [274]:
def init_nn_weights(weights, epsilon):
    """
    Create randomly initialised replacements for a list of weight matrices.

    Args:
        weights: iterable of 2-D arrays; only their shapes are used
        epsilon: half-width of the sampling interval
    Returns:
        List of arrays with the same (rows, cols) as the inputs, drawn uniformly
        from [-epsilon, epsilon). The shape of every layer is printed along the way.
    """
    def _random_like(template):
        # Same size as the template, values rescaled from [0, 1) to [-epsilon, epsilon).
        dims = template.shape
        print(template.shape)
        return np.random.rand(dims[0], dims[1]) * 2 * epsilon - epsilon

    return [_random_like(template) for template in weights]