In [1]:
import gzip
import struct
import matplotlib.pyplot as plt
import numpy as np
#from src.simple_ml import *
# Example of target with class indices
import torch
import torch.nn as nn
import numdifftools as nd

# Exploring Parsing MNIST using python struct package

In [None]:
def visualize_image(img):
    """
    img: list with len = 784
    An example image of MNIST dataset either train or test
    """
    # Convert to numpy array
    img = np.array(img)
    # Reshape to 28x28 image to plot using matplotlib
    img = img.reshape(28,28)
    plt.imshow(img)

In [None]:
# trying reading binary (rb) mode
# ubyte is unsigned byte
# idx3 means dimensions like n1xn2xn3; for MNIST train its 60,000 x 28 x 28
# MNIST test is 10,000 x 28 x 28, we will convert this into 10,000 x 784

path = 'data/t10k-images-idx3-ubyte.gz'
with gzip.open(path,'rb') as f:
    data = f.read() # returns a bytes object

#### Format of the test images file
# Train SET IMAGE FILE (train-images-idx3-ubyte):
# |[offset] | [type]     |     [value]     |     [description]
# 0000     32 bit integer  0x00000803(2051) magic number
# 0004     32 bit integer  10000            number of images
# 0008     32 bit integer  28               number of rows
# 0012     32 bit integer  28               number of columns
# 0016     unsigned byte   ??               pixel
# 0017     unsigned byte   ??               pixel
# ........
# xxxx     unsigned byte   ??               pixel

# For reading first 16 bytes, containing 4 bytes for magic number, 4 for #images, 4 for #rows and 4 for #cols 
meta_data = struct.iter_unpack('>I',data[0:16])

magic_number = next(meta_data)
n_images= next(meta_data)
n_rows = next(meta_data)
n_cols = next(meta_data)

magic_number, n_images, n_rows, n_cols

pixels = list(struct.iter_unpack('>B',data[16:]))

len(list(pixels)) # 10000 x 784

images = []
n_pixels = n_rows[0] * n_cols[0]
for i in range(n_images[0]):
    images.append(pixels[i * n_pixels: i * n_pixels + n_pixels])

assert len(images)==10000, "Make sure there are 10,000 images in the test set"

visualize_image(images[0])

In [None]:
# trying reading binary (rb) mode
# ubyte is unsigned byte
# idx3 means dimensions like n1xn2xn3; for MNIST train its 60,000 x 28 x 28
# MNIST test is 10,000 x 28 x 28, we will convert this into 10,000 x 784

path = 'data/train-images-idx3-ubyte.gz'
with gzip.open(path,'rb') as f:
    data = f.read() # returns a bytes object

In [None]:
meta_data = struct.iter_unpack('>I',data[0:16])
magic_number, n_images, n_rows, n_cols = meta_data

In [None]:
magic_number,n_images,n_rows,n_cols

In [None]:
#### Format of the test images file
# Train SET IMAGE FILE (train-images-idx3-ubyte):
# |[offset] | [type]     |     [value]     |     [description]
# 0000     32 bit integer  0x00000803(2051) magic number
# 0004     32 bit integer  10000            number of images
# 0008     32 bit integer  28               number of rows
# 0012     32 bit integer  28               number of columns
# 0016     unsigned byte   ??               pixel
# 0017     unsigned byte   ??               pixel
# ........
# xxxx     unsigned byte   ??               pixel

# For reading first 16 bytes, containing 4 bytes for magic number, 4 for #images, 4 for #rows and 4 for #cols 
meta_data = struct.iter_unpack('>I',data[0:16])

magic_number = next(meta_data)
n_images= next(meta_data)
n_rows = next(meta_data)
n_cols = next(meta_data)

magic_number, n_images, n_rows, n_cols

pixels = list(struct.iter_unpack('>B',data[16:]))

len(list(pixels)) # 10000 x 784

images = []
n_pixels = n_rows[0] * n_cols[0]
for i in range(n_images[0]):
    images.append(pixels[i * n_pixels: i * n_pixels + n_pixels])

assert len(images)==60000, "Make sure there are 10,000 images in the test set"

visualize_image(images[0])

In [None]:
# trying reading binary (rb) mode
# ubyte is unsigned byte
# idx3 means dimensions like n1xn2xn3; for MNIST train its 60,000 x 28 x 28
# MNIST test is 10,000 x 28 x 28, we will convert this into 10,000 x 784

path = 'data/train-labels-idx1-ubyte.gz'
with gzip.open(path,'rb') as f:
    data = f.read() # returns a bytes object

#### Format of the test images file
# Train SET label FILE (train-labels-idx1-ubyte):
# [offset] [type]          [value]          [description]
# 0000     32 bit integer  0x00000801(2049) magic number (MSB first)
# 0004     32 bit integer  10000            number of items
# 0008     unsigned byte   ??               label
# 0009     unsigned byte   ??               label
# ........
# xxxx     unsigned byte   ??               label
# The labels values are 0 to 9.

# For reading first 16 bytes, containing 4 bytes for magic number, 4 for #images, 4 for #rows and 4 for #cols 
meta_data = struct.iter_unpack('>I',data[0:8])

magic_number = next(meta_data)
n_labels = next(meta_data)


train_labels = [label[0] for label in struct.iter_unpack('>B',data[8:])]

assert len(train_labels)==n_labels[0], f"Make sure there are {n_labels} labels in the test set, you currently have {len(train_labels)}"

In [None]:
# trying reading binary (rb) mode
# ubyte is unsigned byte
# idx1 means dimensions like n1; for MNIST train its 60,000 for train
# MNIST test is 10,000 x 1

path = 'data/t10k-labels-idx1-ubyte.gz'
with gzip.open(path,'rb') as f:
    data = f.read() # returns a bytes object

#### Format of the test images file
# Test SET label FILE (test-labels-idx1-ubyte):
# [offset] [type]          [value]          [description]
# 0000     32 bit integer  0x00000801(2049) magic number (MSB first)
# 0004     32 bit integer  10000            number of items
# 0008     unsigned byte   ??               label
# 0009     unsigned byte   ??               label
# ........
# xxxx     unsigned byte   ??               label
# The labels values are 0 to 9.

# For reading first 16 bytes, containing 4 bytes for magic number, 4 for #images, 4 for #rows and 4 for #cols 
meta_data = struct.iter_unpack('>I',data[0:8])

magic_number = next(meta_data)
n_labels = next(meta_data)


test_labels = [label[0] for label in struct.iter_unpack('>B',data[8:])]

assert len(test_labels)==n_labels[0], f"Make sure there are {n_labels} labels in the test set, you currently have {len(train_labels)}"

In [None]:
def parse_images(image_filename:str)->list:
    print(f"Working on {image_filename}")
    with gzip.open(image_filename,'rb') as f:
        data = f.read() # returns a bytes object

    #### Format of the test images file
    # Train/Test SET IMAGE FILE (train[t10k]-images-idx3-ubyte):
    # |[offset] | [type]     |     [value]     |     [description]
    # 0000     32 bit integer  0x00000803(2051) magic number
    # 0004     32 bit integer  60000(10000)     number of train(test)images
    # 0008     32 bit integer  28               number of rows
    # 0012     32 bit integer  28               number of columns
    # 0016     unsigned byte   ??               pixel
    # 0017     unsigned byte   ??               pixel
    # ........
    # xxxx     unsigned byte   ??               pixel

    # For reading first 16 bytes, containing 4 bytes for magic number, 4 for #images, 4 for #rows and 4 for #cols 
    meta_data = struct.iter_unpack('>I',data[0:16])

    magic_number, n_images, n_rows, n_cols  = meta_data

    pixels = [pix[0] for pix in struct.iter_unpack('>B',data[16:])]

    images = []
    n_pixels = n_rows[0] * n_cols[0]
    n_images = n_images[0]
    assert len(list(pixels))==n_images*n_pixels # 60000(10000) x 784
    
    for i in range(n_images):
        images.append(pixels[i * n_pixels: i * n_pixels + n_pixels])

    return images

In [None]:
def parse_labels(label_filename:str)->list:
    print(f"Working on {label_filename}")
    with gzip.open(label_filename,'rb') as f:
        data = f.read() # returns a bytes object

    #### Format of the test images file
    # Train/test SET label FILE (train(t10k)-labels-idx1-ubyte):
    # [offset] [type]          [value]          [description]
    # 0000     32 bit integer  0x00000801(2049) magic number (MSB first)
    # 0004     32 bit integer  60000(10000)     number of items
    # 0008     unsigned byte   ??               label
    # 0009     unsigned byte   ??               label
    # ........
    # xxxx     unsigned byte   ??               label
    # The labels values are 0 to 9.

    # For reading first 8 bytes, containing 4 bytes for magic number, 4 for #labels
    meta_data = struct.iter_unpack('>I',data[0:8])

    magic_number = next(meta_data)
    n_labels = next(meta_data)

    labels = [label[0] for label in struct.iter_unpack('>B',data[8:])]

    assert len(labels)==n_labels[0], f"Make sure there are {n_labels} labels in the test set,\
                                       you currently have {len(labels)}"

    return labels

In [None]:
def parse_mnist(image_filename, label_filename):
    """ Read an images and labels file in MNIST format.  See this page:
    http://yann.lecun.com/exdb/mnist/ for a description of the file format.

    Args:
        image_filename (str): name of gzipped images file in MNIST format
        label_filename (str): name of gzipped labels file in MNIST format

    Returns:
        Tuple (X,y):
            X (numpy.ndarray[np.float32]): 2D numpy array containing the loaded
                data.  The dimensionality of the data should be
                (num_examples x input_dim) where 'input_dim' is the full
                dimension of the data, e.g., since MNIST images are 28x28, it
                will be 784.  Values should be of type np.float32, and the data
                should be normalized to have a minimum value of 0.0 and a
                maximum value of 1.0.

            y (numpy.ndarray[dypte=np.uint8]): 1D numpy array containing the
                labels of the examples.  Values should be of type np.uint8 and
                for MNIST will contain the values 0-9.
    """
    ### BEGIN YOUR CODE
    if  "train" in image_filename:
        assert "train-labels" in label_filename, "Please pass labels for train images only."
        images = parse_images(image_filename)
        labels = parse_labels(label_filename)
        train_images = np.array(images, dtype=np.float32)
        # normalize the images between 0 and 1
        #train_images = train_images/np.linalg.norm(train_images)
        train_labels = np.array(labels, dtype=np.uint8)
        return (train_images, train_labels)
    
    if  "test" in image_filename:
        assert "t10k-labels" in label_filename, "Please pass labels for test images only."
        images = parse_images(image_filename)
        labels = parse_labels(label_filename)
        #test_images = np.array(images, dtype=np.float32)
        # normalize the images between 0 and 1
        test_images = test_images/np.linalg.norm(test_images)
        test_labels = np.array(labels, dtype=np.uint8)
        return (test_images, test_labels)

In [2]:
X, y = simple_ml.parse_mnist("data/train-images-idx3-ubyte.gz",
            "data/train-labels-idx1-ubyte.gz")

In [3]:
X.shape, y.shape

((60000, 784), (60000,))

In [4]:
Z = np.zeros((y.shape[0], 10))

In [None]:
# Z = np.random.randn(y.shape[0], 10)

# Z.shape, y.shape

# (np.argmax(Z, axis=1)==y).shape

# np.argmax(Z, axis=1).shape, np.max(y)

# np.mean(np.log(np.sum(np.exp(Z),axis=1)))

# np.mean(np.argmax(Z, axis=1)==y)

# np.mean(np.log(np.sum(np.exp(Z),axis=1))) 

# loss = np.mean(np.log(np.sum(np.exp(Z),axis=1)) - ((np.argmax(Z, axis=1)+1==y+1)))

# loss

# np.mean(loss, dtype=np.float32)

# e_x = np.exp(Z - np.max(Z))
# softmax = np.log(e_x / e_x.sum())

# np.mean(np.mean(softmax) - (np.argmax(Z, axis=1)==y))

# Z = np.random.randn(y.shape[0], 10)

# np.mean(np.log(np.sum(np.exp(Z),axis=1)) - (np.argmax(Z, axis=1)==y))

# np.argmax(Z, axis=1).shape, y.shape

# type(y), type(np.argmax(Z,axis=0))

# np.mean(np.log(np.sum(np.exp(Z),axis=1)) - np.equal(np.argmax(Z, axis=1),y))

# np.sum(Z,axis=1).shape

# np.argmax(Z[0:10],axis=1)

# max_Z = np.max(Z)
# max_Z

# exp_Z = np.exp(Z - max_Z)
# exp_Z.shape

# sum_exp_Z = np.sum(exp_Z,axis=0)
# log_sum_exp_Z = np.log(sum_exp_Z)
# max_plus_log_sum_exp_Z = max_Z + log_sum_exp_Z
# max_plus_log_sum_exp_Z.shape

# Z.max()

# max_plus_log_sum_exp_Z

# log_probs = Z - max_plus_log_sum_exp_Z

# log_probs

# np.mean(np.log(np.sum(np.exp(Z),axis=1)) - (np.argmax(Z, axis=1)==y))

In [None]:
def softmax(X):
    exps = np.exp(X)
    return exps / np.sum(exps)

In [None]:
def stable_softmax(X):
    exps = np.exp(X - np.max(X))
    return exps / np.sum(exps)

In [None]:
m = y.shape[0]
p = softmax(Z)
# We use multidimensional array indexing to extract 
# softmax probability of the correct label for each sample.
# Refer to https://docs.scipy.org/doc/numpy/user/basics.indexing.html#indexing-multi-dimensional-arrays for understanding multidimensional array indexing.
log_likelihood = -np.log(p[range(m),y])
loss = np.sum(log_likelihood) / m

In [None]:
loss

In [None]:
#nll_loss(log_softmax(input, 1), target, weight, size_average, ignore_index, reduce)

# Exploring the softmax loss function

In [38]:
#Reference:
#https://stackoverflow.com/questions/70202761/manually-computing-cross-entropy-loss-in-pytorch
class CrossEntropyLossManual:
    """
    Z is the vector with shape (batch_size,C)
    y shape is the same (batch_size), whose entries are integers from 0 to C-1
    """
    def __init__(self) -> None:
        pass
    def __call__(self, Z, y):
        loss = 0.
        n_batch, n_class = Z.shape
        #numerator, denominator, softmax, log_softmax = [],[],[],[]
        #print(n_batch,n_class)
        for z1, y1 in zip(Z, y):
            #print(z1,y1)
            class_index = int(y1.item())
            # numerator.append(torch.exp(z1[class_index]))
            # denominator.append(torch.exp(z1).sum())
            # softmax.append(torch.exp(z1[class_index])/(torch.exp(z1).sum()))
            # log_softmax.append(torch.log(torch.exp(z1[class_index])/(torch.exp(z1).sum())))
            loss = loss + torch.log(torch.exp(z1[class_index])/(torch.exp(z1).sum()))
        # print(f"Numerator calcualted by loss_manual is {numerator}")
        # print(f"Denominator calcualted by loss_manual is {denominator}")
        # print(f"Softmax calcualted by loss_manual is {softmax}")
        # print(f"Log-Softmax calcualted by loss_manual is {log_softmax}")
        # print(f"Loss before average by loss_manual is {loss}")
        loss = - loss/n_batch
        return loss

In [39]:
def loss_numpy(input, target):
    numerator = np.exp(input[range(input.shape[0]),target])
    #print(f"Numerator calcualted by loss_numpy is {numerator}")
    denominator = np.sum(np.exp(input), axis=1)
    #print(f"Denominator calcualted by loss_numpy is {denominator}")
    softmax = numerator/denominator
    #print(f"Softmax calcualted by loss_numpy is {softmax}")
    log_softmax = np.log(softmax)
    #print(f"Log-Softmax calcualted by loss_numpy is {log_softmax}")
    loss = np.mean(log_softmax)
    return loss

In [None]:
np.mean(np.log(np.exp(input[range(input.shape[0]),target])/np.sum(np.exp(input), axis=1)))

In [40]:
input = torch.rand(3, 5)
target = torch.empty(3, dtype = torch.long).random_(5)
print(input.shape,target.shape)
input_np = input.numpy()
target_np = target.numpy()

torch.Size([3, 5]) torch.Size([3])


In [41]:
loss_manual = CrossEntropyLossManual()
output_manual = loss_manual(input, target)
output_numpy = loss_numpy(input_np,target_np)
print('Cross Entropy Loss via manual: \n', output_manual)
print('Cross Entropy Loss via numpy: \n', output_numpy)

Cross Entropy Loss via manual: 
 tensor(1.7306)
Cross Entropy Loss via numpy: 
 -1.7306296


In [42]:
loss_manual = CrossEntropyLossManual()
loss_pytorch = nn.CrossEntropyLoss()
output_pytorch = loss_pytorch(input, target)
output_manual = loss_manual(input, target)
output_numpy = loss_numpy(input_np,target_np)
print('input:\n ', input)
print('target:\n ', target)
print('Cross Entropy Loss via pytorch: \n', output_pytorch)
print('Cross Entropy Loss via manual: \n', output_manual)
print('Cross Entropy Loss via numpy: \n', output_numpy)

input:
  tensor([[0.4009, 0.5387, 0.2455, 0.1033, 0.6917],
        [0.5332, 0.1734, 0.7952, 0.8132, 0.7162],
        [0.2048, 0.5878, 0.9979, 0.6212, 0.8195]])
target:
  tensor([2, 0, 1])
Cross Entropy Loss via pytorch: 
 tensor(1.7306)
Cross Entropy Loss via manual: 
 tensor(1.7306)
Cross Entropy Loss via numpy: 
 -1.7306296


In [43]:
Z_tensor = torch.from_numpy(Z)
y_tensor = torch.from_numpy(y)
print(Z.shape, y.shape)
output_pytorch = loss_pytorch(Z_tensor, y_tensor)
output_manual = loss_manual(Z_tensor, y_tensor)
output_numpy = loss_numpy(Z,y)
# print('input:\n ', input)
# print('target:\n ', target)
print('Cross Entropy Loss via pytorch: \n', output_pytorch)
print('Cross Entropy Loss via manual: \n', output_manual)
print('Cross Entropy Loss via numpy: \n', output_numpy)

(60000, 10) (60000,)
Cross Entropy Loss via pytorch: 
 tensor(2.3026, dtype=torch.float64)
Cross Entropy Loss via manual: 
 tensor(2.3026, dtype=torch.float64)
Cross Entropy Loss via numpy: 
 -2.3025850929940455


In [None]:
print(calc_loss(Z,y))

In [None]:
# Z = torch.from_numpy(Z)
# y = torch.from_numpy(y)
loss(Z,y)

### Exploring numpy functionalities

In [12]:
a = np.random.randint(10,size=5)
b = np.random.randint(1,3,size=5)
np.log(np.exp(a)/b)

array([6.30685282, 2.        , 0.30685282, 0.30685282, 3.        ])

In [10]:
a

1

In [9]:
np.exp(a)

2.718281828459045

In [17]:
input = torch.rand(3, 10) # represents 3 examples, each with 10 features
target = torch.empty(3, dtype = torch.long).random_(4) #represents 4 classes in the data
print(input.shape,target.shape)
input_np = input.numpy()
target_np = target.numpy()

torch.Size([3, 10]) torch.Size([3])


In [21]:
target_np

array([1, 3, 0])

In [22]:
np.exp(target_np)

array([ 2.71828183, 20.08553692,  1.        ])

In [23]:
np.sum(np.exp(target_np))

23.803818751646713

In [26]:
log_softmax = target_np - np.log(np.sum(np.exp(target_np)))

In [27]:
log_softmax

array([-2.16984602, -0.16984602, -3.16984602])

\begin{equation}
\ell_{\mathrm{softmax}}(z, y) = \log\sum_{i=1}^k \exp z_i - z_y.
\end{equation}

In [31]:
# Assume we have 5 examples, 2 features/input-size, 3 classes
# z is the number of examples x prob. of each class i.e. 5 x 3 here
# This is the output of the batch of the neural network
z = np.random.rand(5,3)
z

array([[0.22063055, 0.71128048, 0.10020596],
       [0.97143822, 0.59815188, 0.13750278],
       [0.79228972, 0.02759386, 0.83471661],
       [0.59853255, 0.78788152, 0.36687649],
       [0.58836817, 0.9153712 , 0.35075371]])

In [33]:
# Labels is the label for each example in the batch i.e. 5x1
labels = np.random.randint(0,3, size=5)
labels, labels.shape

(array([2, 0, 1, 2, 0]), (5,))

In [36]:
exp = np.exp(z) # 5 x 3
sum_exp = np.sum(exp, axis=1) # 5 x 1
log_sum_exp = np.log(sum_exp) # 5 x 1
log_sum_exp, log_sum_exp.shape

(array([1.47906921, 1.7241764 , 1.71210035, 1.69770518, 1.74377417]), (5,))

In [38]:
#multi-dimensional indexing z is of shape (n_examples, n_classes) i.e. 5x3
# Selecting the probability of first,second example being of Class 1,2
z[[0,1],[1,2]]

array([0.71128048, 0.13750278])

In [40]:
# Select the probability predicted by the NN (stored in Z (5x3))
# as opposed to actual label of the example (stored in y (5x1))
# Use multi-dimensional indexing in numpy for brevity
z_y = z[list(range(5)), labels] # 5 x 1
z_y,z_y.shape

(array([0.10020596, 0.97143822, 0.02759386, 0.36687649, 0.58836817]), (5,))

In [42]:
log_softmax = log_sum_exp - z_y # 5 x 1
avg_log_softmax = - np.mean(log_softmax)
print(log_softmax,log_softmax.shape)
print(avg_log_softmax)

[1.37886325 0.75273818 1.68450649 1.33082869 1.155406  ] (5,)
-1.2604685212531055


In [43]:
# check with reference solution
- np.mean(np.log(np.exp(z[range(z.shape[0]),labels])/np.sum(np.exp(z), axis=1)))

1.2604685212531055

In [45]:
# One liner solution 
- np.mean(np.log(np.sum(np.exp(z),axis=1)) - z[range(z.shape[0]),labels])

-1.2604685212531055

# Exploring Logistic Regression

In [2]:
# Assume we have 5 examples, 10 features/input-size, 3 classes
# z is the number of examples x prob. of each class i.e. 5 x 3 here
# This is the output of the batch of the neural network
X = np.random.rand(5,10)
y = np.random.randint(0,3,size=5)
batch = 2
theta = np.random.rand(10,3) # weights of size (input_dimensions x n_classes)
lr = 0.01

In [3]:
def softmax_loss(Z, y):
    """ Return softmax loss.  Note that for the purposes of this assignment,
    you don't need to worry about "nicely" scaling the numerical properties
    of the log-sum-exp computation, but can just compute this directly.

    Args:
        Z (np.ndarray[np.float32]): 2D numpy array of shape
            (batch_size, num_classes), containing the logit predictions for
            each class.
        y (np.ndarray[np.int8]): 1D numpy array of shape (batch_size, )
            containing the true label of each example.

    Returns:
        Average softmax loss over the sample.
    """
    # Reference https://stackoverflow.com/questions/70202761/manually-computing-cross-entropy-loss-in-pytorch
    ### BEGIN YOUR CODE
    # simple log softmax 
    # return - np.mean(np.log(np.exp(Z[range(Z.shape[0]),y])/np.sum(np.exp(Z), axis=1)))
    # stable log-softmax without the need of division (so no divide by zero error)
    return np.mean(np.log(np.sum(np.exp(Z),axis=1)) - Z[range(Z.shape[0]),y])

In [4]:
# Using array indexing to divide X into equally sized batches
def softmax_regression_epoch(X,y,theta,lr=1.0,batch=50):
    if (X.shape[0]%batch):
        n_iterations = X.shape[0]//batch + 1
    else: 
        n_iterations = X.shape[0]//batch

    for i in range(n_iterations):
        mini_batch = X[i*batch: i*batch + batch] # Take the batch out, batch_size x number of features
        mini_batch_targets = y[i*batch: i*batch + batch]
        output = np.matmul(mini_batch, theta) # batch_size x n_classes
        z = np.exp(output)/np.sum(np.exp(output),axis=0) # normalize wrt rows, batch_size x n_classes
        a = np.zeros((mini_batch_targets.size,y.max()+1))
        a[range(mini_batch_targets.size),mini_batch_targets] = 1 # one-hot encoding; batch_size x n_classes
        b = z - a
        temp = np.matmul(mini_batch.T, b) 
        theta -= lr * temp # modifying in-place, n_features x n_classes

In [7]:
np.random.seed(0)
X = np.random.randn(50,5).astype(np.float32)
y = np.random.randint(3, size=(50,)).astype(np.uint8)
Theta = np.zeros((5,3), dtype=np.float32)
dTheta = -nd.Gradient(lambda Th : softmax_loss(X@Th.reshape(5,3),y))(Theta)
softmax_regression_epoch(X,y,Theta,lr=1.0,batch=50)
print(Theta)
print(dTheta.reshape(5,3))
np.testing.assert_allclose(dTheta.reshape(5,3), Theta, rtol=1e-4, atol=1e-4)

[[-3.0916562  -7.5612955   5.166902  ]
 [ 3.5317361  -1.8526123   5.4791226 ]
 [ 2.44782     0.62592995 -2.310427  ]
 [-3.9415877   2.3346987   1.1991554 ]
 [ 2.6594627   2.4838834   0.29733157]]
[[-0.02525946 -0.11465225  0.13991171]
 [ 0.02291308 -0.08477389  0.06186081]
 [ 0.04386758  0.00742978 -0.05129736]
 [-0.07611353  0.0494122   0.02670133]
 [ 0.01691807  0.01340648 -0.03032455]]


AssertionError: 
Not equal to tolerance rtol=0.0001, atol=0.0001

Mismatched elements: 15 / 15 (100%)
Max absolute difference: 7.44664326
Max relative difference: 1.10198901
 x: array([[-0.025259, -0.114652,  0.139912],
       [ 0.022913, -0.084774,  0.061861],
       [ 0.043868,  0.00743 , -0.051297],...
 y: array([[-3.091656, -7.561296,  5.166902],
       [ 3.531736, -1.852612,  5.479123],
       [ 2.44782 ,  0.62593 , -2.310427],...

In [62]:
test_batch = X[0:2]
print(test_batch.shape)
output = np.matmul(test_batch, theta)
print(output.shape)
np.exp(output)/

(2, 10)
(2, 3)


ValueError: operands could not be broadcast together with shapes (2,3) (2,) 

In [38]:
z = np.exp(test_batch)/np.sum(np.exp(test_batch),axis=0)

array([[0.46876614, 0.4288653 , 0.42054245, 0.50892158, 0.49846952,
        0.32217456, 0.58762212, 0.31426216, 0.64452526, 0.46206821],
       [0.53123386, 0.5711347 , 0.57945755, 0.49107842, 0.50153048,
        0.67782544, 0.41237788, 0.68573784, 0.35547474, 0.53793179]])

In [55]:
test_targets = y[0:2]
print(test_targets)
a = np.zeros((test_targets.size,3))
a[range(2),test_targets] = 1
a

[2 2]


array([[0., 0., 1.],
       [0., 0., 1.]])