In [1]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

%load_ext autoreload
%autoreload 2

In [2]:
from dataset import load_svhn, random_split_train_val
from gradient_check import check_gradient
from metrics import multiclass_accuracy 
import linear_classifer

In [3]:
def prepare_for_linear_classifier(train_X, test_X):
    """Flatten, scale, center and bias-augment sample batches for a linear model.

    Every sample is flattened to a 1-D feature vector and divided by 255
    (presumably the inputs are 8-bit image pixels -- confirm in dataset.py).
    The per-feature mean of the *training* set is then subtracted from both
    sets, and a trailing column of ones is appended to act as the bias term.

    Args:
        train_X: array whose first axis indexes training samples.
        test_X: array whose first axis indexes test samples; its flattened
            feature count must match that of train_X.

    Returns:
        Tuple (train, test) of 2-D float64 arrays with shape
        (num_samples, num_features + 1).
    """
    # np.float was removed in NumPy 1.24; np.float64 is the dtype it resolved to.
    train_flat = train_X.reshape(train_X.shape[0], -1).astype(np.float64) / 255.0
    test_flat = test_X.reshape(test_X.shape[0], -1).astype(np.float64) / 255.0

    # Subtract mean (computed on the training set only, applied to both sets)
    mean_image = np.mean(train_flat, axis=0)
    train_flat -= mean_image
    test_flat -= mean_image

    # Add another channel with ones as a bias term
    train_flat_with_ones = np.hstack([train_flat, np.ones((train_X.shape[0], 1))])
    test_flat_with_ones = np.hstack([test_flat, np.ones((test_X.shape[0], 1))])
    return train_flat_with_ones, test_flat_with_ones
    
# Load SVHN from ../data; max_train / max_test presumably cap the number of
# samples returned (confirm in dataset.py).
train_X, train_y, test_X, test_y = load_svhn("../data", max_train=10000, max_test=1000)    
# Flatten, scale, center and append the bias column (rebinds train_X / test_X).
train_X, test_X = prepare_for_linear_classifier(train_X, test_X)
# Split train into train and val
train_X, train_y, val_X, val_y = random_split_train_val(train_X, train_y, num_val = 1000)

In [4]:
def square(x):
    """Return x squared as a Python float together with its derivative 2*x.

    Args:
        x: a scalar or an array holding exactly one element.

    Returns:
        Tuple (value, gradient): value is a Python float, gradient is 2*x
        with the same type/shape as x.
    """
    # Calling float() on a 1-element array with ndim > 0 is deprecated since
    # NumPy 1.25, so pull the single element out explicitly with .item().
    return float(np.asarray(x * x).item()), 2*x

# Check square's returned gradient at x = 3 (check_gradient presumably compares
# it against a numeric estimate -- see gradient_check.py).
check_gradient(square, np.array([3.0]))

def array_sum(x):
    """Sum a length-2 vector; return (sum, gradient), the gradient being all ones.

    Asserts that x has shape (2,), reporting the actual shape on failure.
    """
    assert x.shape == (2,), x.shape
    total = np.sum(x)
    gradient = np.ones_like(x)
    return total, gradient

# Same check for a function of a 1-D, two-element input.
check_gradient(array_sum, np.array([3.0, 2.0]))

def array_2d_sum(x):
    """Sum a 2x2 array; return (sum, gradient), the gradient being all ones.

    Asserts that x has shape (2, 2).
    """
    assert x.shape == (2,2)
    total = np.sum(x)
    gradient = np.ones_like(x)
    return total, gradient

# Same check for a function of a 2-D (2x2) input.
check_gradient(array_2d_sum, np.array([[3.0, 2.0], [1.0, 0.0]]))

Gradient check passed!
Gradient check passed!
Gradient check passed!


True

In [5]:
# TODO Implement softmax and cross-entropy for single sample
# Smoke call on moderate scores; this result is overwritten below without being checked.
probs = linear_classifer.softmax(np.array([-10, 0, 10]))

# Make sure it works for big numbers too!
# exp(1000) overflows float64, so this case can only pass if softmax guards
# against overflow (presumably by subtracting the max score first -- see linear_classifer.py).
probs = linear_classifer.softmax(np.array([1000, 0, 0]))
assert np.isclose(probs[0], 1.0)

In [6]:
# Cross-entropy of the softmax output with target class index 1.
# NOTE(review): the value saved with this cell (about 5.0068) equals -log(probs[1]).
probs = linear_classifer.softmax(np.array([-5, 0, 5]))
linear_classifer.cross_entropy_loss(probs, 1)

5.006760443547122

In [7]:
# TODO Implement combined function of softmax and cross entropy that also produces the gradient
loss, grad = linear_classifer.softmax_with_cross_entropy(np.array([1, 0, 0]), 1)
# np.float was removed in NumPy 1.24; np.float64 is the dtype it resolved to.
# The check point is created as float (presumably because check_gradient
# perturbs it by small deltas -- see gradient_check.py).
check_gradient(lambda x: linear_classifer.softmax_with_cross_entropy(x, 1), np.array([1, 0, 0], np.float64))

Gradient check passed!


True

In [8]:
# TODO Extend combined function so it can receive a 2d array with batch of samples
np.random.seed(42)
num_classes = 4
# Run the same gradient check for batch_size = 1 and batch_size = 3.
# The random draws keep their original order (predictions, then targets, for
# each batch size), so the seeded inputs match the previously unrolled version.
for batch_size in (1, 3):
    # np.float / np.int were removed in NumPy 1.24; np.float64 and the builtin
    # int are the dtypes those aliases resolved to.
    predictions = np.random.randint(-1, 3, size=(batch_size, num_classes)).astype(np.float64)
    target_index = np.random.randint(0, num_classes, size=(batch_size, 1)).astype(int)
    # check_gradient is expected to call the lambda before returning, so the
    # closure sees this iteration's target_index.
    check_gradient(lambda x: linear_classifer.softmax_with_cross_entropy(x, target_index), predictions)

# Make sure maximum subtraction for numeric stability is done separately for every sample in the batch
probs = linear_classifer.softmax(np.array([[20,0,0], [1000, 0, 0]]))
assert np.all(np.isclose(probs[:, 0], 1.0))

Gradient check passed!
Gradient check passed!


In [9]:
# TODO Implement linear_softmax function that uses softmax with cross-entropy for linear classifier
batch_size = 2
num_classes = 2
num_features = 3
np.random.seed(42)
# np.float / np.int were removed in NumPy 1.24; np.float64 and the builtin int
# are the dtypes those aliases resolved to.
# W has shape (num_features, num_classes); X has shape (batch_size, num_features).
W = np.random.randint(-1, 3, size=(num_features, num_classes)).astype(np.float64)
X = np.random.randint(-1, 3, size=(batch_size, num_features)).astype(np.float64)
# Every sample gets target class 1 (1-D index array of length batch_size).
target_index = np.ones(batch_size, dtype=int)

loss, dW = linear_classifer.linear_softmax(X, W, target_index)
# NOTE(review): the output saved with this cell ends in False, i.e. this gradient
# check did not pass at the time of the last recorded run.
check_gradient(lambda w: linear_classifer.linear_softmax(X, w, target_index), W)

--------------------
[[-1. -1.  1.]
 [ 0.  1.  1.]]
[[ 1.  2.]
 [-1.  1.]
 [ 1.  2.]]
[[ 0.44039854 -0.44039854]
 [ 0.02371294 -0.02371294]]
[[ 0.44039854  0.02371294]
 [-0.44039854 -0.02371294]]
[[[-4.40398539e-01 -8.80797078e-01 -5.55111512e-17]
  [-4.40398539e-01 -8.80797078e-01 -5.55111512e-17]
  [-4.40398539e-01 -8.80797078e-01 -5.55111512e-17]]

 [[-2.37129366e-02 -4.74258732e-02  7.63278329e-17]
  [-2.37129366e-02 -4.74258732e-02  7.63278329e-17]
  [-2.37129366e-02 -4.74258732e-02  7.63278329e-17]]]
--------------------
--------------------
[[-1. -1.  1.]
 [ 0.  1.  1.]]
[[ 1.  2.]
 [-1.  1.]
 [ 1.  2.]]
[[ 0.44039854 -0.44039854]
 [ 0.02371294 -0.02371294]]
[[ 0.44039854  0.02371294]
 [-0.44039854 -0.02371294]]
[[[-4.40398539e-01 -8.80797078e-01 -5.55111512e-17]
  [-4.40398539e-01 -8.80797078e-01 -5.55111512e-17]
  [-4.40398539e-01 -8.80797078e-01 -5.55111512e-17]]

 [[-2.37129366e-02 -4.74258732e-02  7.63278329e-17]
  [-2.37129366e-02 -4.74258732e-02  7.63278329e-17]
  [-2.371

False

In [None]:
[[ 0.44039854 -0.44039854]
 [ 0.02371294 -0.02371294]]

-0.4403985389922482 0.4403985389922482 
-0.4166856024112597 0.4166856024112597 
0.4641114755732367 -0.4641114755732367

In [10]:
# Scratch check of matrix-product shapes: (2, 2) dot (2, 3) -> (2, 3).
x1 = [[1,2,3],[4,5,6]]
x2 = [[5,6],[7,8]]
np.dot(x2,x1)

array([[29, 40, 51],
       [39, 54, 69]])

In [11]:
# Integer-array indexing demo: index a 2-D array with a 2-D index array.
xx = np.arange(6).reshape(3,2)
print(xx)

# Row i of yy holds the index i twice, so xx[yy][i] contains row i of xx twice.
yy = np.array([[0,0],[1,1],[2,2]])
# The result shape is yy.shape + xx.shape[1:], i.e. (3, 2, 2).
print(xx[yy].shape)
# Author's intended reading of the result axes
# (NOTE(review): not verifiable from this cell alone):
# shape[0] - len(classes)
# shape[1] - len(features)
# shape[2] - len(samples)
xx[yy]

[[0 1]
 [2 3]
 [4 5]]
(3, 2, 2)


array([[[0, 1],
        [0, 1]],

       [[2, 3],
        [2, 3]],

       [[4, 5],
        [4, 5]]])

In [12]:
len_classes = 3
len_feature = 2
#len_samples=2
# Build the index array in one expression: repeat the class ids len_feature
# times, sort them so equal ids are adjacent, then fold into rows of
# len_feature entries -> row i is filled with i.
yy = np.sort(list(range(len_classes)) * len_feature).reshape(-1, len_feature)
print(yy)

# Indexing xx with yy repeats row i of xx len_feature times in block i.
xx = np.arange(6).reshape(3, 2)
print(xx)
xx[yy]

[[0 0]
 [1 1]
 [2 2]]
[[0 1]
 [2 3]
 [4 5]]


array([[[0, 1],
        [0, 1]],

       [[2, 3],
        [2, 3]],

       [[4, 5],
        [4, 5]]])

In [13]:
# Two separate but equal 2x2 operands.
a = np.arange(4).reshape((2, 2))
print(a)
b = np.arange(4).reshape((2, 2))
print(b)
# axes=[1, 0] contracts axis 1 of a with axis 0 of b, which for 2-D inputs
# is the ordinary matrix product of a and b.
np.tensordot(a, b, axes=[1, 0])

[[0 1]
 [2 3]]
[[0 1]
 [2 3]]


array([[ 2,  3],
       [ 6, 11]])