In [1]:
# import autograd-wrapped numpy
import autograd.numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook
from matplotlib import rcParams
rcParams['figure.autolayout'] = True

%load_ext autoreload
%autoreload 2

# datapath to data
datapath = './superlearn_datasets/'

-----
some handy tools

In [2]:
from autograd import grad
from autograd import hessian
def newtons_method(g, max_its, w):
    gradient = grad(g)
    hess = hessian(g)
    epsilon = 1e-7
    
    weight_his = [w]
    cost_his = [g(w)]
    for k in range(max_its):
        grad_eval = gradient(w)
        hess_eval = hess(w)
        
        hess_eval.shape = (int(np.size(hess_eval)**0.5),
                           int(np.size(hess_eval)**0.5))
        A = hess_eval + epsilon*np.eye(w.size)
        b = grad_eval
        w = np.linalg.solve(A, np.dot(A,w)-b)
        weight_his.append(w)
        cost_his.append(g(w))
    return weight_his, cost_his

def gradient_descent(g, max_its, w):
    gradient = grad(g)
    weight_his = [w]
    cost_his = [g(w)]
    for k in range(max_its):
        grad_eval = gradient(w)
        w = w - grad_eval
        weight_his.append(w)
        cost_his.append(g(w))
    return weight_his, cost_his

-----

# 7.2

In [3]:
# load in dataset
csvname = datapath + '4class_data.csv'
data = np.loadtxt(csvname,delimiter = ',');
x = data[:-1,:]
y = data[-1:,:] 

l = 1e-3

def model(x, w):
    a = w[0] + np.dot(x.T,w[1:])
    return a.T

def fusion_rule(w):
    return np.argmax(model(x, w), axis = 0)

def softmax(w):
    cost = np.sum(np.log(1+np.exp(-y*model(x, w))))
    return cost/float(np.size(y))

w = 0.1*np.random.randn(3,4)

for j in range(4):
    _y = y
    y = np.apply_along_axis(lambda x : 1.0 if x == j else -1.0, 0, y)
    weight_his, _ = newtons_method(softmax, 1000, w[:, j])
    w[:, j] = weight_his[-1]
    y = _y

print(y, fusion_rule(w), sep = '\n')
print(f"There are {np.sum(y.squeeze() != fusion_rule(w))} mismatches in total")

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2.
  2. 2. 2. 2. 2. 2. 3. 3. 3. 3. 3. 3. 3. 3. 3. 3.]]
[0 0 0 0 0 0 1 2 0 0 0 1 2 1 1 1 1 1 3 1 0 1 2 2 2 2 2 2 2 3 2 1 3 3 3 3 3
 3 3 3]
There are 10 mismatches in total


# 7.3

In [4]:
# load in dataset
data = np.loadtxt(datapath + '3class_data.csv',delimiter = ',')

# get input/output pairs
x = data[:-1,:]
y = data[-1:,:]

lam = 10 ** -5
w = 0.1*np.random.randn(3,3)

def model(x, w):
    a = w[0] + np.dot(x.T,w[1:])
    return a.T

def multiclass_perceptron(w):
    all_evals = model(x, w)
    a = np.max(all_evals, axis = 0)
    b = all_evals[y.astype(int).flatten(), np.arange(np.size(y))]
    cost = np.sum(a - b)
    
    cost = cost + lam * np.linalg.norm(w[1:, :], 'fro')**2
    
    return cost/float(np.size(y))

weight_his, _ = gradient_descent(multiclass_perceptron, 1000, w)
w = weight_his[-1]
print(y, np.argmax(model(x, w), axis=0), sep='\n')
print(f"There are {np.sum(y.squeeze() != np.argmax(model(x, w), axis=0))} mismatches in total")

[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 2. 2. 2.
  2. 2. 2. 2. 2. 2.]]
[0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2]
There are 0 mismatches in total


# 7.4

$$
\begin{align}
  g(w_0, w_1) &=  \frac{1}{P}\sum_{p=1}^{P}\max(0, \mathring{\boldsymbol{x_p}}^T(\boldsymbol{w}_0-\boldsymbol{w}_{1}), \mathring{\boldsymbol{x_p}}^T(\boldsymbol{w}_1-\boldsymbol{w}_{0}))\\
              &= \frac{1}{P}\sum_{p=1}^P (0, \mathring{\boldsymbol{x_p}}^T |\boldsymbol{w_1}-\boldsymbol{w_0}|)
\end{align}
$$

If we replace the $|\boldsymbol{w_1}-\boldsymbol{w_0}|$ with $-\boldsymbol{w}$, then it's changed into

$$
\frac{1}{P}\sum_{p=1}^P (0, -\mathring{\boldsymbol{x_p}}^Tw)
$$

# 7.8

# 9.2

In [21]:
from sklearn.datasets import fetch_openml
from skimage.feature import hog
from skimage import data, exposure
# import MNIST
x, y = fetch_openml('mnist_784', version=1, return_X_y=True)

# re-shape input/output data
x = np.array(x.T)
y = np.array([int(v) for v in y])[np.newaxis,:]

for i in range(x.shape[1]):
    x[:,i] = x[:,i] / np.linalg.norm(x[:,i])

print(x.shape)
print(y.shape)

  warn(


(784, 70000)
(1, 70000)


prepare for the data:

In [26]:
import random

def get_train_set(x, y):
    return x[:, 0:50000], y[:, 0:50000]

def get_test_set(x, y):
    return x[:, 50000:], y[:, 50000:]


feature design:

In [44]:
def convert_edges(x):
    x = x.T
    features = []
    for i, image in enumerate(x):
        image = image.reshape((28, 28))
        _, image = hog(
            image,
            orientations=8,
            pixels_per_cell=(1, 1),
            cells_per_block=(1, 1),
            visualize=True,
        )
        image = exposure.rescale_intensity(image, in_range=(0, 10))
        features.append(image)
    features = np.array(features)
    features = features.reshape((50000, -1))
    return features.T

#print(x.shape)
#print(convert_edges(get_features(train_set)).shape)
features, targets = get_train_set(x, y)
print(features.shape, targets.shape)
features = convert_edges(features)
print(features.shape, targets.shape)

In [None]:
w = 0.1*np.random.rand(785, 10)
# histogram = x
# targets = y

print(targets)
print(f"{np.sum(targets.squeeze() != np.argmax(model(features, w), axis=0))} miss")

def train_with_feature():
    batch_size = 200
    epoch = 20
    def train_mini_batch(batch_size, epoch, w):
        weight = []
        cost = []
        from tqdm import tqdm
        for i in range(epoch):
            assert(targets.shape[1] == features.shape[1])
            print(f"running epoch {i}")
            feature_list = np.split(features, features.shape[1]/batch_size, axis=1)
            target_list = np.split(targets, targets.shape[1]/batch_size, axis=1)
            for x, y in tqdm(zip(feature_list, target_list)):
                def multiclass_softmax(w):  
                    all_evals = model(x, w)
                    #print(x.shape, y.shape, w.shape)
                    b = all_evals[y.astype(int).flatten(), np.arange(np.size(y))]  
                    a = (all_evals - b)
                    a = np.exp(a)   
                    cost = a.sum(axis=0)
                    cost = np.log(1 + cost)
                    return np.sum(cost) / float(np.size(y))
                weight_his, cost_his = gradient_descent(multiclass_softmax, 1, w)
                w = weight_his[-1]
            weight.append(w)
            cost.append(cost_his[-1])
            print(f"cost is {cost[-1]}")
            print(f"{np.sum(targets.squeeze() != np.argmax(model(features, w), axis=0))} miss")
        return weight, cost
    return train_mini_batch(batch_size, epoch, w)

weight, cost = train_with_feature()

(784, 50000) (1, 50000)


In [13]:
#print(weight)
features = get_features(test_set)
histogram = convert_edges(features)
targets = get_target(test_set)
def test_with_feature(w):
#     print(f"There are {np.sum(y.squeeze() != np.argmax(model(x, weight[-1]), axis=0))} mismatch in total")
    return np.sum(y.squeeze() != np.argmax(model(x, w), axis=0))
#print(weight[-1].shape)
mismatches = []
for w in weight:
    mismatch = test_with_feature(w)
    mismatches.append(mismatch)

plot the cost and mismatch

In [14]:
print(test_with_feature(w))

6793


In [15]:
plt.subplot(211)
plt.plot(cost)
plt.subplot(212)
plt.plot(mismatches)

<IPython.core.display.Javascript object>

[<matplotlib.lines.Line2D at 0x125972a10>]

In [12]:
def train_without_feature():
    batch_size = 200
    epoch = 20
    
    def train_mini_batch(batch_size, epoch, w):
        weight = []
        cost = []
        from tqdm import tqdm
        for i in range(epoch):
            assert(targets.shape[1] == features.shape[1])
            print(f"running epoch {i}")
            features = np.split(features, features.shape[1]/batch_size, axis=1)
            targets = np.split(targets, targets.shape[1]/batch_size, axis=1)
            for x, y in tqdm(zip(features, targets)):
                weight_his, cost_his = gradient_descent(multiclass_softmax, 1, w)
                weight += weight_his
                cost += cost_his  
                w = weight[-1]
            print(f"cost is {cost[-1]}")
        return weight, cost
    return train_mini_batch(batch_size, epoch, w)

weight_, cost_ = train_without_feature()

UnboundLocalError: cannot access local variable 'targets' where it is not associated with a value

In [None]:
def test_without_feature(w):
    x = get_features(test_set)
    y = get_target(test_set)
    #print(f"There are {np.sum(y.squeeze() != np.argmax(model(x, w), axis=0))} mismatch in total")
    return np.sum(y.squeeze() != np.argmax(model(x, w), axis=0))
#print(weight[-1].shape)
mismatches = []
for w in weight:
    mismatch = test_with_feature(w)
    mismatches.append(mismatch)
plt.subplot(pos=0, cost)
plt.subplot(pos=1, mismatches)

## the mini batch version

## full batch verision