In [309]:
#imports
import numpy as np
import torch as tr
from scipy.stats import bernoulli
import matplotlib.pyplot as plt
import sklearn.svm
from cvxopt.solvers import qp
from cvxopt import matrix as cvxmatrix

In [307]:
# provided stub

class neural_network():
    def __init__(self, layers=[2, 100, 2], scale=.1, p=.1, lr=.1, lam=.1):
        super().__init__()
        self.weights = tr.nn.ParameterList([tr.nn.Parameter(scale * tr.randn(m, n)) for m, n in zip(layers[:-1], layers[1:])])
        self.biases = tr.nn.ParameterList([tr.nn.Parameter(scale * tr.randn(n)) for n in layers[1:]])
        self.parameters = list(self.weights) + list(self.biases)

        self.p = p
        self.lr = lr
        self.lam = lam
        self.train = False

    def relu(self, X, W, b):
        # algorithm 15, pg 46 from guide.pdf
        if self.train:
            delta = bernoulli.rvs(1 - self.p,
                                  size=W.shape[1])  # sample 'out' many samples from Bernoulli distribution B(1-p)
            Z = tr.from_numpy(delta) * tr.max(tr.zeros(X.shape[0], W.shape[1]), tr.mm(X, W) + b)

        else:
            Z = tr.max(tr.zeros(X.shape[0], W.shape[1]), (1 - self.p) * tr.mm(X, W) + b)

        return Z

    def softmax(self, Z, W, b):
        # algorithm 16, pg 46 from guide.pdf
        Z = tr.mm(Z, W) + b
        y_hat = tr.div(tr.exp(Z).T, tr.sum(tr.exp(Z), dim=1)).T

        return y_hat

    def forward(self, X):
        # algorithm 14, pg 45 from guide.pdf
        X = tr.tensor(X, dtype=tr.float)
        Z = X
        # apply ReLU to all layers but the last
        for w, b in zip(self.weights[:len(self.weights) - 1],
                        self.biases[:len(self.biases) - 1]):  # iterate through L-1 layers
            Z = self.relu(Z, w, b)
        # apply softmax to last layer
        y_hat = self.softmax(Z, self.weights[len(self.weights) - 1], self.biases[len(self.biases) - 1])

        return y_hat

    def predict(self, X):
        return self.forward(X).detach().numpy()

    def loss(self, ypred, ytrue):
        # compute cross entropy loss according to pg47 from guide.pdf
        loss = (-1 / ytrue.shape[0]) * tr.sum(ytrue * tr.log(ypred))

        return loss

    def fit(self, X, y, nsteps=1000, bs=100, plot=False):
        X, y = tr.tensor(X), tr.tensor(y)
        optimizer = tr.optim.SGD(self.parameters, lr=self.lr, weight_decay=self.lam)

        I = tr.randperm(X.shape[0])
        n = int(np.ceil(.1 * X.shape[0]))
        Xtrain, ytrain = X[I[:n]], y[I[:n]]
        Xval, yval = X[I[n:]], y[I[n:]]

        Ltrain, Lval, Aval = [], [], []
        for i in range(nsteps):
            optimizer.zero_grad()
            I = tr.randperm(Xtrain.shape[0])[:bs]
            self.train = True
            output = self.loss(self.forward(Xtrain[I]), ytrain[I])
            self.train = False
            Ltrain += [output.item()]
            output.backward()
            optimizer.step()

            outval = self.forward(Xval)
            Lval += [self.loss(outval, yval).item()]
            Aval += [np.array(outval.argmax(-1) == yval.argmax(-1)).mean()]

        if plot:
            plt.plot(range(nsteps), Ltrain, label='Training loss')
            plt.plot(range(nsteps), Lval, label='Validation loss')
            plt.plot(range(nsteps), Aval, label='Validation acc')
            plt.legend()
            plt.show()

In [11]:
#from ps4_tests.py
m = torch.nn.ReLU()
inp = torch.randn(2)
print(inp)
output = m(inp)
print(output)

tensor([-0.5478, -1.0110])
tensor([0., 0.])


In [287]:
#from ps4_tests.py

X = torch.tensor([[1, 1], [0, 0]], dtype=torch.float)
y = torch.tensor([[0, 1], [1, 0]], dtype=torch.int)
W = torch.tensor([[1, .2], [.5, 1]], dtype=torch.float)
b = torch.tensor([-1, -1], dtype=torch.float)
#m = neural_network(layers=[2,2,2], p=0, lam=0)

#m.fit(X, y, nsteps=1, bs=1, plot=False)

relu_out = torch.tensor([[.5,.2],[0, 0]])
#self.assertTrue(np.allclose(m.relu(X, W, b), relu_out), msg='neural_network: Error. ReLU output not correct')
print(relu_out)

tensor([[0.5000, 0.2000],
        [0.0000, 0.0000]])


In [288]:
#testing relu
def relu(p, X, W, b):
    # YOUR CODE HERE!
    #algorithm 15, pg 46 from guide.pdf
    if train:
        delta = bernoulli.rvs(1-p, size=W.shape[1]) #sample 'out' many samples from Bernoulli distribution B(1-p) 
        Z = torch.from_numpy(delta) * torch.max(torch.zeros(X.shape[0],W.shape[1]),torch.mm(X,W)+b)
        #Z = delta.detach().numpy() * np.maximum(0,torch.mm(X,W)+b)
        
    else:
        Z = torch.max(torch.zeros(X.shape[0],W.shape[1]),(1-p)*torch.mm(X,W)+b)
            
    return Z

In [289]:
Z = relu(p, X, W, b)
print(Z)

tensor([[0.5000, 0.2000],
        [0.0000, 0.0000]])


In [290]:
print(Z.T)

tensor([[0.5000, 0.0000],
        [0.2000, 0.0000]])


In [291]:
#testing softmax
def softmax(Z, W, b):
    # algorithm 16, pg 46 from guide.pdf
    Z = torch.mm(Z,W)+b
    y_hat = torch.div(torch.exp(Z).T,torch.sum(torch.exp(Z),dim=1)).T #can't specify division dim so double transpose to get desired behaviour
    return y_hat

In [292]:
y_hat = softmax(Z, W, b)

In [293]:
y_hat

tensor([[0.5744, 0.4256],
        [0.5000, 0.5000]])

In [295]:
layers = [2,100,2]
scale = .1
weights = torch.nn.ParameterList([torch.nn.Parameter(scale*torch.randn(m, n)) for m, n in zip(layers[:-1], layers[1:])])
biases = torch.nn.ParameterList([torch.nn.Parameter(scale*torch.randn(n)) for n in layers[1:]])

In [296]:
print(weights[:2])

ParameterList(
    (0): Parameter containing: [torch.FloatTensor of size 2x100]
    (1): Parameter containing: [torch.FloatTensor of size 100x2]
)


In [297]:
#testing forward
def forward(weights,biases, X):
    # algorithm 14, pg 45 from guide.pdf
    X = torch.tensor(X, dtype=torch.float)
    Z = X
    #apply ReLU to all layers but the last
    for w, b in zip(weights[:len(weights)-1], biases[:len(biases)-1]): #iterate through L-1 layers
        Z = relu(p, Z, w, b)
    #apply softmax to last layer
    y_hat = softmax(Z, weights[len(weights)-1], biases[len(biases)-1])
    
    return y_hat

In [299]:
y_hat=forward(weights,biases,X)

  This is separate from the ipykernel package so we can avoid doing imports until


In [304]:
def loss(self, ypred, ytrue):
    # compute cross entropy loss according to pg47 from guide.pdf
    loss = (-1/ytrue.shape[0])*torch.sum(ytrue*torch.log(ypred))
    
    return loss