In [1]:
from abc import ABCMeta, abstractmethod
import numpy as np
import scipy
import scipy.stats as stats
import tensorflow as tf
import time

import os
import math

import skimage as ski
import skimage.io

from im2col_cython import col2im_cython, im2col_cython

## 1. ZADATAK

Dovršite implementacije potpuno povezanog sloja, sloja nelinearnosti te funkcije gubitka u razredima FC, ReLU i SoftmaxCrossEntropyWithLogits.

## Layer implementations

In [2]:
# Default bias initializer: an all-zero array of the requested shape.
zero_init = np.zeros


def variance_scaling_initializer(shape, fan_in, factor=2.0, seed=None):
    """
    Draw initial weights from a zero-mean normal distribution truncated at two sigma.

    Args:
      shape: shape of the returned array.
      fan_in: number of inputs feeding one output unit; sets the scale.
      factor: variance scaling factor, sigma = sqrt(factor / fan_in).
      seed: optional seed (or random state) forwarded to scipy; with None the
        global NumPy random state is used.
    Returns:
      ndarray of the given shape with values in [-2 * sigma, 2 * sigma].
    """
    sigma = np.sqrt(factor / fan_in)
    # truncnorm bounds are expressed in units of the scale, i.e. +/- 2 sigma here
    return stats.truncnorm(-2, 2, loc=0, scale=sigma).rvs(shape, random_state=seed)


# -- ABSTRACT CLASS DEFINITION --
class Layer(metaclass=ABCMeta):
    """
    Common interface implemented by every layer of the network.

    Built on Python's abstract base classes (ABC):
    https://docs.python.org/3/library/abc.html
    """

    @abstractmethod
    def forward(self, inputs):
        """
        Run the forward pass.

        Args:
          inputs: ndarray tensor.
        Returns:
          ndarray tensor, result of the forward pass.
        """

    @abstractmethod
    def backward_inputs(self, grads):
        """
        Propagate the gradient back to the layer input.

        Args:
          grads: gradient of the loss with respect to the output of the layer.
        Returns:
          Gradient of the loss with respect to the input of the layer.
        """

    def backward_params(self, grads):
        """
        Compute the gradient for the layer parameters (optional; the base version returns None).

        Args:
          grads: gradient of the loss with respect to the output of the layer.
        Returns:
          Gradient of the loss with respect to all the parameters of the layer as a list
          [[w0, g0], ..., [wk, gk], self.name] where w are parameter weights and g their gradient.
          Note that wk and gk must have the same shape.
        """
        return None


# -- CONVOLUTION LAYER --
class Convolution(Layer):
    """
    Convolution layer over (N, C, H, W) inputs with square, odd-sized kernels and stride 1.

    The forward and backward passes are expressed as matrix products on the
    column matrix produced by im2col_cython / consumed by col2im_cython.
    """

    def __init__(self, input_layer, num_filters, kernel_size, name, padding='SAME',
               weights_initializer_fn=variance_scaling_initializer,
               bias_initializer_fn=zero_init):
        """
        Args:
          input_layer: object whose .shape is (N, C, H, W) (layer below or an ndarray)
          num_filters: number of output channels
          kernel_size: side of the square kernel, must be odd
          name: layer name
          padding: 'SAME' keeps H and W (zero padding), anything else means no padding
          weights_initializer_fn: called as fn(shape, fan_in) to create the weights
          bias_initializer_fn: called as fn(shape) to create the biases
        """
        self.input_shape = input_layer.shape
        N, C, H, W = input_layer.shape
        self.C = C
        self.N = N
        self.num_filters = num_filters
        self.kernel_size = kernel_size

        assert kernel_size % 2 == 1

        self.padding = padding

        if padding == 'SAME':
            # with zero padding
            self.shape = (N, num_filters, H, W)
            self.pad = (kernel_size - 1) // 2
        else:
            # without padding
            self.shape = (N, num_filters, H - kernel_size + 1, W - kernel_size + 1)
            self.pad = 0

        fan_in = C * kernel_size**2
        # weights are stored flattened: one row of kernel_size**2 * C values per filter
        self.weights = weights_initializer_fn([num_filters, kernel_size**2 * C], fan_in)
        self.bias = bias_initializer_fn([num_filters])
        # this implementation doesn't support strided convolutions
        self.stride = 1
        self.name = name
        self.has_params = True

    def forward(self, x):
        """
        Args:
          x: ndarray of shape (N, C, H, W); N may differ from the construction-time batch size.
        Returns:
          ndarray of shape (N, num_filters, H_out, W_out).
        """
        k = self.kernel_size
        # remember the shape actually seen so backward_inputs rebuilds a gradient
        # of the right size even when the batch size differs from construction
        self.input_shape = x.shape
        # cached for backward_params
        self.x_cols = im2col_cython(x, k, k, self.pad, self.stride)
        res = self.weights.dot(self.x_cols) + self.bias.reshape(-1, 1)
        out = res.reshape(self.num_filters, self.shape[2], self.shape[3], x.shape[0])
        return out.transpose(3, 0, 1, 2)

    def backward_inputs(self, grad_out):
        """
        Args:
          grad_out: ndarray of shape (N, num_filters, H_out, W_out).
        Returns:
          Gradient with respect to the input, as produced by col2im_cython for
          the input shape recorded in the last forward pass.
        """
        # nice trick from CS231n, backward pass can be done with just matrix mul and col2im
        grad_out = grad_out.transpose(1, 2, 3, 0).reshape(self.num_filters, -1)
        grad_x_cols = self.weights.T.dot(grad_out)
        N, C, H, W = self.input_shape
        k = self.kernel_size
        grad_x = col2im_cython(grad_x_cols, N, C, H, W, k, k, self.pad, self.stride)
        return grad_x

    def backward_params(self, grad_out):
        """
        Args:
          grad_out: ndarray of shape (N, num_filters, H_out, W_out).
        Returns:
          [[weights, grad_weights], [bias, grad_bias], name]; uses the columns
          cached by the last forward pass.
        """
        grad_bias = np.sum(grad_out, axis=(0, 2, 3))
        grad_out = grad_out.transpose(1, 2, 3, 0).reshape(self.num_filters, -1)
        grad_weights = grad_out.dot(self.x_cols.T).reshape(self.weights.shape)
        return [[self.weights, grad_weights], [self.bias, grad_bias], self.name]


class MaxPooling(Layer):
    """Max pooling over non-overlapping square windows (pool_size must equal stride)."""

    def __init__(self, input_layer, name, pool_size=2, stride=2):
        """
        Args:
          input_layer: object whose .shape is (N, C, H, W)
          name: layer name
          pool_size: side of the pooling window; must divide H and W
          stride: must be equal to pool_size
        """
        self.name = name
        self.input_shape = input_layer.shape
        batch, channels, height, width = self.input_shape
        self.stride = stride
        self.shape = (batch, channels, height // stride, width // stride)
        self.pool_size = pool_size
        assert pool_size == stride, 'Invalid pooling params'
        assert height % pool_size == 0
        assert width % pool_size == 0
        self.has_params = False

    def forward(self, x):
        """
        Args:
          x: ndarray of shape (N, C, H, W).
        Returns:
          ndarray of shape (N, C, H // pool_size, W // pool_size).
        """
        p = self.pool_size
        batch, channels, height, width = x.shape
        self.input_shape = x.shape
        # splitting H and W into (blocks, p) puts every window on axes 3 and 5
        self.x = x.reshape(batch, channels, height // p, p, width // p, p)
        self.out = self.x.max(axis=(3, 5))
        # the cached output is needed in the backward pass, so hand out a copy
        return self.out.copy()

    def backward_inputs(self, grad_out):
        """
        Args:
          grad_out: ndarray with the shape of the forward output.
        Returns:
          ndarray with the shape of the forward input; the gradient is routed
          to the position(s) holding the window maximum.
        """
        # index that re-inserts the two window axes as size-1 dimensions
        expand = (slice(None), slice(None), slice(None), np.newaxis, slice(None), np.newaxis)
        is_max = (self.x == self.out[expand])
        grad_x = np.zeros_like(self.x)
        upstream = np.broadcast_arrays(grad_out[expand], grad_x)[0]
        # this is almost the same as the real backward pass
        grad_x[is_max] = upstream[is_max]
        # when several inputs of a window tie for the maximum (rare), spread the
        # gradient evenly between them; otherwise this divides by 1
        grad_x /= np.sum(is_max, axis=(3, 5), keepdims=True)
        return grad_x.reshape(self.input_shape)


class Flatten(Layer):
    """Reshapes an (N, d1, ..., dk) tensor into an (N, d1 * ... * dk) matrix."""

    def __init__(self, input_layer, name):
        """
        Args:
          input_layer: object whose .shape starts with the batch size
          name: layer name
        """
        self.input_shape = input_layer.shape
        self.N = self.input_shape[0]
        # number of features per example = product of all non-batch dimensions
        features = 1
        for extent in self.input_shape[1:]:
            features *= extent
        self.num_outputs = features
        self.shape = (self.N, self.num_outputs)
        self.has_params = False
        self.name = name

    def forward(self, inputs):
        """Flatten everything except the first axis; remembers the input shape."""
        self.input_shape = inputs.shape
        flat = inputs.reshape(inputs.shape[0], -1)
        self.shape = flat.shape
        return flat

    def backward_inputs(self, grads):
        """Restore the gradient to the shape seen in the last forward pass."""
        return grads.reshape(self.input_shape)


class FC(Layer):
    """Fully connected layer: outputs = inputs . weights^T + bias."""

    def __init__(self, input_layer, num_outputs, name,
               weights_initializer_fn=variance_scaling_initializer,
               bias_initializer_fn=zero_init):
        """
        Args:
          input_layer: layer below
          num_outputs: number of neurons in this layer
          name: layer name
          weights_initializer_fn: initializer function for weights,
          bias_initializer_fn: initializer function for biases
        """
        in_shape = input_layer.shape
        self.input_shape = in_shape
        self.N = in_shape[0]
        self.shape = (self.N, num_outputs)
        self.num_outputs = num_outputs

        # every non-batch dimension counts as an input feature
        fan_in = 1
        for extent in in_shape[1:]:
            fan_in *= extent
        self.num_inputs = fan_in

        self.weights = weights_initializer_fn([num_outputs, self.num_inputs], fan_in=self.num_inputs)
        self.bias = bias_initializer_fn([num_outputs])
        self.name = name
        self.has_params = True

    def forward(self, inputs):
        """
        Args:
          inputs: ndarray of shape (N, num_inputs)
        Returns:
          An ndarray of shape (N, num_outputs)
        """
        # cached for backward_params
        self.inputs = inputs
        return np.dot(inputs, self.weights.T) + self.bias

    def backward_inputs(self, grads):
        """
        Args:
          grads: ndarray of shape (N, num_outputs)
        Returns:
          An ndarray of shape (N, num_inputs)
        """
        return np.dot(grads, self.weights)

    def backward_params(self, grads):
        """
        Args:
          grads: ndarray of shape (N, num_outputs)
        Returns:
          List of params and gradient pairs, followed by the layer name.
        """
        weight_grad = np.dot(grads.T, self.inputs)
        bias_grad = np.sum(grads, axis=0)
        return [[self.weights, weight_grad], [self.bias, bias_grad], self.name]



class ReLU(Layer):
    """Element-wise rectified linear unit, max(0, x)."""

    def __init__(self, input_layer, name):
        """
        Args:
          input_layer: object whose .shape is also the output shape of this layer
          name: layer name
        """
        self.shape = input_layer.shape
        self.name = name
        self.has_params = False

    def forward(self, inputs):
        """
        Args:
          inputs: ndarray of shape (N, C, H, W).
        Returns:
          ndarray of shape (N, C, H, W).
        """
        # cached so the backward pass knows which units were inactive
        self.inputs = inputs
        return np.maximum(0, inputs)

    def backward_inputs(self, grads):
        """
        Args:
          grads: ndarray of shape (N, C, H, W).
        Returns:
          ndarray of shape (N, C, H, W); a new array, the caller's grads is left untouched.
        """
        # work on a copy: the caller may still need grads (e.g. the gradient
        # checker reuses it), so it must not be zeroed in place
        grad_inputs = np.array(grads, copy=True)
        grad_inputs[self.inputs < 0] = 0
        return grad_inputs

def softmax(x):
    """
    Row-wise softmax.

    Args:
      x: ndarray of shape (N, num_classes) holding logits; it is not modified.
    Returns:
      ndarray of shape (N, num_classes) whose rows sum to 1.
    """
    # subtract each row's own maximum (out of place): the result is unchanged
    # mathematically, exp cannot overflow, and no row can underflow to all zeros
    shifted = x - np.max(x, axis=1, keepdims=True)
    logits_exp = np.exp(shifted)
    return logits_exp / np.sum(logits_exp, axis=1, keepdims=True)

class SoftmaxCrossEntropyWithLogits():
    """Softmax followed by cross entropy, averaged over the batch."""

    def __init__(self):
        self.has_params = False

    @staticmethod
    def _log_softmax(x):
        """Row-wise log(softmax(x)) via log-sum-exp; x is left unmodified."""
        shifted = x - np.max(x, axis=1, keepdims=True)
        return shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))

    def forward(self, x, y):
        """
        Args:
          x: ndarray of shape (N, num_classes).
          y: ndarray of shape (N, num_classes).
        Returns:
          Scalar, average loss over N examples.
          It is better to compute average loss here instead of just sum
          because then learning rate and weight decay won't depend on batch size.

        """
        # log-probabilities are computed directly, so a probability that
        # underflows to 0 never produces log(0) * 0 = NaN
        return (-self._log_softmax(x) * y).sum(axis=1).mean()

    def backward_inputs(self, x, y):
        """
        Args:
          x: ndarray of shape (N, num_classes).
          y: ndarray of shape (N, num_classes).
        Returns:
          Gradient with respect to the x, ndarray of shape (N, num_classes).
        """
        # the forward pass averages over the batch, hence the division by N
        N = len(x)
        return (np.exp(self._log_softmax(x)) - y) / N


class L2Regularizer():
    """L2 penalty, weight_decay / 2 * sum(w ** 2), on one parameter tensor."""

    def __init__(self, weights, weight_decay, name):
        """
        Args:
          weights: parameters which will be regularized
          weight_decay: lambda, regularization strength
          name: layer name
        """
        # keep a reference (not a copy) so the penalty always sees the current
        # values; never modify self.weights here
        self.weights = weights
        self.weight_decay = weight_decay
        self.name = name

    def forward(self):
        """
         Returns:
          Scalar, loss due to the L2 regularization.
        """
        squared_norm = np.sum(np.square(self.weights))
        return self.weight_decay * 0.5 * squared_norm

    def backward_params(self):
        """
        Returns:
          [[weights, grad_weights], name] where grad_weights is the gradient of
          the L2 loss with respect to the regularized weights.
        """
        return [[self.weights, self.weight_decay * self.weights], self.name]


class RegularizedLoss():
    """Data loss plus the sum of a list of regularization losses."""

    def __init__(self, data_loss, regularizer_losses):
        """
        Args:
          data_loss: loss object providing forward(x, y) and backward_inputs(x, y)
          regularizer_losses: iterable of objects providing forward() and backward_params()
        """
        self.data_loss = data_loss
        self.regularizer_losses = regularizer_losses
        self.has_params = True
        self.name = 'RegularizedLoss'

    def forward(self, x, y):
        """Return the data loss on (x, y) increased by every regularizer's loss."""
        total = self.data_loss.forward(x, y)
        for regularizer in self.regularizer_losses:
            total += regularizer.forward()
        return total

    def backward_inputs(self, x, y):
        """Gradient with respect to x; only the data loss contributes to it."""
        return self.data_loss.backward_inputs(x, y)

    def backward_params(self):
        """Return the list of backward_params() results, one per regularizer."""
        return [regularizer.backward_params() for regularizer in self.regularizer_losses]



## Gradient checker

In [29]:
def rel_error(x, y):
    """
    Largest element-wise relative error between x and y.

    The denominator |x| + |y| is clamped to at least 1e-8 so that entries where
    both arrays are zero do not divide by zero.
    """
    abs_diff = np.abs(x - y)
    scale = np.maximum(1e-8, np.abs(x) + np.abs(y))
    return np.max(abs_diff / scale)

def eval_numerical_gradient(f, x, df, h=1e-5):
    """
    Numerically estimate the gradient with respect to x by centered differences.

    - f is a function of a single argument; its result must provide .copy()
      (numpy array or numpy scalar)
    - x is the point (numpy array) to evaluate the gradient at; entries are
      perturbed in place one at a time and restored afterwards
    - df is the upstream gradient; it weights the change of f's output
    - h is the perturbation size
    Returns an array shaped like x.
    """
    grad = np.zeros_like(x)
    it = np.nditer(x, flags=['multi_index'], op_flags=['readwrite'])
    # the iterator is only used to walk over every index of x
    for _ in it:
        ix = it.multi_index
        center = x[ix]

        # evaluate f(x + h)
        x[ix] = center + h
        f_plus = f(x.copy()).copy()
        # evaluate f(x - h)
        x[ix] = center - h
        f_minus = f(x.copy()).copy()
        # put the original value back before moving on
        x[ix] = center

        # centered formula, contracted with the upstream gradient
        grad[ix] = np.sum((f_plus - f_minus) * df) / (2 * h)
    return grad

def check_grad_inputs(layer, x, grad_out):
    """
    Compare a layer's analytic input gradient with a numerical estimate and print the errors.

    Args:
    layer: Layer object
    x: ndarray tensor input data
    grad_out: ndarray tensor gradient from the next layer
    """
    numeric = eval_numerical_gradient(layer.forward, x, grad_out)
    analytic = layer.backward_inputs(grad_out)
    print("Relative error = ", rel_error(numeric, analytic))
    print("Error norm = ", np.linalg.norm(numeric - analytic))

def check_grad_params(layer, x, w, b, grad_out):
    """
    Compare a layer's analytic parameter gradients with numerical estimates and print the errors.

    Args:
    layer: Layer object
    x: ndarray tensor input data
    w: ndarray tensor layer weights
    b: ndarray tensor layer biases
    grad_out: ndarray tensor gradient from the next layer
    """
    def run_forward(_perturbed):
        # The argument is ignored: the numerical differentiator perturbs w / b
        # in place, so they must be the very arrays the layer reads in forward.
        return layer.forward(x)

    numeric_w = eval_numerical_gradient(run_forward, w, grad_out)
    numeric_b = eval_numerical_gradient(run_forward, b, grad_out)
    param_grads = layer.backward_params(grad_out)
    analytic_w = param_grads[0][1]
    analytic_b = param_grads[1][1]
    reports = (("Check weights:", numeric_w, analytic_w),
               ("Check biases:", numeric_b, analytic_b))
    for header, numeric, analytic in reports:
        print(header)
        print("Relative error = ", rel_error(numeric, analytic))
        print("Error norm = ", np.linalg.norm(numeric - analytic))

# Run the gradient checks on small random tensors, one layer type at a time.
# The layer constructors only read `.shape` from their first argument, so the
# random input array itself is passed where a layer below would normally go.
print("Convolution")
x = np.random.randn(4, 3, 5, 5)
grad_out = np.random.randn(4, 2, 5, 5)
conv = Convolution(x, 2, 3, "conv1")
print("Check grad wrt input")
check_grad_inputs(conv, x, grad_out)
print("Check grad wrt params")
# conv.weights / conv.bias are passed by reference so the checker perturbs the
# arrays the layer actually uses
check_grad_params(conv, x, conv.weights, conv.bias, grad_out)

print("\nMaxPooling")
x = np.random.randn(5, 4, 8, 8)
grad_out = np.random.randn(5, 4, 4, 4)
pool = MaxPooling(x, "pool", 2, 2)
print("Check grad wrt input")
check_grad_inputs(pool, x, grad_out)

print("\nReLU")
x = np.random.randn(4, 3, 5, 5)
grad_out = np.random.randn(4, 3, 5, 5)
relu = ReLU(x, "relu")
print("Check grad wrt input")
check_grad_inputs(relu, x, grad_out)

print("\nFC")
x = np.random.randn(20, 40)
grad_out = np.random.randn(20, 30)
fc = FC(x, 30, "fc")
print("Check grad wrt input")
check_grad_inputs(fc, x, grad_out)
print("Check grad wrt params")
check_grad_params(fc, x, fc.weights, fc.bias, grad_out)

print("\nSoftmaxCrossEntropyWithLogits")
x = np.random.randn(50, 20)
# one-hot targets: every example belongs to class 0
y = np.zeros([50, 20])
y[:,0] = 1
loss = SoftmaxCrossEntropyWithLogits()
# the loss is a scalar, so the upstream gradient is simply 1
grad_x_num = eval_numerical_gradient(lambda x: loss.forward(x, y), x, 1)
out = loss.forward(x, y)
grad_x = loss.backward_inputs(x, y)
print("Relative error = ", rel_error(grad_x_num, grad_x))
print("Error norm = ", np.linalg.norm(grad_x_num - grad_x))

# NOTE: the triple-quoted block below is a bare string expression, not code, so
# the L2Regularizer check inside it is never executed; as the last expression
# of the cell its repr is what the notebook displays as the cell result.
"""
print("\nL2Regularizer")
x = np.random.randn(5, 4, 8, 8)
grad_out = np.random.randn(5, 4, 4, 4)
l2reg = L2Regularizer(x, 1e-2, 'L2reg')
print("Check grad wrt params")
func = lambda params: l2reg.forward()
grad_num = eval_numerical_gradient(func, l2reg.weights, 1)
grads = l2reg.backward_params()
grad = grads[0][1]
print("Relative error = ", rel_error(grad_num, grad))
print("Error norm = ", np.linalg.norm(grad_num - grad))
"""


Convolution
Check grad wrt input
Relative error =  7.1781396328e-09
Error norm =  4.08426369364e-10
Check grad wrt params
Check weights:
Relative error =  3.79737411839e-10
Error norm =  3.9800787816e-10
Check biases:
Relative error =  1.3861585688e-12
Error norm =  4.00344010748e-11

MaxPooling
Check grad wrt input
Relative error =  3.27563595128e-12
Error norm =  9.20217924051e-11

ReLU
Check grad wrt input
Relative error =  3.27562605199e-12
Error norm =  4.55006932058e-11

FC
Check grad wrt input
Relative error =  7.54229391412e-08
Error norm =  7.10190512924e-10
Check grad wrt params
Check weights:
Relative error =  2.82027039502e-09
Error norm =  7.4205428145e-10
Check biases:
Relative error =  3.50880153745e-11
Error norm =  1.01330545962e-10

SoftmaxCrossEntropyWithLogits
Relative error =  4.27615033983e-07
Error norm =  5.07774274724e-10


'\nprint("\nL2Regularizer")\nx = np.random.randn(5, 4, 8, 8)\ngrad_out = np.random.randn(5, 4, 4, 4)\nl2reg = L2Regularizer(x, 1e-2, \'L2reg\')\nprint("Check grad wrt params")\nfunc = lambda params: l2reg.forward()\ngrad_num = eval_numerical_gradient(func, l2reg.weights, 1)\ngrads = l2reg.backward_params()\ngrad = grads[0][1]\nprint("Relative error = ", rel_error(grad_num, grad))\nprint("Error norm = ", np.linalg.norm(grad_num - grad))\n'

In [7]:
def forward_pass(net, inputs):
    """
    Feed inputs through every layer of net in order.

    Args:
      net: sequence of layers, each providing forward(tensor)
      inputs: input of the first layer
    Returns:
      Output of the last layer (inputs itself for an empty net).
    """
    activation = inputs
    for layer in net:
        activation = layer.forward(activation)
    return activation


def backward_pass(net, loss, x, y):
    """
    Backpropagate from the loss through net and collect parameter gradients.

    Args:
      net: sequence of layers (visited last to first)
      loss: loss object with backward_inputs(x, y) and a has_params flag
      x: network output that was fed to the loss
      y: targets
    Returns:
      List of gradient entries: first the entries returned by
      loss.backward_params() (if the loss has params), then one
      backward_params(...) result per parametrized layer, last layer first.
    """
    collected = []
    upstream = loss.backward_inputs(x, y)
    if loss.has_params:
        collected.extend(loss.backward_params())
    for layer in reversed(net):
        # input gradient first, parameter gradient second, both from the same
        # upstream gradient
        downstream = layer.backward_inputs(upstream)
        if layer.has_params:
            collected.append(layer.backward_params(upstream))
        upstream = downstream
    return collected

def sgd_update_params(grads, config):
    """
    Apply one plain SGD step, in place, to every parameter listed in grads.

    Args:
      grads: list of entries [[w0, g0], ..., [wk, gk], name]; the trailing
        name is skipped
      config: dict holding the learning rate under 'lr'
    """
    step_size = config['lr']
    for entry in grads:
        # the last element of each entry is the layer name, not a pair
        for pair in entry[:-1]:
            param = pair[0]
            gradient = pair[1]
            # in-place update so the layer that owns the array sees the change
            param -= step_size * gradient


def draw_conv_filters(epoch, step, layer, save_dir):
    """
    Save the filters of a convolution layer as a tiled grayscale image.

    Only the first input channel of every filter is drawn. Weights are
    rescaled to [0, 1] over the whole layer before drawing.

    Args:
      epoch: current epoch, used in the file name
      step: current step, used in the file name
      layer: layer providing .C, .weights of shape (num_filters, C * k * k) and .name
      save_dir: output directory; created if it does not exist
    """
    C = layer.C
    w = layer.weights.copy()
    num_filters = w.shape[0]
    k = int(np.sqrt(w.shape[1] / C))
    w = w.reshape(num_filters, C, k, k)
    w -= w.min()
    # all-equal weights would give a zero range; skip the division then
    w_max = w.max()
    if w_max > 0:
        w /= w_max
    border = 1
    cols = 8
    rows = math.ceil(num_filters / cols)
    width = cols * k + (cols-1) * border
    height = rows * k + (rows-1) * border
    # the image writer does not create missing directories
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    # only the first input channel is visualized (use range(C) for all)
    for i in range(1):
        img = np.zeros([height, width])

        for j in range(num_filters):
            tile_row, tile_col = divmod(j, cols)
            r = tile_row * (k + border)
            c = tile_col * (k + border)
            img[r:r+k,c:c+k] = w[j,i]

        filename = '%s_epoch_%02d_step_%06d_input_%03d.png' % (layer.name, epoch, step, i)
        ski.io.imsave(os.path.join(save_dir, filename), img)


def train(train_x, train_y, valid_x, valid_y, net, loss, config):
    """
    Train net with mini-batch SGD and validate after every epoch.

    Args:
      train_x, train_y: training inputs and one-hot targets; the number of
        examples must be divisible by the batch size
      valid_x, valid_y: validation inputs and one-hot targets
      net: list of layers; net[0] is drawn with draw_conv_filters
      loss: loss object
      config: dict with 'lr_policy' (epoch -> solver config containing 'lr',
        taking effect from that epoch on), 'batch_size', 'max_epochs', 'save_dir'
    Returns:
      The trained net (same list that was passed in).
    Raises:
      ValueError: if lr_policy defines no learning rate for the first epoch.
    """
    lr_policy = config['lr_policy']
    batch_size = config['batch_size']
    max_epochs = config['max_epochs']
    save_dir = config['save_dir']
    num_examples = train_x.shape[0]
    assert num_examples % batch_size == 0
    num_batches = num_examples // batch_size
    solver_config = None
    for epoch in range(1, max_epochs+1):
        if epoch in lr_policy:
            solver_config = lr_policy[epoch]
        if solver_config is None:
            # without this check the first parameter update would fail with an
            # obscure unbound-variable error
            raise ValueError("config['lr_policy'] must define a learning rate for epoch 1")

        cnt_correct = 0
        # shuffle the data at the beginning of each epoch
        permutation_idx = np.random.permutation(num_examples)
        train_x = train_x[permutation_idx]
        train_y = train_y[permutation_idx]
        for i in range(num_batches):
            # store mini-batch to ndarray
            batch_x = train_x[i*batch_size:(i+1)*batch_size, :]
            batch_y = train_y[i*batch_size:(i+1)*batch_size, :]
            logits = forward_pass(net, batch_x)
            loss_val = loss.forward(logits, batch_y)
            # compute classification accuracy
            yp = np.argmax(logits, 1)
            yt = np.argmax(batch_y, 1)
            cnt_correct += (yp == yt).sum()
            grads = backward_pass(net, loss, logits, batch_y)
            sgd_update_params(grads, solver_config)

            if i % 5 == 0:
                print("epoch %d, step %d/%d, batch loss = %.2f" % (epoch, i*batch_size, num_examples, loss_val))

            if i % 100 == 0:
                draw_conv_filters(epoch, i*batch_size, net[0], save_dir)
            if i > 0 and i % 50 == 0:
                # running accuracy over the examples seen so far in this epoch
                print("Train accuracy = %.2f" % (cnt_correct / ((i+1)*batch_size) * 100))
        print("Train accuracy = %.2f" % (cnt_correct / num_examples * 100))
        evaluate("Validation", valid_x, valid_y, net, loss, config)
    return net


def evaluate(name, x, y, net, loss, config):
    """
    Print classification accuracy and average batch loss of net on (x, y).

    Args:
      name: label used in the printed report
      x: inputs; the number of examples must be divisible by the batch size
      y: one-hot targets
      net: list of layers
      loss: loss object providing forward(logits, targets)
      config: dict holding 'batch_size'
    """
    print("\nRunning evaluation: ", name)
    batch_size = config['batch_size']
    num_examples = x.shape[0]
    assert num_examples % batch_size == 0
    num_batches = num_examples // batch_size
    correct = 0
    loss_total = 0
    for start in range(0, num_batches * batch_size, batch_size):
        stop = start + batch_size
        batch_x = x[start:stop, :]
        batch_y = y[start:stop, :]
        logits = forward_pass(net, batch_x)
        predicted = np.argmax(logits, 1)
        expected = np.argmax(batch_y, 1)
        correct += (predicted == expected).sum()
        loss_total += loss.forward(logits, batch_y)
    valid_acc = correct / num_examples * 100
    loss_avg = loss_total / num_batches
    print(name + " accuracy = %.2f" % valid_acc)
    print(name + " avg loss = %.2f\n" % loss_avg)



## NN Training without regularization

In [4]:
from tensorflow.examples.tutorials.mnist import input_data

In [5]:
# Register the dataset directory as a TensorFlow flag, then read MNIST from it
# with one-hot encoded labels.
tf.app.flags.DEFINE_string('data_dir', '/tmp/data/', 'Directory for storing data')
mnist = input_data.read_data_sets(tf.app.flags.FLAGS.data_dir, one_hot=True)

Extracting /tmp/data/train-images-idx3-ubyte.gz
Extracting /tmp/data/train-labels-idx1-ubyte.gz
Extracting /tmp/data/t10k-images-idx3-ubyte.gz
Extracting /tmp/data/t10k-labels-idx1-ubyte.gz


In [33]:
# NOTE(review): absolute, machine-specific output path; adjust before running elsewhere.
SAVE_DIR = "/Users/goran/Documents/III_Semestar/DU/labosi/2_lab/zad1_images/"

config = {}
config['max_epochs'] = 8
config['batch_size'] = 50
config['save_dir'] = SAVE_DIR
# learning rate schedule: each entry takes effect from the given epoch on
config['lr_policy'] = {1:{'lr':1e-1}, 3:{'lr':1e-2}, 5:{'lr':1e-3}, 7:{'lr':1e-4}}

# time-based seed: every run differs (use a fixed value such as
# np.random.seed(100) for a reproducible run)
np.random.seed(int(time.time() * 1e6) % 2**31)
train_x = mnist.train.images.reshape([-1, 1, 28, 28])
train_y = mnist.train.labels
valid_x = mnist.validation.images.reshape([-1, 1, 28, 28])
valid_y = mnist.validation.labels
test_x = mnist.test.images.reshape([-1, 1, 28, 28])
test_y = mnist.test.labels
# Center all splits with the training mean. The subtraction is done out of
# place on purpose: `-=` would write into the arrays read from `mnist`, so
# re-running this cell (or any later cell reading `mnist`) would see
# already-shifted data.
train_mean = train_x.mean()
train_x = train_x - train_mean
valid_x = valid_x - train_mean
test_x = test_x - train_mean


net = []
# placeholder tensor: only its shape is read by the first layer
inputs = np.random.randn(config['batch_size'], 1, 28, 28)
net += [Convolution(inputs, 16, 5, "conv1")]
net += [MaxPooling(net[-1], "pool1")]
net += [ReLU(net[-1], "relu1")]
net += [Convolution(net[-1], 32, 5, "conv2")]
net += [MaxPooling(net[-1], "pool2")]
net += [ReLU(net[-1], "relu2")]
# out = 7x7
net += [Flatten(net[-1], "flatten3")]
net += [FC(net[-1], 512, "fc3")]
net += [ReLU(net[-1], "relu3")]
net += [FC(net[-1], 10, "logits")]

loss = SoftmaxCrossEntropyWithLogits()

train(train_x, train_y, valid_x, valid_y, net, loss, config)
evaluate("Test", test_x, test_y, net, loss, config)



epoch 1, step 0/55000, batch loss = 2.40


  "%s to %s" % (dtypeobj_in, dtypeobj))


epoch 1, step 250/55000, batch loss = 1.96
epoch 1, step 500/55000, batch loss = 1.45
epoch 1, step 750/55000, batch loss = 0.94
epoch 1, step 1000/55000, batch loss = 0.58
epoch 1, step 1250/55000, batch loss = 0.64
epoch 1, step 1500/55000, batch loss = 0.47
epoch 1, step 1750/55000, batch loss = 0.71
epoch 1, step 2000/55000, batch loss = 0.29
epoch 1, step 2250/55000, batch loss = 0.44
epoch 1, step 2500/55000, batch loss = 0.40
Train accuracy = 70.39
epoch 1, step 2750/55000, batch loss = 0.18
epoch 1, step 3000/55000, batch loss = 0.33
epoch 1, step 3250/55000, batch loss = 0.31
epoch 1, step 3500/55000, batch loss = 0.38
epoch 1, step 3750/55000, batch loss = 0.43
epoch 1, step 4000/55000, batch loss = 0.15
epoch 1, step 4250/55000, batch loss = 0.50
epoch 1, step 4500/55000, batch loss = 0.42
epoch 1, step 4750/55000, batch loss = 0.16
epoch 1, step 5000/55000, batch loss = 0.30
Train accuracy = 80.83
epoch 1, step 5250/55000, batch loss = 0.18
epoch 1, step 5500/55000, batch l

epoch 1, step 44000/55000, batch loss = 0.02
epoch 1, step 44250/55000, batch loss = 0.01
epoch 1, step 44500/55000, batch loss = 0.02
epoch 1, step 44750/55000, batch loss = 0.02
epoch 1, step 45000/55000, batch loss = 0.00
Train accuracy = 95.28
epoch 1, step 45250/55000, batch loss = 0.02
epoch 1, step 45500/55000, batch loss = 0.04
epoch 1, step 45750/55000, batch loss = 0.02
epoch 1, step 46000/55000, batch loss = 0.03
epoch 1, step 46250/55000, batch loss = 0.04
epoch 1, step 46500/55000, batch loss = 0.07
epoch 1, step 46750/55000, batch loss = 0.02
epoch 1, step 47000/55000, batch loss = 0.02
epoch 1, step 47250/55000, batch loss = 0.10
epoch 1, step 47500/55000, batch loss = 0.01
Train accuracy = 95.40
epoch 1, step 47750/55000, batch loss = 0.08
epoch 1, step 48000/55000, batch loss = 0.08
epoch 1, step 48250/55000, batch loss = 0.05
epoch 1, step 48500/55000, batch loss = 0.02
epoch 1, step 48750/55000, batch loss = 0.07
epoch 1, step 49000/55000, batch loss = 0.24
epoch 1, 

epoch 2, step 32250/55000, batch loss = 0.01
epoch 2, step 32500/55000, batch loss = 0.00
Train accuracy = 98.61
epoch 2, step 32750/55000, batch loss = 0.02
epoch 2, step 33000/55000, batch loss = 0.00
epoch 2, step 33250/55000, batch loss = 0.08
epoch 2, step 33500/55000, batch loss = 0.03
epoch 2, step 33750/55000, batch loss = 0.02
epoch 2, step 34000/55000, batch loss = 0.07
epoch 2, step 34250/55000, batch loss = 0.06
epoch 2, step 34500/55000, batch loss = 0.04
epoch 2, step 34750/55000, batch loss = 0.03
epoch 2, step 35000/55000, batch loss = 0.02
Train accuracy = 98.63
epoch 2, step 35250/55000, batch loss = 0.05
epoch 2, step 35500/55000, batch loss = 0.04
epoch 2, step 35750/55000, batch loss = 0.03
epoch 2, step 36000/55000, batch loss = 0.04
epoch 2, step 36250/55000, batch loss = 0.20
epoch 2, step 36500/55000, batch loss = 0.06
epoch 2, step 36750/55000, batch loss = 0.05
epoch 2, step 37000/55000, batch loss = 0.03
epoch 2, step 37250/55000, batch loss = 0.06
epoch 2, 

epoch 3, step 20250/55000, batch loss = 0.02
epoch 3, step 20500/55000, batch loss = 0.01
epoch 3, step 20750/55000, batch loss = 0.03
epoch 3, step 21000/55000, batch loss = 0.05
epoch 3, step 21250/55000, batch loss = 0.02
epoch 3, step 21500/55000, batch loss = 0.00
epoch 3, step 21750/55000, batch loss = 0.11
epoch 3, step 22000/55000, batch loss = 0.01
epoch 3, step 22250/55000, batch loss = 0.01
epoch 3, step 22500/55000, batch loss = 0.02
Train accuracy = 99.37
epoch 3, step 22750/55000, batch loss = 0.00
epoch 3, step 23000/55000, batch loss = 0.02
epoch 3, step 23250/55000, batch loss = 0.00
epoch 3, step 23500/55000, batch loss = 0.01
epoch 3, step 23750/55000, batch loss = 0.00
epoch 3, step 24000/55000, batch loss = 0.02
epoch 3, step 24250/55000, batch loss = 0.01
epoch 3, step 24500/55000, batch loss = 0.02
epoch 3, step 24750/55000, batch loss = 0.00
epoch 3, step 25000/55000, batch loss = 0.01
Train accuracy = 99.40
epoch 3, step 25250/55000, batch loss = 0.01
epoch 3, 

epoch 4, step 8500/55000, batch loss = 0.00
epoch 4, step 8750/55000, batch loss = 0.00
epoch 4, step 9000/55000, batch loss = 0.03
epoch 4, step 9250/55000, batch loss = 0.01
epoch 4, step 9500/55000, batch loss = 0.00
epoch 4, step 9750/55000, batch loss = 0.00
epoch 4, step 10000/55000, batch loss = 0.01
Train accuracy = 99.53
epoch 4, step 10250/55000, batch loss = 0.00
epoch 4, step 10500/55000, batch loss = 0.01
epoch 4, step 10750/55000, batch loss = 0.01
epoch 4, step 11000/55000, batch loss = 0.00
epoch 4, step 11250/55000, batch loss = 0.01
epoch 4, step 11500/55000, batch loss = 0.01
epoch 4, step 11750/55000, batch loss = 0.02
epoch 4, step 12000/55000, batch loss = 0.00
epoch 4, step 12250/55000, batch loss = 0.02
epoch 4, step 12500/55000, batch loss = 0.01
Train accuracy = 99.52
epoch 4, step 12750/55000, batch loss = 0.00
epoch 4, step 13000/55000, batch loss = 0.03
epoch 4, step 13250/55000, batch loss = 0.00
epoch 4, step 13500/55000, batch loss = 0.01
epoch 4, step 1

epoch 4, step 52000/55000, batch loss = 0.08
epoch 4, step 52250/55000, batch loss = 0.01
epoch 4, step 52500/55000, batch loss = 0.00
Train accuracy = 99.52
epoch 4, step 52750/55000, batch loss = 0.05
epoch 4, step 53000/55000, batch loss = 0.01
epoch 4, step 53250/55000, batch loss = 0.01
epoch 4, step 53500/55000, batch loss = 0.05
epoch 4, step 53750/55000, batch loss = 0.00
epoch 4, step 54000/55000, batch loss = 0.00
epoch 4, step 54250/55000, batch loss = 0.00
epoch 4, step 54500/55000, batch loss = 0.00
epoch 4, step 54750/55000, batch loss = 0.04
Train accuracy = 99.51

Running evaluation:  Validation
Validation accuracy = 99.18
Validation avg loss = 0.03

epoch 5, step 0/55000, batch loss = 0.00
epoch 5, step 250/55000, batch loss = 0.00
epoch 5, step 500/55000, batch loss = 0.02
epoch 5, step 750/55000, batch loss = 0.00
epoch 5, step 1000/55000, batch loss = 0.01
epoch 5, step 1250/55000, batch loss = 0.00
epoch 5, step 1500/55000, batch loss = 0.03
epoch 5, step 1750/5500

epoch 5, step 40250/55000, batch loss = 0.00
epoch 5, step 40500/55000, batch loss = 0.02
epoch 5, step 40750/55000, batch loss = 0.01
epoch 5, step 41000/55000, batch loss = 0.01
epoch 5, step 41250/55000, batch loss = 0.04
epoch 5, step 41500/55000, batch loss = 0.01
epoch 5, step 41750/55000, batch loss = 0.01
epoch 5, step 42000/55000, batch loss = 0.00
epoch 5, step 42250/55000, batch loss = 0.19
epoch 5, step 42500/55000, batch loss = 0.00
Train accuracy = 99.61
epoch 5, step 42750/55000, batch loss = 0.01
epoch 5, step 43000/55000, batch loss = 0.01
epoch 5, step 43250/55000, batch loss = 0.17
epoch 5, step 43500/55000, batch loss = 0.11
epoch 5, step 43750/55000, batch loss = 0.00
epoch 5, step 44000/55000, batch loss = 0.05
epoch 5, step 44250/55000, batch loss = 0.00
epoch 5, step 44500/55000, batch loss = 0.02
epoch 5, step 44750/55000, batch loss = 0.00
epoch 5, step 45000/55000, batch loss = 0.01
Train accuracy = 99.60
epoch 5, step 45250/55000, batch loss = 0.00
epoch 5, 

epoch 6, step 28500/55000, batch loss = 0.00
epoch 6, step 28750/55000, batch loss = 0.03
epoch 6, step 29000/55000, batch loss = 0.01
epoch 6, step 29250/55000, batch loss = 0.00
epoch 6, step 29500/55000, batch loss = 0.02
epoch 6, step 29750/55000, batch loss = 0.00
epoch 6, step 30000/55000, batch loss = 0.02
Train accuracy = 99.53
epoch 6, step 30250/55000, batch loss = 0.01
epoch 6, step 30500/55000, batch loss = 0.00
epoch 6, step 30750/55000, batch loss = 0.02
epoch 6, step 31000/55000, batch loss = 0.03
epoch 6, step 31250/55000, batch loss = 0.01
epoch 6, step 31500/55000, batch loss = 0.05
epoch 6, step 31750/55000, batch loss = 0.00
epoch 6, step 32000/55000, batch loss = 0.01
epoch 6, step 32250/55000, batch loss = 0.00
epoch 6, step 32500/55000, batch loss = 0.00
Train accuracy = 99.54
epoch 6, step 32750/55000, batch loss = 0.01
epoch 6, step 33000/55000, batch loss = 0.01
epoch 6, step 33250/55000, batch loss = 0.00
epoch 6, step 33500/55000, batch loss = 0.03
epoch 6, 

epoch 7, step 16750/55000, batch loss = 0.00
epoch 7, step 17000/55000, batch loss = 0.00
epoch 7, step 17250/55000, batch loss = 0.00
epoch 7, step 17500/55000, batch loss = 0.01
Train accuracy = 99.57
epoch 7, step 17750/55000, batch loss = 0.01
epoch 7, step 18000/55000, batch loss = 0.19
epoch 7, step 18250/55000, batch loss = 0.00
epoch 7, step 18500/55000, batch loss = 0.06
epoch 7, step 18750/55000, batch loss = 0.00
epoch 7, step 19000/55000, batch loss = 0.00
epoch 7, step 19250/55000, batch loss = 0.01
epoch 7, step 19500/55000, batch loss = 0.00
epoch 7, step 19750/55000, batch loss = 0.00
epoch 7, step 20000/55000, batch loss = 0.01
Train accuracy = 99.56
epoch 7, step 20250/55000, batch loss = 0.00
epoch 7, step 20500/55000, batch loss = 0.01
epoch 7, step 20750/55000, batch loss = 0.00
epoch 7, step 21000/55000, batch loss = 0.01
epoch 7, step 21250/55000, batch loss = 0.01
epoch 7, step 21500/55000, batch loss = 0.04
epoch 7, step 21750/55000, batch loss = 0.00
epoch 7, 

epoch 8, step 4750/55000, batch loss = 0.16
epoch 8, step 5000/55000, batch loss = 0.00
Train accuracy = 99.66
epoch 8, step 5250/55000, batch loss = 0.00
epoch 8, step 5500/55000, batch loss = 0.03
epoch 8, step 5750/55000, batch loss = 0.00
epoch 8, step 6000/55000, batch loss = 0.00
epoch 8, step 6250/55000, batch loss = 0.01
epoch 8, step 6500/55000, batch loss = 0.01
epoch 8, step 6750/55000, batch loss = 0.00
epoch 8, step 7000/55000, batch loss = 0.04
epoch 8, step 7250/55000, batch loss = 0.00
epoch 8, step 7500/55000, batch loss = 0.00
Train accuracy = 99.66
epoch 8, step 7750/55000, batch loss = 0.02
epoch 8, step 8000/55000, batch loss = 0.00
epoch 8, step 8250/55000, batch loss = 0.01
epoch 8, step 8500/55000, batch loss = 0.00
epoch 8, step 8750/55000, batch loss = 0.01
epoch 8, step 9000/55000, batch loss = 0.01
epoch 8, step 9250/55000, batch loss = 0.00
epoch 8, step 9500/55000, batch loss = 0.01
epoch 8, step 9750/55000, batch loss = 0.01
epoch 8, step 10000/55000, bat

epoch 8, step 48250/55000, batch loss = 0.00
epoch 8, step 48500/55000, batch loss = 0.00
epoch 8, step 48750/55000, batch loss = 0.01
epoch 8, step 49000/55000, batch loss = 0.00
epoch 8, step 49250/55000, batch loss = 0.04
epoch 8, step 49500/55000, batch loss = 0.03
epoch 8, step 49750/55000, batch loss = 0.00
epoch 8, step 50000/55000, batch loss = 0.00
Train accuracy = 99.59
epoch 8, step 50250/55000, batch loss = 0.02
epoch 8, step 50500/55000, batch loss = 0.01
epoch 8, step 50750/55000, batch loss = 0.04
epoch 8, step 51000/55000, batch loss = 0.01
epoch 8, step 51250/55000, batch loss = 0.01
epoch 8, step 51500/55000, batch loss = 0.00
epoch 8, step 51750/55000, batch loss = 0.00
epoch 8, step 52000/55000, batch loss = 0.01
epoch 8, step 52250/55000, batch loss = 0.00
epoch 8, step 52500/55000, batch loss = 0.00
Train accuracy = 99.60
epoch 8, step 52750/55000, batch loss = 0.00
epoch 8, step 53000/55000, batch loss = 0.04
epoch 8, step 53250/55000, batch loss = 0.00
epoch 8, 

## 2. ZADATAK
U ovom zadatku trebate dodati podršku za L2 regularizaciju parametara. Dovršite implementaciju L2Regularizer sloja te naučite regularizirani model iz prethodnog zadatka koji se nalazi u train_l2reg.py. Igrajte se s regularizacijskim parametrom tako da naučite tri različite mreže λ=1e−3, λ=1e−2, λ=1e−1 te usporedite naučene filtre u prvom sloju i dobivenu točnost.

## NN training with regularization

In [10]:
# NOTE(review): absolute, machine-specific output path; adjust before running elsewhere.
SAVE_DIR = "/Users/goran/Documents/III_Semestar/DU/labosi/2_lab/zad2_images/"


config = {}
config['max_epochs'] = 5
config['batch_size'] = 50
config['save_dir'] = SAVE_DIR
# learning rate schedule: each entry takes effect from the given epoch on
config['lr_policy'] = {1:{'lr':1e-1}, 3:{'lr':1e-2}, 5:{'lr':1e-3}, 7:{'lr':1e-4}}

# The data does not depend on lambda, so it is prepared once. The mean is
# subtracted out of place so the arrays read from `mnist` are never modified.
train_x = mnist.train.images.reshape([-1, 1, 28, 28])
train_y = mnist.train.labels

valid_x = mnist.validation.images.reshape([-1, 1, 28, 28])
valid_y = mnist.validation.labels

test_x = mnist.test.images.reshape([-1, 1, 28, 28])
test_y = mnist.test.labels

train_mean = train_x.mean()
train_x = train_x - train_mean
valid_x = valid_x - train_mean
test_x = test_x - train_mean

# one output sub-directory per regularization strength; every lambda needs its
# own name, otherwise the filter images of one run overwrite another's
lambdas = [1e-1, 1e-2, 1e-3]
names = ["0_1", "0_01", "0_001"]
for lmbd, dir_name in zip(lambdas, names):
    print("LAMBDA:", lmbd, "\n")
    SAVE_DIR = "/Users/goran/Documents/III_Semestar/DU/labosi/2_lab/zad2_images/" + dir_name + "/"
    config['save_dir'] = SAVE_DIR
    # time-based seed: every run differs (use a fixed value for reproducibility)
    np.random.seed(int(time.time() * 1e6) % 2**31)

    weight_decay = lmbd
    net = []

    regularizers = []
    # placeholder tensor: only its shape is read by the first layer
    inputs = np.random.randn(config['batch_size'], 1, 28, 28)
    net += [Convolution(inputs, 16, 5, "conv1")]
    regularizers += [L2Regularizer(net[-1].weights, weight_decay, 'conv1_l2reg')]
    net += [MaxPooling(net[-1], "pool1")]
    net += [ReLU(net[-1], "relu1")]
    net += [Convolution(net[-1], 32, 5, "conv2")]
    regularizers += [L2Regularizer(net[-1].weights, weight_decay, 'conv2_l2reg')]
    net += [MaxPooling(net[-1], "pool2")]
    net += [ReLU(net[-1], "relu2")]
    ## 7x7
    net += [Flatten(net[-1], "flatten3")]
    net += [FC(net[-1], 512, "fc3")]
    regularizers += [L2Regularizer(net[-1].weights, weight_decay, 'fc3_l2reg')]
    net += [ReLU(net[-1], "relu3")]
    # the final logits layer is left unregularized
    net += [FC(net[-1], 10, "logits")]

    data_loss = SoftmaxCrossEntropyWithLogits()
    loss = RegularizedLoss(data_loss, regularizers)

    train(train_x, train_y, valid_x, valid_y, net, loss, config)
    evaluate("Test", test_x, test_y, net, loss, config)
    print("\n\n")

LAMBDA: 0.1 

epoch 1, step 0/55000, batch loss = 45.55


  "%s to %s" % (dtypeobj_in, dtypeobj))


epoch 1, step 250/55000, batch loss = 40.97
epoch 1, step 500/55000, batch loss = 36.90
epoch 1, step 750/55000, batch loss = 33.36
epoch 1, step 1000/55000, batch loss = 30.08
epoch 1, step 1250/55000, batch loss = 27.34
epoch 1, step 1500/55000, batch loss = 24.92
epoch 1, step 1750/55000, batch loss = 22.21
epoch 1, step 2000/55000, batch loss = 20.28
epoch 1, step 2250/55000, batch loss = 18.91
epoch 1, step 2500/55000, batch loss = 16.69
Train accuracy = 66.71
epoch 1, step 2750/55000, batch loss = 15.07
epoch 1, step 3000/55000, batch loss = 13.72
epoch 1, step 3250/55000, batch loss = 12.45
epoch 1, step 3500/55000, batch loss = 11.31
epoch 1, step 3750/55000, batch loss = 10.47
epoch 1, step 4000/55000, batch loss = 9.42
epoch 1, step 4250/55000, batch loss = 8.71
epoch 1, step 4500/55000, batch loss = 7.96
epoch 1, step 4750/55000, batch loss = 7.34
epoch 1, step 5000/55000, batch loss = 6.51
Train accuracy = 76.73
epoch 1, step 5250/55000, batch loss = 6.06
epoch 1, step 5500

epoch 1, step 43750/55000, batch loss = 0.76
epoch 1, step 44000/55000, batch loss = 0.54
epoch 1, step 44250/55000, batch loss = 0.65
epoch 1, step 44500/55000, batch loss = 0.86
epoch 1, step 44750/55000, batch loss = 0.66
epoch 1, step 45000/55000, batch loss = 0.70
Train accuracy = 88.81
epoch 1, step 45250/55000, batch loss = 0.80
epoch 1, step 45500/55000, batch loss = 0.86
epoch 1, step 45750/55000, batch loss = 0.51
epoch 1, step 46000/55000, batch loss = 0.59
epoch 1, step 46250/55000, batch loss = 0.88
epoch 1, step 46500/55000, batch loss = 0.55
epoch 1, step 46750/55000, batch loss = 0.84
epoch 1, step 47000/55000, batch loss = 0.60
epoch 1, step 47250/55000, batch loss = 0.76
epoch 1, step 47500/55000, batch loss = 0.66
Train accuracy = 88.93
epoch 1, step 47750/55000, batch loss = 0.75
epoch 1, step 48000/55000, batch loss = 0.54
epoch 1, step 48250/55000, batch loss = 0.60
epoch 1, step 48500/55000, batch loss = 0.75
epoch 1, step 48750/55000, batch loss = 0.57
epoch 1, 

epoch 2, step 32000/55000, batch loss = 0.64
epoch 2, step 32250/55000, batch loss = 0.64
epoch 2, step 32500/55000, batch loss = 0.51
Train accuracy = 91.62
epoch 2, step 32750/55000, batch loss = 0.58
epoch 2, step 33000/55000, batch loss = 0.63
epoch 2, step 33250/55000, batch loss = 0.50
epoch 2, step 33500/55000, batch loss = 0.60
epoch 2, step 33750/55000, batch loss = 0.75
epoch 2, step 34000/55000, batch loss = 0.47
epoch 2, step 34250/55000, batch loss = 0.55
epoch 2, step 34500/55000, batch loss = 0.83
epoch 2, step 34750/55000, batch loss = 0.58
epoch 2, step 35000/55000, batch loss = 2.87
Train accuracy = 91.57
epoch 2, step 35250/55000, batch loss = 1.06
epoch 2, step 35500/55000, batch loss = 0.72
epoch 2, step 35750/55000, batch loss = 1.42
epoch 2, step 36000/55000, batch loss = 1.09
epoch 2, step 36250/55000, batch loss = 1.10
epoch 2, step 36500/55000, batch loss = 0.72
epoch 2, step 36750/55000, batch loss = 0.68
epoch 2, step 37000/55000, batch loss = 0.68
epoch 2, 

epoch 3, step 20250/55000, batch loss = 0.46
epoch 3, step 20500/55000, batch loss = 0.52
epoch 3, step 20750/55000, batch loss = 0.46
epoch 3, step 21000/55000, batch loss = 0.42
epoch 3, step 21250/55000, batch loss = 0.40
epoch 3, step 21500/55000, batch loss = 0.49
epoch 3, step 21750/55000, batch loss = 0.53
epoch 3, step 22000/55000, batch loss = 0.51
epoch 3, step 22250/55000, batch loss = 0.39
epoch 3, step 22500/55000, batch loss = 0.37
Train accuracy = 95.60
epoch 3, step 22750/55000, batch loss = 0.41
epoch 3, step 23000/55000, batch loss = 0.49
epoch 3, step 23250/55000, batch loss = 0.46
epoch 3, step 23500/55000, batch loss = 0.48
epoch 3, step 23750/55000, batch loss = 0.49
epoch 3, step 24000/55000, batch loss = 0.42
epoch 3, step 24250/55000, batch loss = 0.54
epoch 3, step 24500/55000, batch loss = 0.44
epoch 3, step 24750/55000, batch loss = 0.46
epoch 3, step 25000/55000, batch loss = 0.54
Train accuracy = 95.65
epoch 3, step 25250/55000, batch loss = 0.44
epoch 3, 

epoch 4, step 8500/55000, batch loss = 0.37
epoch 4, step 8750/55000, batch loss = 0.53
epoch 4, step 9000/55000, batch loss = 0.40
epoch 4, step 9250/55000, batch loss = 0.44
epoch 4, step 9500/55000, batch loss = 0.38
epoch 4, step 9750/55000, batch loss = 0.38
epoch 4, step 10000/55000, batch loss = 0.47
Train accuracy = 96.08
epoch 4, step 10250/55000, batch loss = 0.43
epoch 4, step 10500/55000, batch loss = 0.48
epoch 4, step 10750/55000, batch loss = 0.47
epoch 4, step 11000/55000, batch loss = 0.49
epoch 4, step 11250/55000, batch loss = 0.53
epoch 4, step 11500/55000, batch loss = 0.43
epoch 4, step 11750/55000, batch loss = 0.43
epoch 4, step 12000/55000, batch loss = 0.40
epoch 4, step 12250/55000, batch loss = 0.39
epoch 4, step 12500/55000, batch loss = 0.49
Train accuracy = 96.06
epoch 4, step 12750/55000, batch loss = 0.46
epoch 4, step 13000/55000, batch loss = 0.51
epoch 4, step 13250/55000, batch loss = 0.53
epoch 4, step 13500/55000, batch loss = 0.44
epoch 4, step 1

epoch 4, step 52000/55000, batch loss = 0.38
epoch 4, step 52250/55000, batch loss = 0.35
epoch 4, step 52500/55000, batch loss = 0.42
Train accuracy = 95.87
epoch 4, step 52750/55000, batch loss = 0.36
epoch 4, step 53000/55000, batch loss = 0.50
epoch 4, step 53250/55000, batch loss = 0.42
epoch 4, step 53500/55000, batch loss = 0.56
epoch 4, step 53750/55000, batch loss = 0.40
epoch 4, step 54000/55000, batch loss = 0.55
epoch 4, step 54250/55000, batch loss = 0.45
epoch 4, step 54500/55000, batch loss = 0.42
epoch 4, step 54750/55000, batch loss = 0.38
Train accuracy = 95.85

Running evaluation:  Validation
Validation accuracy = 96.58
Validation avg loss = 0.42

epoch 5, step 0/55000, batch loss = 0.44
epoch 5, step 250/55000, batch loss = 0.43
epoch 5, step 500/55000, batch loss = 0.41
epoch 5, step 750/55000, batch loss = 0.43
epoch 5, step 1000/55000, batch loss = 0.41
epoch 5, step 1250/55000, batch loss = 0.52
epoch 5, step 1500/55000, batch loss = 0.43
epoch 5, step 1750/5500

epoch 5, step 40250/55000, batch loss = 0.43
epoch 5, step 40500/55000, batch loss = 0.45
epoch 5, step 40750/55000, batch loss = 0.40
epoch 5, step 41000/55000, batch loss = 0.50
epoch 5, step 41250/55000, batch loss = 0.52
epoch 5, step 41500/55000, batch loss = 0.38
epoch 5, step 41750/55000, batch loss = 0.48
epoch 5, step 42000/55000, batch loss = 0.47
epoch 5, step 42250/55000, batch loss = 0.41
epoch 5, step 42500/55000, batch loss = 0.46
Train accuracy = 96.26
epoch 5, step 42750/55000, batch loss = 0.39
epoch 5, step 43000/55000, batch loss = 0.36
epoch 5, step 43250/55000, batch loss = 0.46
epoch 5, step 43500/55000, batch loss = 0.46
epoch 5, step 43750/55000, batch loss = 0.34
epoch 5, step 44000/55000, batch loss = 0.53
epoch 5, step 44250/55000, batch loss = 0.51
epoch 5, step 44500/55000, batch loss = 0.48
epoch 5, step 44750/55000, batch loss = 0.43
epoch 5, step 45000/55000, batch loss = 0.42
Train accuracy = 96.29
epoch 5, step 45250/55000, batch loss = 0.47
epoch 5, 

epoch 1, step 28000/55000, batch loss = 1.57
epoch 1, step 28250/55000, batch loss = 1.58
epoch 1, step 28500/55000, batch loss = 1.59
epoch 1, step 28750/55000, batch loss = 1.53
epoch 1, step 29000/55000, batch loss = 1.59
epoch 1, step 29250/55000, batch loss = 1.61
epoch 1, step 29500/55000, batch loss = 1.53
epoch 1, step 29750/55000, batch loss = 1.54
epoch 1, step 30000/55000, batch loss = 1.45
Train accuracy = 93.02
epoch 1, step 30250/55000, batch loss = 1.56
epoch 1, step 30500/55000, batch loss = 1.52
epoch 1, step 30750/55000, batch loss = 1.61
epoch 1, step 31000/55000, batch loss = 1.60
epoch 1, step 31250/55000, batch loss = 1.41
epoch 1, step 31500/55000, batch loss = 1.36
epoch 1, step 31750/55000, batch loss = 1.37
epoch 1, step 32000/55000, batch loss = 1.34
epoch 1, step 32250/55000, batch loss = 1.34
epoch 1, step 32500/55000, batch loss = 1.34
Train accuracy = 93.29
epoch 1, step 32750/55000, batch loss = 1.45
epoch 1, step 33000/55000, batch loss = 1.46
epoch 1, 

epoch 2, step 16250/55000, batch loss = 0.40
epoch 2, step 16500/55000, batch loss = 0.37
epoch 2, step 16750/55000, batch loss = 0.46
epoch 2, step 17000/55000, batch loss = 0.45
epoch 2, step 17250/55000, batch loss = 0.41
epoch 2, step 17500/55000, batch loss = 0.40
Train accuracy = 97.37
epoch 2, step 17750/55000, batch loss = 0.58
epoch 2, step 18000/55000, batch loss = 0.40
epoch 2, step 18250/55000, batch loss = 0.36
epoch 2, step 18500/55000, batch loss = 0.35
epoch 2, step 18750/55000, batch loss = 0.37
epoch 2, step 19000/55000, batch loss = 0.35
epoch 2, step 19250/55000, batch loss = 0.37
epoch 2, step 19500/55000, batch loss = 0.38
epoch 2, step 19750/55000, batch loss = 0.36
epoch 2, step 20000/55000, batch loss = 0.35
Train accuracy = 97.52
epoch 2, step 20250/55000, batch loss = 0.41
epoch 2, step 20500/55000, batch loss = 0.39
epoch 2, step 20750/55000, batch loss = 0.44
epoch 2, step 21000/55000, batch loss = 0.37
epoch 2, step 21250/55000, batch loss = 0.35
epoch 2, 

epoch 3, step 4250/55000, batch loss = 0.28
epoch 3, step 4500/55000, batch loss = 0.23
epoch 3, step 4750/55000, batch loss = 0.34
epoch 3, step 5000/55000, batch loss = 0.29
Train accuracy = 98.14
epoch 3, step 5250/55000, batch loss = 0.16
epoch 3, step 5500/55000, batch loss = 0.20
epoch 3, step 5750/55000, batch loss = 0.18
epoch 3, step 6000/55000, batch loss = 0.22
epoch 3, step 6250/55000, batch loss = 0.23
epoch 3, step 6500/55000, batch loss = 0.24
epoch 3, step 6750/55000, batch loss = 0.19
epoch 3, step 7000/55000, batch loss = 0.16
epoch 3, step 7250/55000, batch loss = 0.26
epoch 3, step 7500/55000, batch loss = 0.18
Train accuracy = 98.37
epoch 3, step 7750/55000, batch loss = 0.19
epoch 3, step 8000/55000, batch loss = 0.31
epoch 3, step 8250/55000, batch loss = 0.16
epoch 3, step 8500/55000, batch loss = 0.27
epoch 3, step 8750/55000, batch loss = 0.17
epoch 3, step 9000/55000, batch loss = 0.25
epoch 3, step 9250/55000, batch loss = 0.17
epoch 3, step 9500/55000, batc

epoch 3, step 47750/55000, batch loss = 0.29
epoch 3, step 48000/55000, batch loss = 0.22
epoch 3, step 48250/55000, batch loss = 0.20
epoch 3, step 48500/55000, batch loss = 0.15
epoch 3, step 48750/55000, batch loss = 0.26
epoch 3, step 49000/55000, batch loss = 0.20
epoch 3, step 49250/55000, batch loss = 0.27
epoch 3, step 49500/55000, batch loss = 0.19
epoch 3, step 49750/55000, batch loss = 0.21
epoch 3, step 50000/55000, batch loss = 0.15
Train accuracy = 98.57
epoch 3, step 50250/55000, batch loss = 0.16
epoch 3, step 50500/55000, batch loss = 0.23
epoch 3, step 50750/55000, batch loss = 0.20
epoch 3, step 51000/55000, batch loss = 0.17
epoch 3, step 51250/55000, batch loss = 0.17
epoch 3, step 51500/55000, batch loss = 0.20
epoch 3, step 51750/55000, batch loss = 0.29
epoch 3, step 52000/55000, batch loss = 0.15
epoch 3, step 52250/55000, batch loss = 0.33
epoch 3, step 52500/55000, batch loss = 0.23
Train accuracy = 98.56
epoch 3, step 52750/55000, batch loss = 0.19
epoch 3, 

epoch 4, step 36000/55000, batch loss = 0.16
epoch 4, step 36250/55000, batch loss = 0.18
epoch 4, step 36500/55000, batch loss = 0.17
epoch 4, step 36750/55000, batch loss = 0.15
epoch 4, step 37000/55000, batch loss = 0.21
epoch 4, step 37250/55000, batch loss = 0.15
epoch 4, step 37500/55000, batch loss = 0.18
Train accuracy = 98.63
epoch 4, step 37750/55000, batch loss = 0.17
epoch 4, step 38000/55000, batch loss = 0.15
epoch 4, step 38250/55000, batch loss = 0.24
epoch 4, step 38500/55000, batch loss = 0.16
epoch 4, step 38750/55000, batch loss = 0.17
epoch 4, step 39000/55000, batch loss = 0.18
epoch 4, step 39250/55000, batch loss = 0.14
epoch 4, step 39500/55000, batch loss = 0.15
epoch 4, step 39750/55000, batch loss = 0.31
epoch 4, step 40000/55000, batch loss = 0.16
Train accuracy = 98.64
epoch 4, step 40250/55000, batch loss = 0.14
epoch 4, step 40500/55000, batch loss = 0.14
epoch 4, step 40750/55000, batch loss = 0.20
epoch 4, step 41000/55000, batch loss = 0.15
epoch 4, 

epoch 5, step 24250/55000, batch loss = 0.13
epoch 5, step 24500/55000, batch loss = 0.13
epoch 5, step 24750/55000, batch loss = 0.21
epoch 5, step 25000/55000, batch loss = 0.16
Train accuracy = 98.76
epoch 5, step 25250/55000, batch loss = 0.14
epoch 5, step 25500/55000, batch loss = 0.14
epoch 5, step 25750/55000, batch loss = 0.15
epoch 5, step 26000/55000, batch loss = 0.14
epoch 5, step 26250/55000, batch loss = 0.16
epoch 5, step 26500/55000, batch loss = 0.17
epoch 5, step 26750/55000, batch loss = 0.15
epoch 5, step 27000/55000, batch loss = 0.18
epoch 5, step 27250/55000, batch loss = 0.16
epoch 5, step 27500/55000, batch loss = 0.23
Train accuracy = 98.76
epoch 5, step 27750/55000, batch loss = 0.18
epoch 5, step 28000/55000, batch loss = 0.14
epoch 5, step 28250/55000, batch loss = 0.16
epoch 5, step 28500/55000, batch loss = 0.18
epoch 5, step 28750/55000, batch loss = 0.17
epoch 5, step 29000/55000, batch loss = 0.20
epoch 5, step 29250/55000, batch loss = 0.16
epoch 5, 

epoch 1, step 12000/55000, batch loss = 0.50
epoch 1, step 12250/55000, batch loss = 0.55
epoch 1, step 12500/55000, batch loss = 0.55
Train accuracy = 89.26
epoch 1, step 12750/55000, batch loss = 0.51
epoch 1, step 13000/55000, batch loss = 0.54
epoch 1, step 13250/55000, batch loss = 0.50
epoch 1, step 13500/55000, batch loss = 0.73
epoch 1, step 13750/55000, batch loss = 0.63
epoch 1, step 14000/55000, batch loss = 0.57
epoch 1, step 14250/55000, batch loss = 0.50
epoch 1, step 14500/55000, batch loss = 0.51
epoch 1, step 14750/55000, batch loss = 0.54
epoch 1, step 15000/55000, batch loss = 0.48
Train accuracy = 90.32
epoch 1, step 15250/55000, batch loss = 0.52
epoch 1, step 15500/55000, batch loss = 0.68
epoch 1, step 15750/55000, batch loss = 0.45
epoch 1, step 16000/55000, batch loss = 0.46
epoch 1, step 16250/55000, batch loss = 0.45
epoch 1, step 16500/55000, batch loss = 0.58
epoch 1, step 16750/55000, batch loss = 0.49
epoch 1, step 17000/55000, batch loss = 0.46
epoch 1, 

epoch 2, step 0/55000, batch loss = 0.46
epoch 2, step 250/55000, batch loss = 0.49
epoch 2, step 500/55000, batch loss = 0.37
epoch 2, step 750/55000, batch loss = 0.55
epoch 2, step 1000/55000, batch loss = 0.39
epoch 2, step 1250/55000, batch loss = 0.39
epoch 2, step 1500/55000, batch loss = 0.51
epoch 2, step 1750/55000, batch loss = 0.51
epoch 2, step 2000/55000, batch loss = 0.41
epoch 2, step 2250/55000, batch loss = 0.48
epoch 2, step 2500/55000, batch loss = 0.41
Train accuracy = 98.24
epoch 2, step 2750/55000, batch loss = 0.40
epoch 2, step 3000/55000, batch loss = 0.37
epoch 2, step 3250/55000, batch loss = 0.44
epoch 2, step 3500/55000, batch loss = 0.36
epoch 2, step 3750/55000, batch loss = 0.46
epoch 2, step 4000/55000, batch loss = 0.38
epoch 2, step 4250/55000, batch loss = 0.37
epoch 2, step 4500/55000, batch loss = 0.44
epoch 2, step 4750/55000, batch loss = 0.38
epoch 2, step 5000/55000, batch loss = 0.38
Train accuracy = 98.28
epoch 2, step 5250/55000, batch loss

epoch 2, step 43750/55000, batch loss = 0.32
epoch 2, step 44000/55000, batch loss = 0.32
epoch 2, step 44250/55000, batch loss = 0.34
epoch 2, step 44500/55000, batch loss = 0.33
epoch 2, step 44750/55000, batch loss = 0.33
epoch 2, step 45000/55000, batch loss = 0.35
Train accuracy = 98.41
epoch 2, step 45250/55000, batch loss = 0.39
epoch 2, step 45500/55000, batch loss = 0.33
epoch 2, step 45750/55000, batch loss = 0.44
epoch 2, step 46000/55000, batch loss = 0.32
epoch 2, step 46250/55000, batch loss = 0.32
epoch 2, step 46500/55000, batch loss = 0.36
epoch 2, step 46750/55000, batch loss = 0.39
epoch 2, step 47000/55000, batch loss = 0.32
epoch 2, step 47250/55000, batch loss = 0.37
epoch 2, step 47500/55000, batch loss = 0.34
Train accuracy = 98.42
epoch 2, step 47750/55000, batch loss = 0.50
epoch 2, step 48000/55000, batch loss = 0.32
epoch 2, step 48250/55000, batch loss = 0.31
epoch 2, step 48500/55000, batch loss = 0.32
epoch 2, step 48750/55000, batch loss = 0.34
epoch 2, 

epoch 3, step 32000/55000, batch loss = 0.32
epoch 3, step 32250/55000, batch loss = 0.32
epoch 3, step 32500/55000, batch loss = 0.33
Train accuracy = 99.30
epoch 3, step 32750/55000, batch loss = 0.32
epoch 3, step 33000/55000, batch loss = 0.30
epoch 3, step 33250/55000, batch loss = 0.31
epoch 3, step 33500/55000, batch loss = 0.31
epoch 3, step 33750/55000, batch loss = 0.34
epoch 3, step 34000/55000, batch loss = 0.36
epoch 3, step 34250/55000, batch loss = 0.30
epoch 3, step 34500/55000, batch loss = 0.33
epoch 3, step 34750/55000, batch loss = 0.30
epoch 3, step 35000/55000, batch loss = 0.33
Train accuracy = 99.31
epoch 3, step 35250/55000, batch loss = 0.32
epoch 3, step 35500/55000, batch loss = 0.31
epoch 3, step 35750/55000, batch loss = 0.30
epoch 3, step 36000/55000, batch loss = 0.31
epoch 3, step 36250/55000, batch loss = 0.30
epoch 3, step 36500/55000, batch loss = 0.30
epoch 3, step 36750/55000, batch loss = 0.32
epoch 3, step 37000/55000, batch loss = 0.31
epoch 3, 

epoch 4, step 20250/55000, batch loss = 0.30
epoch 4, step 20500/55000, batch loss = 0.31
epoch 4, step 20750/55000, batch loss = 0.29
epoch 4, step 21000/55000, batch loss = 0.29
epoch 4, step 21250/55000, batch loss = 0.33
epoch 4, step 21500/55000, batch loss = 0.39
epoch 4, step 21750/55000, batch loss = 0.29
epoch 4, step 22000/55000, batch loss = 0.36
epoch 4, step 22250/55000, batch loss = 0.32
epoch 4, step 22500/55000, batch loss = 0.34
Train accuracy = 99.36
epoch 4, step 22750/55000, batch loss = 0.29
epoch 4, step 23000/55000, batch loss = 0.30
epoch 4, step 23250/55000, batch loss = 0.30
epoch 4, step 23500/55000, batch loss = 0.30
epoch 4, step 23750/55000, batch loss = 0.29
epoch 4, step 24000/55000, batch loss = 0.30
epoch 4, step 24250/55000, batch loss = 0.30
epoch 4, step 24500/55000, batch loss = 0.30
epoch 4, step 24750/55000, batch loss = 0.30
epoch 4, step 25000/55000, batch loss = 0.30
Train accuracy = 99.39
epoch 4, step 25250/55000, batch loss = 0.31
epoch 4, 

epoch 5, step 8500/55000, batch loss = 0.31
epoch 5, step 8750/55000, batch loss = 0.40
epoch 5, step 9000/55000, batch loss = 0.32
epoch 5, step 9250/55000, batch loss = 0.30
epoch 5, step 9500/55000, batch loss = 0.31
epoch 5, step 9750/55000, batch loss = 0.29
epoch 5, step 10000/55000, batch loss = 0.35
Train accuracy = 99.39
epoch 5, step 10250/55000, batch loss = 0.30
epoch 5, step 10500/55000, batch loss = 0.41
epoch 5, step 10750/55000, batch loss = 0.29
epoch 5, step 11000/55000, batch loss = 0.36
epoch 5, step 11250/55000, batch loss = 0.33
epoch 5, step 11500/55000, batch loss = 0.31
epoch 5, step 11750/55000, batch loss = 0.31
epoch 5, step 12000/55000, batch loss = 0.31
epoch 5, step 12250/55000, batch loss = 0.30
epoch 5, step 12500/55000, batch loss = 0.31
Train accuracy = 99.39
epoch 5, step 12750/55000, batch loss = 0.30
epoch 5, step 13000/55000, batch loss = 0.61
epoch 5, step 13250/55000, batch loss = 0.30
epoch 5, step 13500/55000, batch loss = 0.30
epoch 5, step 1

epoch 5, step 52000/55000, batch loss = 0.29
epoch 5, step 52250/55000, batch loss = 0.29
epoch 5, step 52500/55000, batch loss = 0.31
Train accuracy = 99.44
epoch 5, step 52750/55000, batch loss = 0.30
epoch 5, step 53000/55000, batch loss = 0.31
epoch 5, step 53250/55000, batch loss = 0.30
epoch 5, step 53500/55000, batch loss = 0.29
epoch 5, step 53750/55000, batch loss = 0.32
epoch 5, step 54000/55000, batch loss = 0.29
epoch 5, step 54250/55000, batch loss = 0.35
epoch 5, step 54500/55000, batch loss = 0.30
epoch 5, step 54750/55000, batch loss = 0.31
Train accuracy = 99.44

Running evaluation:  Validation
Validation accuracy = 99.10
Validation avg loss = 0.32


Running evaluation:  Test
Test accuracy = 99.15
Test avg loss = 0.31






## 3. ZADATAK - usporedba s Tensorflowom

U Tensorflowu definirajte i naučite model koji je ekvivalentan regulariziranom modelu iz 2. zadatka. Koristite identičnu arhitekturu i parametre učenja da biste reproducirali rezultate. Tijekom učenja vizualizirajte filtre u prvom sloju kao u prethodnoj vježbi. Kako biste u graf dodali operaciju konvolucije koristite tf.nn.conv2d ili tf.contrib.layers.convolution2d.