### 1. Load the training and validation image datasets

In [0]:
import torch
import torchvision
import torchvision.transforms as transforms

# Root directory of the image dataset; the 'train' and 'validation'
# subfolders are loaded from it.
data_root = './data/horse-or-human'

# define vectorize transformer
class VectorizeTransform:
    """Callable transform that flattens an image tensor into a 1-D vector."""

    def __call__(self, img):
        # Collapse every dimension of the tensor into a single axis.
        flat = torch.flatten(img)
        return flat

# compose image transformer
# Preprocessing steps, applied to every image in this order.
preprocessing_steps = [
    transforms.Grayscale(),   # convert the image to grayscale
    transforms.ToTensor(),    # convert the image to a tensor
    VectorizeTransform(),     # flatten the tensor into a 1-D vector
]
transform = transforms.Compose(preprocessing_steps)

# load training dataset
train_data_path = f'{data_root}/train'
# One class per subfolder of the training directory; every image goes
# through `transform`.
train_dataset = torchvision.datasets.ImageFolder(
    root=train_data_path,
    transform=transform,
)
# Shuffled batches of up to 2048 samples, loaded in the main process
# (num_workers=0).
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset,
    batch_size=2048,
    shuffle=True,
    num_workers=0,
)

# load validation dataset
valid_data_path = f'{data_root}/validation'
# One class per subfolder of the validation directory; every image goes
# through `transform`.
valid_dataset = torchvision.datasets.ImageFolder(
    root=valid_data_path,
    transform=transform,
)
# Shuffled batches of up to 2048 samples, loaded in the main process
# (num_workers=0).
valid_loader = torch.utils.data.DataLoader(
    dataset=valid_dataset,
    batch_size=2048,
    shuffle=True,
    num_workers=0,
)

### 2. Define the model and the functions for training the neural network

In [0]:
def sigmoid(z):
  """Return the element-wise logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``.

  Delegates to ``torch.sigmoid`` instead of spelling the formula out. The
  hand-written form overflows ``exp(-z)`` to ``inf`` for strongly negative
  inputs; the forward value is still 0, but under autograd the backward pass
  multiplies ``0 * inf`` and yields NaN gradients.

  Args:
    z: input tensor.

  Returns:
    Tensor of the same shape as ``z`` with values in [0, 1].
  """
  return torch.sigmoid(z)

def loss(y_pred, y):
  """Return the mean binary cross-entropy between predictions and targets.

  Args:
    y_pred: tensor of predicted probabilities.
    y: targets (0 or 1). When ``y`` is a tensor holding the same elements as
      ``y_pred`` in a different layout (e.g. shape ``(N,)`` against
      ``(N, 1)``), it is reshaped to ``y_pred``'s shape first. Without this,
      the arithmetic below would silently broadcast to ``(N, N)`` and average
      over every prediction/target pair instead of matching pairs.

  Returns:
    Scalar tensor holding the averaged loss.
  """
  # Keeps log() away from log(0) when a prediction saturates at 0 or 1.
  epsilon = 1e-12

  if (torch.is_tensor(y)
      and y.shape != y_pred.shape
      and y.squeeze().shape == y_pred.squeeze().shape):
    y = y.reshape(y_pred.shape)

  positive_term = y * torch.log(y_pred + epsilon)
  negative_term = (1 - y) * torch.log(1 - y_pred + epsilon)
  return -1 * torch.mean(positive_term + negative_term)

def accuracy(y_pred, y):
  """Return the fraction of predictions that match the targets.

  A prediction counts as class 1 when its probability is strictly greater
  than 0.5, and as class 0 otherwise.

  Args:
    y_pred: tensor of predicted probabilities.
    y: targets (0 or 1). When ``y`` is a tensor holding the same elements as
      ``y_pred`` in a different layout (e.g. shape ``(N,)`` against
      ``(N, 1)``), it is reshaped to ``y_pred``'s shape first. Without this,
      the comparison would silently broadcast to ``(N, N)`` and score every
      prediction against every target.

  Returns:
    Scalar tensor with the mean of the per-element matches.
  """
  if (torch.is_tensor(y)
      and y.shape != y_pred.shape
      and y.squeeze().shape == y_pred.squeeze().shape):
    y = y.reshape(y_pred.shape)

  answer = (y_pred > 0.5).float()
  return torch.mean((answer == y).float())

class ThreeLayerNN:
  # initialize model parameters
  def __init__(self, input_size, hidden1_size, hidden2_size, output_size):
    self.results = {}
    self.params = {}

    self.params['w1'] = torch.zeros((hidden1_size, input_size))
    self.params['b1'] = torch.zeros((1, hidden1_size))
    
    self.params['w2'] = torch.zeros((hidden2_size, hidden1_size))
    self.params['b2'] = torch.zeros((1, hidden2_size))
    
    self.params['w3'] = torch.zeros((output_size, hidden2_size))
    self.params['b3'] = torch.zeros((1, output_size))

  # compute y prediction
  def predict(self, x):
    self.forward(x) # forward propagation
    pred = self.results['a3']

    return pred

  # compute gradients of each parameters
  def gradient(self, x, y):
    # fetch model parameters
    w1, w2, w3 = self.params['w1'], self.params['w2'], self.params['w3']
    b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']

    self.forward(x) # forward propagation

    a0 = x
    # fetch forward propagation results
    a1, a2, a3 = self.results['a1'], self.results['a2'], self.results['a3']
    z1, z2, z3 = self.results['z1'], self.results['z2'], self.results['z3']

    grads = {}
    batch_size = x.shape[0]

    # backward propagation
    dz3 = (a3 - y) / batch_size
    grads['dw3'] = torch.matmul(dz3.T, a2)
    grads['db3'] = torch.sum(dz3, axis=0)

    da2 = torch.matmul(dz3, w3)
    dz2 = a2 * (1 - a2) * da2
    grads['dw2'] = torch.matmul(dz2.T, a1)
    grads['db2'] = torch.sum(dz2, axis=0)

    da1 = torch.matmul(dz2, w2)
    dz1 = a1 * (1 - a1) * da1
    grads['dw1'] = torch.matmul(dz1.T, a0)
    grads['db1'] = torch.sum(dz1, axis=0)

    return grads

  # run forward propagation
  def forward(self, x):
    # fetch model parameters
    w1, w2, w3 = self.params['w1'], self.params['w2'], self.params['w3']
    b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']

    a0 = x

    z1 = torch.matmul(a0, w1.T) + b1
    a1 = sigmoid(z1)

    z2 = torch.matmul(a1, w2.T) + b2
    a2 = sigmoid(z2)

    z3 = torch.matmul(a2, w3.T) + b3
    a3 = sigmoid(z3)

    # store intermediate forward propagation results
    self.results['a1'], self.results['a2'], self.results['a3'] = a1, a2, a3
    self.results['z1'], self.results['z2'], self.results['z3'] = z1, z2, z3
