### 1. Load training/validation image dataset

In [0]:
import torch
import torchvision
import torchvision.transforms as transforms

# dataset root directory; the '/train' and '/validation' subfolders are appended to it below
data_root = './data/horse-or-human'

# define vectorize transformer
class VectorizeTransform:
    """Callable transform that flattens a tensor into a 1-D vector."""

    def __call__(self, img):
        # -1 lets torch infer the length, i.e. the total number of elements
        flat = img.reshape(-1)
        return flat

# image pipeline applied to every sample: grayscale -> tensor -> flat vector
transform = transforms.Compose([
    transforms.Grayscale(),
    transforms.ToTensor(),
    VectorizeTransform(),   # flatten the image tensor into a 1-D vector
])

# both loaders share the same batching options
_loader_options = dict(batch_size=2048, shuffle=True, num_workers=0)

# training split: one sub-folder per class under <data_root>/train
train_data_path = f'{data_root}/train'
train_dataset = torchvision.datasets.ImageFolder(train_data_path, transform=transform)
train_loader = torch.utils.data.DataLoader(train_dataset, **_loader_options)

# validation split: same layout under <data_root>/validation
valid_data_path = f'{data_root}/validation'
valid_dataset = torchvision.datasets.ImageFolder(valid_data_path, transform=transform)
valid_loader = torch.utils.data.DataLoader(valid_dataset, **_loader_options)

### 2. Define the model and functions for training the neural network

In [0]:
import datetime

def elapsed_time_in_milli(start_time):
  """Return the whole milliseconds elapsed between `start_time` and now.

  `start_time` is a `datetime.datetime` value, e.g. one obtained from
  `datetime.datetime.now()`. The fractional millisecond is dropped by `int()`.
  """
  delta = datetime.datetime.now() - start_time
  millis = delta.total_seconds() * 1000
  return int(millis)

def sigmoid(z):
  """Element-wise logistic function 1 / (1 + exp(-z)) of the tensor `z`.

  Delegates to the built-in `torch.sigmoid` instead of composing `torch.exp`
  by hand, so no intermediate `exp(-z)` overflows to `inf` for very negative
  inputs. Returns a tensor with the same shape as `z`.
  """
  return torch.sigmoid(z)

def loss(y_pred, y):
  """Mean binary cross-entropy between predictions `y_pred` and targets `y`.

  A small constant is added inside each logarithm so that predictions of
  exactly 0 or 1 do not produce log(0). Returns a 0-dim tensor.
  """
  epsilon = 1e-12
  positive_term = y * torch.log(y_pred + epsilon)
  negative_term = (1 - y) * torch.log((1 - y_pred) + epsilon)
  return -1 * torch.mean(positive_term + negative_term)

def accuracy(y_pred, y):
  """Fraction of entries whose thresholded prediction equals the target.

  A prediction counts as class 1 only when it is strictly greater than 0.5.
  Returns a 0-dim float tensor.
  """
  predicted_class = torch.gt(y_pred, 0.5).float()
  hits = torch.eq(predicted_class, y)
  return hits.float().mean()

class ThreeLayerNN:
  """Fully connected network with two sigmoid hidden layers and a sigmoid output.

  Parameters are stored in `self.params` under the keys 'w1'..'w3' and
  'b1'..'b3'; weights have shape (fan_out, fan_in) and biases (1, fan_out).
  The most recent forward pass is cached in `self.results` under 'z1'..'z3'
  (pre-activations) and 'a1'..'a3' (activations). Gradients are derived by
  hand in `gradient_descent`; autograd is not used.
  """

  # initialize parameters for neural network
  def __init__(self, input_size, hidden1_size, hidden2_size, output_size, learning_rate, weight_init_std):
    """Create the parameters.

    Weights are drawn from a standard normal scaled by `weight_init_std`;
    biases start at zero. `learning_rate` is the gradient descent step size.
    """
    self.results = {}
    self.params = {}

    self.params['w1'] = weight_init_std * torch.randn(hidden1_size, input_size)
    self.params['b1'] = torch.zeros((1, hidden1_size))

    self.params['w2'] = weight_init_std * torch.randn(hidden2_size, hidden1_size)
    self.params['b2'] = torch.zeros((1, hidden2_size))

    self.params['w3'] = weight_init_std * torch.randn(output_size, hidden2_size)
    self.params['b3'] = torch.zeros((1, output_size))

    self.lr = learning_rate

  # compute y prediction
  def predict(self, x):
    """Return the output activations for `x` of shape (batch, input_size).

    Runs a forward pass, so `self.results` is refreshed as a side effect.
    """
    return self.forward(x)

  # compute gradients of each parameters and run the gradient descent algorithm
  def gradient_descent(self, x, y):
    """Run one gradient descent step on the batch (`x`, `y`).

    `x` has shape (batch, input_size). `y` holds the targets and may be given
    either in the same shape as the network output or as any tensor with the
    same number of elements (e.g. a flat (batch,) label vector); it is reshaped
    and cast to match the output before use. Updates `self.params` and returns
    nothing.
    """
    # fetch model parameters
    w1, w2, w3 = self.params['w1'], self.params['w2'], self.params['w3']
    b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']

    a0 = x
    a3 = self.forward(x) # forward propagation
    a1, a2 = self.results['a1'], self.results['a2']

    # Align targets with the prediction. Without this, a flat (batch,) target
    # would broadcast (a3 - y) to (batch, batch) and silently give the
    # parameters the wrong shape.
    y = torch.reshape(y, a3.shape).to(a3.dtype)

    batch_size = x.shape[0]

    # backward propagation
    # (a3 - y) is the gradient of binary cross-entropy w.r.t. z3 for a sigmoid
    # output; dividing by the batch size averages it over the batch.
    dz3 = (a3 - y) / batch_size
    dw3 = torch.matmul(dz3.T, a2)
    db3 = torch.sum(dz3, dim=0)

    # a * (1 - a) is the derivative of the sigmoid expressed via its output
    da2 = torch.matmul(dz3, w3)
    dz2 = a2 * (1 - a2) * da2
    dw2 = torch.matmul(dz2.T, a1)
    db2 = torch.sum(dz2, dim=0)

    da1 = torch.matmul(dz2, w2)
    dz1 = a1 * (1 - a1) * da1
    dw1 = torch.matmul(dz1.T, a0)
    db1 = torch.sum(dz1, dim=0)

    # gradient descent (bias gradients are 1-D and broadcast against (1, n) biases)
    self.params['w1'] = w1 - self.lr * dw1
    self.params['b1'] = b1 - self.lr * db1
    self.params['w2'] = w2 - self.lr * dw2
    self.params['b2'] = b2 - self.lr * db2
    self.params['w3'] = w3 - self.lr * dw3
    self.params['b3'] = b3 - self.lr * db3

  # run forward propagation
  def forward(self, x):
    """Forward-propagate `x` of shape (batch, input_size).

    Caches every layer's pre-activation and activation in `self.results` and
    returns the output activation `a3` of shape (batch, output_size).
    """
    # fetch model parameters
    w1, w2, w3 = self.params['w1'], self.params['w2'], self.params['w3']
    b1, b2, b3 = self.params['b1'], self.params['b2'], self.params['b3']

    a0 = x

    z1 = torch.matmul(a0, w1.T) + b1
    a1 = sigmoid(z1)

    z2 = torch.matmul(a1, w2.T) + b2
    a2 = sigmoid(z2)

    z3 = torch.matmul(a2, w3.T) + b3
    a3 = sigmoid(z3)

    # store intermediate forward propagation results
    self.results['a1'], self.results['a2'], self.results['a3'] = a1, a2, a3
    self.results['z1'], self.results['z2'], self.results['z3'] = z1, z2, z3

    return a3


### 3. Learning with the gradient descent algorithm


In [6]:
# Peek at a single batch; only images.shape[1] (the flattened input size) is used below.
# The built-in next() replaces the Python-2-style it.next(), which is not part
# of the Python 3 iterator protocol and is missing from recent PyTorch iterators.
it = iter(train_loader)
images, labels = next(it)

train_data_count = len(train_loader.dataset)
valid_data_count = len(valid_loader.dataset)

# initialize parameters
epoch_count = 100
learning_rate = 0.2
weight_init_std = 0.3

# create new neural network: input -> 100 -> 50 -> 1
nn = ThreeLayerNN(images.shape[1], 100, 50, 1, learning_rate, weight_init_std)

# per-epoch history
train_losses = []
train_accs = []

valid_losses = []
valid_accs = []

elapsed_times = []

for epoch in range(epoch_count):
  # initialize current loss and accuracy
  mean_train_loss, mean_train_acc = 0, 0
  mean_valid_loss, mean_valid_acc = 0, 0

  # timer start (the measured time covers both evaluation passes and the update pass)
  start_time = datetime.datetime.now()

  # calculate train loss and accuracy at this epoch
  # (measured before this epoch's parameter updates; each batch is weighted by
  # its share of the dataset so the sum is the dataset-wide mean)
  for batch_idx, (x_train, y_train) in enumerate(train_loader):
    batch_size = x_train.shape[0]

    # labels arrive flat; make them a column to match the (batch, 1) predictions
    y_train = torch.reshape(y_train, (batch_size, 1))
    y_pred_train = nn.predict(x_train)

    mean_train_loss += (batch_size / train_data_count) * loss(y_pred_train, y_train)
    mean_train_acc += (batch_size / train_data_count) * accuracy(y_pred_train, y_train)

  # calculate validation loss and accuracy at this epoch
  for batch_idx, (x_valid, y_valid) in enumerate(valid_loader):
    batch_size = x_valid.shape[0]

    y_valid = torch.reshape(y_valid, (batch_size, 1))
    y_pred_valid = nn.predict(x_valid)

    mean_valid_loss += (batch_size / valid_data_count) * loss(y_pred_valid, y_valid)
    mean_valid_acc += (batch_size / valid_data_count) * accuracy(y_pred_valid, y_valid)

  # run the gradient descent algorithm using train dataset at this epoch
  for batch_idx, (x_train, y_train) in enumerate(train_loader):
    batch_size = x_train.shape[0]

    y_train = torch.reshape(y_train, (batch_size, 1))
    nn.gradient_descent(x_train, y_train)

  # timer end
  # measure and save elapsed time in this epoch
  elapsed_times.append(elapsed_time_in_milli(start_time))

  # save losses and accuracies at this epoch
  train_losses.append(mean_train_loss)
  train_accs.append(mean_train_acc)
  valid_losses.append(mean_valid_loss)
  valid_accs.append(mean_valid_acc)

  # a modulus of 1 reports every epoch; raise it to print less often
  if epoch % 1 == 0:
    print(f'epoch: {epoch}')
    print(f'train loss: {mean_train_loss}')
    print(f'train accuracy: {mean_train_acc}')
    print(f'validation loss: {mean_valid_loss}')
    print(f'validation accuracy: {mean_valid_acc}\n\n')


epoch: 0
train loss: 1.4362828731536865
train accuracy: 0.5131450891494751
validation loss: 1.4368375539779663
validation accuracy: 0.5


epoch: 1
train loss: 0.7321149110794067
train accuracy: 0.5141187906265259
validation loss: 0.7164052724838257
validation accuracy: 0.5


epoch: 2
train loss: 0.6954746246337891
train accuracy: 0.4741967022418976
validation loss: 0.6836119294166565
validation accuracy: 0.546875


epoch: 3
train loss: 0.6896675825119019
train accuracy: 0.5258033275604248
validation loss: 0.6782488822937012
validation accuracy: 0.61328125


epoch: 4
train loss: 0.6852723360061646
train accuracy: 0.5472249388694763
validation loss: 0.6748610734939575
validation accuracy: 0.59765625


epoch: 5
train loss: 0.6809864640235901
train accuracy: 0.567672848701477
validation loss: 0.6712064146995544
validation accuracy: 0.625


epoch: 6
train loss: 0.6767470836639404
train accuracy: 0.5851995944976807
validation loss: 0.6675363779067993
validation accuracy: 0.6484375


epoch: 7