## Setup ##

In [None]:
# Mount Google Drive
# The dataset path used by the loading cell starts with /content/drive, so the
# Drive must be mounted at that location before any file is read.

from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Import libraries

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Load dataset
# Each split (train / dev / test) has one image file and one label file, all
# stored in the same Drive folder.
path = "/content/drive/My Drive/Colab Notebooks/dl_coursework1/data"

trainxs, trainys = (
  np.load(f"{path}/fashion-train-imgs.npz"),
  np.load(f"{path}/fashion-train-labels.npz"),
)
devxs, devys = (
  np.load(f"{path}/fashion-dev-imgs.npz"),
  np.load(f"{path}/fashion-dev-labels.npz"),
)
testxs, testys = (
  np.load(f"{path}/fashion-test-imgs.npz"),
  np.load(f"{path}/fashion-test-labels.npz"),
)

## Data exploration & preparation ##

In [None]:
# Display an example image with label, specify number below

examples_to_show = [40, 41]

for example in examples_to_show:
  # Labels are indexed by example number; images keep the example on the
  # last axis of the stack.
  print(f"Y Label: {trainys[example]}")

  # The 2-D slice is transposed before display
  # (presumably the stored orientation is flipped — verify against the data).
  plt.imshow(np.transpose(trainxs[:, :, example]), cmap='gray')
  plt.axis('off')
  plt.show()

In [None]:
# Reshape data (flatten images to 1D)

def flatten_2D_images(nparray):
  """Merge the first two axes of an image stack into a single axis.

  An input of shape (rows, cols, n) is returned with shape (rows * cols, n),
  so column k of the result holds the flattened image k.
  """
  shape = nparray.shape
  flat_length = shape[0] * shape[1]
  return np.reshape(nparray, (flat_length, shape[2]))

# Flatten all three splits with the same helper, in train / dev / test order.
trainxs_flat, devxs_flat, testxs_flat = map(
  flatten_2D_images, (trainxs, devxs, testxs)
)

# Report the shapes before and after flattening for each split.
print(f"Original train data shape: {trainxs.shape}")
print(f"Flattened train data shape: {trainxs_flat.shape}")
print(f"Original dev data shape: {devxs.shape}")
print(f"Flattened dev data shape: {devxs_flat.shape}")
print(f"Original test data shape: {testxs.shape}")
print(f"Flattened test data shape: {testxs_flat.shape}")

## Useful Values ##

In [None]:
# Number of dimensions in each image (rows of the flattened array) and
# number of examples in each dataset (columns of the flattened arrays)
dims, train_n = trainxs_flat.shape
dev_n = devxs_flat.shape[1]
test_n = testxs_flat.shape[1]

# Pixel value range
pixel_value_max = trainxs_flat.max()
pixel_value_min = trainxs_flat.min()
print(f"Max pixel value: {pixel_value_max}")
print(f"Min pixel value: {pixel_value_min}")

# Upper x-axis limit used by the plot templates
NUMBER_OF_EPOCHS = 500

## Common Functions ##

In [None]:
# Copy, paste and modify this code for accuracy and loss plots
# Each plot takes two lists: one for the training dataset and one for the dev dataset

# Accuracy
list_of_train_accuracies = []  # rename when copying to avoid clashes
list_of_dev_accuracies = []  # rename when copying to avoid clashes

# Each curve carries its own legend label.
plt.plot(list_of_train_accuracies, color='k', linestyle='-', label='Training')
plt.plot(list_of_dev_accuracies, color='r', linestyle='-', label='Validation')
plt.title(f'Model accuracy',  color='k')
plt.xlabel('Epoch',  color='k')
plt.ylabel('Accuracy',  color='k')
plt.tick_params(colors='k')
plt.legend(loc='upper left')
plt.xlim(0, NUMBER_OF_EPOCHS)
plt.ylim(0, 1)
plt.show()

# Loss
list_of_train_loss = []
list_of_dev_loss = []

plt.plot(list_of_train_loss, color='k', linestyle='-', label='Training')
plt.plot(list_of_dev_loss, color='r', linestyle='-', label='Validation')
plt.title(f'Model loss',  color='k')
plt.xlabel('Epoch',  color='k')
plt.ylabel('loss',  color='k')
plt.tick_params(colors='k')
plt.legend(loc='upper left')
plt.xlim(0, NUMBER_OF_EPOCHS)
plt.ylim(0, 1)
plt.show()


## Assignment 1.2: Mean squared-loss logistic regression ##

In [None]:
# 2.2
def sigmoid(z):
  """Return 1 / (1 + e^z), element-wise when z is an array.

  NOTE(review): this equals the standard logistic function evaluated at -z,
  so the output decreases as z grows. The loss, gradient and update code in
  this file appear to be written against this form — confirm before changing
  the sign here in isolation.
  """
  denominator = 1 + np.exp(z)
  return 1 / denominator

def f(w, b, x):
  """Model output: `sigmoid` applied to the affine score dot(w, x) + b."""
  score = np.dot(w, x) + b
  return sigmoid(score)

def loss(w, b, x, y):
  """Return the negated half squared error, -(y - f(w, b, x))**2 / 2.

  NOTE(review): the value carries a leading minus sign, so it is never
  positive; the gradient and update code in this file appear to rely on
  that sign — confirm before changing it in isolation.
  """
  residual = y - f(w, b, x)
  return -(1/2) * residual**2

def df_dw_fd(w, b, x, y, epsilon = 10**(-5)):
  """Estimate the gradient of `loss` with respect to the weights by central
  finite differences.

  Each weight w[i] is shifted by +epsilon/2 and by -epsilon/2 in turn, `loss`
  is evaluated at both points, and the difference is divided by epsilon.

  The shifts are applied to a private float copy of `w`, so the caller's
  array is never modified (even if `loss` raises part-way through) and an
  integer-typed `w` does not silently truncate the small step.

  Args:
    w: array of weights, indexed along its first axis.
    b: bias, passed through to `loss`.
    x: feature vector, passed through to `loss`.
    y: target, passed through to `loss`.
    epsilon: total width of the central-difference interval.

  Returns:
    A list holding one gradient estimate per weight, in weight order.
  """
  half_step = epsilon / 2.0
  shifted = np.array(w, dtype=float)  # np.array copies, so `w` stays untouched
  gradients = []
  for i in range(shifted.shape[0]):
    original = shifted[i]
    shifted[i] = original + half_step
    upper = loss(shifted, b, x, y)
    shifted[i] = original - half_step
    lower = loss(shifted, b, x, y)
    shifted[i] = original  # restore before perturbing the next weight
    gradients.append((upper - lower) / epsilon)
  return gradients

def df_db_fd(w, b, x, y, epsilon = 10**(-5)):
  """Estimate the gradient of `loss` with respect to the bias by a central
  finite difference of total width epsilon."""
  half_step = epsilon / 2.0
  upper = loss(w, b + half_step, x, y)
  lower = loss(w, b - half_step, x, y)
  return (upper - lower) / epsilon

def df_dw_analytical(w, b, x, y):
  """Closed-form gradient with respect to the weights.

  Returns -(y - y_hat) * y_hat * (1 - y_hat) * x, where y_hat = f(w, b, x).
  With `sigmoid` and `loss` as defined in this file, this is the derivative
  of `loss` with respect to w.
  """
  prediction = f(w, b, x)
  scale = -(y - prediction) * prediction * (1 - prediction)
  return scale * x

def df_db_analytical(w, b, x, y):
  """Closed-form gradient with respect to the bias.

  Returns -(y - y_hat) * y_hat * (1 - y_hat), where y_hat = f(w, b, x); this
  is the weight gradient without the trailing factor x.
  """
  prediction = f(w, b, x)
  scale = -(y - prediction) * prediction * (1 - prediction)
  return scale

# Gradient check: for a few training examples, compare the finite-difference
# estimates with the analytical gradients at freshly drawn small random weights.
training_samples = [38, 39, 40]

for i in training_samples:
  x = trainxs_flat[:, i]
  y = trainys[i]

  # New random starting point for every example.
  w = np.random.normal(0, 1, dims)/100
  b = 0.0

  fd_bias, fd_weights = df_db_fd(w, b, x, y), df_dw_fd(w, b, x, y)

  analytical_bias = df_db_analytical(w, b, x, y)
  analytical_weights = df_dw_analytical(w, b, x, y)

  absolute_bias_difference = np.abs(fd_bias - analytical_bias)
  absolute_weight_differences = np.abs(fd_weights - analytical_weights)

  print(f"Training set index {i}, y is {y}, y_hat is {f(w, b, x)}")
  print('Finite differences estimates:')
  print(f"* Bias: {fd_bias}")
  print(f"* Weights: {fd_weights}")
  print('Analytical gradient estimates:')
  print(f"* Bias: {analytical_bias}")
  print(f"* Weights: {analytical_weights.tolist()}")
  print('Differences')
  print(f"* Bias difference: {absolute_bias_difference}")
  print(f"* Bias difference less than e^(-10)? {absolute_bias_difference < np.e**(-10)}")
  # Only weights whose input pixel is non-zero are sampled for display.
  print(f"* Sample weight differences: {np.extract(x > 0, absolute_weight_differences)[0:10].tolist()}")
  print(f"* Weight differences all less than e^(-10)? {(absolute_weight_differences < np.e**(-10)).all()}")
  print('\n')

# 2.3 - 2.4
def predict(w, b, x):
  """Return class 1 when the model output f(w, b, x) is at least 0.5,
  otherwise class 0."""
  if f(w, b, x) >= 0.5:
    return 1
  return 0

def accuracy(xs, ys, w, b):
  """Return the fraction of examples whose predicted class equals the target.

  Examples are the columns of `xs`; `ys[i]` is the target for column i.
  """
  n = xs.shape[1]
  hits = sum(1 for i in range(n) if predict(w, b, xs[:, i]) == ys[i])
  return hits / n

# Per-epoch training history. `train_logistic_regression` appends to these
# module-level lists and the plotting code further down reads them.
train_loss_per_epoch = []
train_accuracy_per_epoch = []
dev_accuracy_per_epoch = []

def train_logistic_regression(training_xs, training_ys, max_epochs=50, learning_rate=0.01):
  """Train a logistic regression model with one full-batch step per epoch.

  Weights start as normal(0, 1) draws divided by 100 and the bias starts at
  0. In every epoch the per-example gradient terms are accumulated over all
  training examples at fixed parameters, averaged, and applied in one step.

  Side effects: after each epoch the training loss, training accuracy and
  dev accuracy are appended to the module-level lists `train_loss_per_epoch`,
  `train_accuracy_per_epoch` and `dev_accuracy_per_epoch`. The dev accuracy is
  computed on the module-level `devxs_flat` / `devys`.

  Early stopping: once this call has run at least 30 epochs, training stops
  when the mean training accuracy of the last 15 epochs exceeds the mean of
  the 15 epochs before them by less than 0.001. Only accuracies recorded by
  this call are considered, so entries left in the module-level lists by an
  earlier call cannot trigger or delay the stop.

  Args:
    training_xs: feature matrix with one example per column.
    training_ys: targets, one per column of `training_xs`.
    max_epochs: upper bound on the number of epochs.
    learning_rate: step size applied to the averaged gradient terms.

  Returns:
    The tuple (w, b) of trained weights and bias.
  """
  assert training_xs.shape[1] == training_ys.shape[0], \
      "training_xs must have one column per entry of training_ys"

  train_n = training_xs.shape[1]
  dims = training_xs.shape[0]
  w = np.random.normal(0, 1, dims)/100
  b = 0.0

  # Accuracies of this call only; the convergence test must not look at
  # values left in the module-level history by a previous call.
  run_train_accuracies = []

  for epoch in range(max_epochs):
    grad_w = np.zeros(dims)
    grad_b = 0

    mean_squared_loss = 0

    # NOTE(review): w and b are only changed after this loop, so the visiting
    # order influences the sums through floating-point rounding alone.
    shuffled_indices = [*range(0,train_n,1)]
    np.random.shuffle(shuffled_indices)

    for i in shuffled_indices:
      x, y = training_xs[:,i], training_ys[i]
      y_hat = f(w, b, x)

      grad_w -= (y - y_hat) * y_hat * (1 - y_hat) * x
      grad_b -= (y - y_hat) * y_hat * (1 - y_hat)

      # Half squared error, averaged over the training set; evaluated with
      # the parameters from before this epoch's update.
      mean_squared_loss += (y - y_hat)**2/(2*train_n)

    grad_w /= train_n
    grad_b /= train_n

    # The accumulated terms match `df_dw_analytical` / `df_db_analytical`.
    # With this file's `sigmoid` and `loss` they are the gradient of the
    # negated squared error, so adding them reduces the squared error.
    w += learning_rate * grad_w
    b += learning_rate * grad_b

    train_accuracy = accuracy(training_xs, training_ys, w, b)
    dev_accuracy = accuracy(devxs_flat, devys, w, b)

    train_loss_per_epoch.append(mean_squared_loss)
    train_accuracy_per_epoch.append(train_accuracy)
    dev_accuracy_per_epoch.append(dev_accuracy)
    run_train_accuracies.append(train_accuracy)

    # Early stopping (convergence criterion)
    if len(run_train_accuracies) >= 30:
      if np.mean(run_train_accuracies[-15:]) - np.mean(run_train_accuracies[-30:-15]) < 0.001:
          # `epoch` is the 0-based loop index.
          print(f"Break on epoch {epoch}")
          break

  return w, b

# Hyper-parameters for the training run; LEARNING_RATE is also used in the
# plot titles and output file names below.
LEARNING_RATE = 0.01
MAX_EPOCHS = 1000
w_hat, b_hat = train_logistic_regression(
  trainxs_flat,
  trainys,
  max_epochs=MAX_EPOCHS,
  learning_rate=LEARNING_RATE,
)

# 2.5 — Loss
# Only the training loss is recorded, so exactly one curve is plotted and the
# legend names exactly one entry.

plt.plot(train_loss_per_epoch, color='k', linestyle='-')
plt.title(f'Logistic regression: Loss (learning rate = {LEARNING_RATE})',  color='k')
plt.ylabel('Loss',  color='k')
plt.xlabel('Epoch',  color='k')
plt.legend(['Training'], loc='upper left')
plt.tick_params(colors='k')
plt.xlim(0, len(train_loss_per_epoch))
plt.ylim(0, 1)
# Save before show() so the figure written to disk is the one displayed.
plt.savefig(f"q2_loss_{LEARNING_RATE}.png", dpi=300)
plt.show()

# 2.6 — Accuracy
# Training (black) and validation (red) accuracy per epoch; each curve
# carries its own legend label.

plt.plot(train_accuracy_per_epoch, color='k', linestyle='-', label='Training')
plt.plot(dev_accuracy_per_epoch, color='r', linestyle='-', label='Validation')
plt.title(f'Logistic regression: Accuracy (learning rate = {LEARNING_RATE})',  color='k')
plt.xlabel('Epoch',  color='k')
plt.ylabel('Accuracy',  color='k')
plt.tick_params(colors='k')
plt.legend(loc='upper left')
plt.xlim(0, len(train_accuracy_per_epoch))
plt.ylim(0, 1)
plt.savefig(f"q2_accuracy_{LEARNING_RATE}.png", dpi=300)
plt.show()

# 2.7 — Epoch with highest validation accuracy
# argmax returns the first position of the maximum; the reported epoch number
# is that position plus one.
index_of_maximum = np.argmax(dev_accuracy_per_epoch)
epoch = index_of_maximum + 1
print(f"Epoch: {epoch}")
print(f"Training accuracy on epoch {epoch}: {train_accuracy_per_epoch[index_of_maximum]}")
print(f"Validation accuracy on epoch {epoch}: {dev_accuracy_per_epoch[index_of_maximum]}")