In [1]:
import pickle
# NOTE: unpickling can execute arbitrary code; only load 'data.pk' from a trusted source.
# The context manager closes the file handle as soon as the data has been read.
with open('data.pk', 'rb') as data_file:
  data = pickle.load(data_file)
import numpy as np

In [3]:
vocab_size = 250

def calculate_log_co_occurence(word_data, delta_smoothing=0.5):
  """Compute the smoothed, symmetric log-co-occurence matrix for our data.

  INPUT: word_data - sequence/array of rows; the first three entries of each
           row are integer word indices in [0, vocab_size).
         delta_smoothing - constant added to every count so that the log
           doesn't break on 0's.  A hyperparameter; you can play with this.
  OUTPUT: (vocab_size x vocab_size) array holding log(count + delta_smoothing),
          where count[i, j] is the number of times words i and j are adjacent
          (in either order) within a row.
  """
  counts = np.zeros((vocab_size, vocab_size))
  rows_of_words = np.asarray(word_data)
  if rows_of_words.size:  # an empty data set leaves every count at zero
    first, second, third = rows_of_words[:, 0], rows_of_words[:, 1], rows_of_words[:, 2]
    # Adjacent pairs (first, second) and (second, third), plus their mirror
    # images so that the co-occurence is symmetric.
    # Optional: How would you generalize the model if our target co-occurence isn't symmetric?
    pair_rows = np.concatenate([first, second, second, third])
    pair_cols = np.concatenate([second, third, first, second])
    # np.add.at accumulates repeated (row, col) pairs; a plain fancy-indexed
    # `+= 1` would count each distinct pair only once.
    np.add.at(counts, (pair_rows, pair_cols), 1)
  return np.log(counts + delta_smoothing)

In [47]:
# Build the log-co-occurence target once per data split (train first, then validation).
log_co_occurence_train, log_co_occurence_valid = (
  calculate_log_co_occurence(data[split_name])
  for split_name in ('train_inputs', 'valid_inputs')
)

In [11]:
# Scratch example: standard-normal draws of shape (250, 5), scaled by 0.1.
w = np.random.normal(size=(250, 5)) * 0.1

In [48]:

# Scratch check: what a 1x5 row of ones looks like.
np.ones(shape=(1, 5))


array([[1., 1., 1., 1., 1.]])

In [31]:
# Spot-check a single entry of the training matrix (row 0, column 1).
log_co_occurence_train[0, 1]

2.803360380906535

In [None]:
def loss_GLoVE(W, b, log_co_occurence):
  """Sum of squared residuals of the GLoVE model against the target matrix.

  INPUT: W - Vxd; b - Vx1; log_co_occurence: VxV
  OUTPUT: scalar sum over all (i, j) of
          (W[i] . W[j] + b[i] + b[j] - log_co_occurence[i, j])**2
  """
  n, _n_cols = log_co_occurence.shape
  ones_row = np.ones([1, n])
  ones_col = np.ones([n, 1])
  similarity = W @ W.T        # entry (i, j) is W[i] . W[j]
  bias_by_row = b @ ones_row  # entry (i, j) is b[i]
  bias_by_col = ones_col @ b.T  # entry (i, j) is b[j]
  residual = similarity + bias_by_row + bias_by_col - log_co_occurence
  return np.sum(residual ** 2)

def grad_GLoVE(W,  b, log_co_occurence):
  """Return the gradient of GLoVE objective w.r.t W and b.

  The objective is the sum over all (i, j) of
  (W[i] . W[j] + b[i] + b[j] - log_co_occurence[i, j])**2.

  INPUT: W - Vxd; b - Vx1; log_co_occurence: VxV
  OUTPUT: grad_W - Vxd; grad_b - Vx1
  """
  # residual[i, j] = W[i] . W[j] + b[i] + b[j] - log_co_occurence[i, j].
  # b (Vx1) broadcasts along columns and b.T (1xV) along rows.
  residual = W @ W.T + b + b.T - log_co_occurence
  # The parameters of word k appear in row k and in column k of the residual,
  # so both contribute.  For a symmetric target this is just 2 * residual,
  # which gives the familiar 4 * residual @ W.
  row_plus_col = residual + residual.T
  grad_W = 2 * (row_plus_col @ W)
  grad_b = 2 * np.sum(row_plus_col, axis=1, keepdims=True)
  return grad_W, grad_b

def train_GLoVE(W, b, log_co_occurence_train, log_co_occurence_valid, n_epochs, do_print=False):
  """Train W and b according to the GLoVE objective with full-batch gradient descent.

  INPUT: W - Vxd; b - Vx1 (both are updated IN PLACE, so pass copies if the
           initial values are still needed);
         log_co_occurence_train / log_co_occurence_valid - VxV targets;
         n_epochs - number of gradient steps;
         do_print - print the losses and squared gradient norm after every step.
  OUTPUT: (W, b, train_loss, valid_loss), the losses being evaluated at the
          returned parameters.
  """
  n,_ = log_co_occurence_train.shape
  learning_rate = 0.5 / n  # A hyperparameter.  You can play with this if you want.
  # Evaluate once up front so the returned losses are defined even when n_epochs == 0.
  train_loss = loss_GLoVE(W, b, log_co_occurence_train)
  valid_loss = loss_GLoVE(W, b, log_co_occurence_valid)
  for _ in range(n_epochs):
    grad_W, grad_b = grad_GLoVE(W, b, log_co_occurence_train)
    W -= learning_rate * grad_W
    b -= learning_rate * grad_b
    train_loss = loss_GLoVE(W, b, log_co_occurence_train)
    valid_loss = loss_GLoVE(W, b, log_co_occurence_valid)
    if do_print:
      print(f"Train Loss: {train_loss}, valid loss: {valid_loss}, grad_norm: {np.sum(grad_W**2)}")
  return W, b, train_loss, valid_loss

In [None]:
# Train one GLoVE model per embedding dimension and record the final losses.
np.random.seed(1)
n_epochs = 500  # A hyperparameter.  You can play with this if you want.
# 2 is part of the sweep so that the 2-d embedding saved below for visualization
# is actually trained (without it W_final_2d / b_final_2d would stay None).
embedding_dims = np.array([1,2,3,5,7,10,12,15,20,25,30,40,50])  # Play with this
final_train_losses, final_val_losses = [], []  # Store the final losses for graphing
W_final_2d, b_final_2d = None, None
do_print = False  # If you want to see diagnostic information during training
n_words = log_co_occurence_train.shape[0]  # one embedding row per row of the target matrix
# NOTE(review): `tqdm` is not imported in any cell shown here - confirm it is in scope.
for embedding_dim in tqdm(embedding_dims):
  # A hyperparameter.  You can play with this if you want.  It multiplies
  # standard-normal draws, so it acts as their standard deviation.
  init_variance = 0.1
  W = init_variance * np.random.normal(size=(n_words, embedding_dim))
  b = init_variance * np.random.normal(size=(n_words, 1))
  if do_print:
    print(f"Training for embedding dimension: {embedding_dim}")
  W_final, b_final, train_loss, valid_loss = train_GLoVE(W, b, log_co_occurence_train, log_co_occurence_valid, n_epochs, do_print=do_print)
  if embedding_dim == 2:
    # Save a parameter copy if we are training 2d embedding for visualization later
    W_final_2d = W_final.copy()
    b_final_2d = b_final.copy()
  final_train_losses.append(train_loss)
  final_val_losses.append(valid_loss)
  if do_print:
    print(f"Final validation loss: {valid_loss}")


In [None]:
# Final training loss against embedding dimension, on log-log axes.
# NOTE(review): `pylab` is not imported in any cell shown here - confirm it is in scope.
# The curve needs a label, otherwise legend() has nothing to show and only emits a warning.
pylab.loglog(embedding_dims, final_train_losses, label="Training loss")
pylab.xlabel("Embedding Dimension")
pylab.ylabel("Training Loss")
pylab.legend()

In [None]:
# Final validation loss against embedding dimension, on log-log axes.
# NOTE(review): `pylab` is not imported in any cell shown here - confirm it is in scope.
# The curve needs a label, otherwise legend() has nothing to show and only emits a warning.
pylab.loglog(embedding_dims, final_val_losses, label="Validation loss")
pylab.xlabel("Embedding Dimension")
pylab.ylabel("Validation Loss")
pylab.legend()