## Quiz 03 Answers

1. False
2. True
3. False
4. [5, 0, 2.5]
5. 64
6. $2n + 1$

## Lab 03 Objectives

In [None]:
# Objective 1: Be able to implement LSTM in an efficient way
import torch
from torch import nn

class LSTM(nn.Module):
  """Single-step LSTM cell whose four gates share one fused linear projection.

  The hidden state and the memory (cell) state are carried around as a single
  tensor, concatenated along the last dimension, hence ``output_size == 2 * d``.
  """

  def __init__(self, d):
    """Create the fused gate projection and the two activations.

    Args:
      d: Base width of the cell. The projection maps ``2 * d`` features
        (previous hidden state concatenated with the input) to ``4 * d``
        gate pre-activations.
    """
    super().__init__()
    # A single matmul produces input, forget, output and candidate slices.
    self.linear = nn.Linear(2 * d, 4 * d, bias=True)
    self.sigmoid = nn.Sigmoid()
    self.tanh = nn.Tanh()
    # Hidden state and memory state are concatenated in the returned tensor.
    self.output_size = 2 * d

  def forward(self, prev_hidden_tensor, input_tensor):
    """Advance the cell by one time step.

    Args:
      prev_hidden_tensor: Previous state; its last dimension is split in half
        into the hidden state followed by the memory state.
      input_tensor: Input for this step. It is concatenated with the hidden
        half of the state, so their combined last dimension must be ``2 * d``.

    Returns:
      The new hidden state and new memory state concatenated along the last
      dimension, in that order.
    """
    hidden_prev, memory_prev = torch.chunk(prev_hidden_tensor, 2, dim=-1)
    gates = self.linear(torch.cat((hidden_prev, input_tensor), dim=-1))
    raw_in, raw_forget, raw_out, raw_candidate = torch.chunk(gates, 4, dim=-1)

    in_gate = self.sigmoid(raw_in)
    forget_gate = self.sigmoid(raw_forget)
    out_gate = self.sigmoid(raw_out)
    candidate = self.tanh(raw_candidate)

    # Blend freshly proposed memory with what is kept from the previous step.
    memory_next = in_gate * candidate + forget_gate * memory_prev
    hidden_next = out_gate * self.tanh(memory_next)
    return torch.cat((hidden_next, memory_next), dim=-1)


class RNNLayer(nn.Module):
  """Unroll a single-step recurrent cell over the time axis of a sequence.

  The wrapped cell is called as ``rnn_module(state, step_input)`` and must
  expose ``output_size``, the width of its state tensor. The state is split
  in half along the last dimension and only the first half (the hidden
  state) is collected as output.
  """

  def __init__(self, rnn_module):
    """Store the cell and mirror its ``output_size``.

    Args:
      rnn_module: Callable cell with an ``output_size`` attribute.
    """
    super().__init__()
    self.rnn_module = rnn_module
    self.output_size = rnn_module.output_size

  def forward(self, input_tensor):
    """Run the cell over every time step, starting from an all-zero state.

    Args:
      input_tensor: Tensor indexed as ``[batch, time, feature]``.

    Returns:
      The hidden half of the state at every step, stacked along dimension 1.
      For a zero-length sequence an empty tensor with a time dimension of 0
      is returned.
    """
    batch_size, seq_len = input_tensor.shape[0], input_tensor.shape[1]
    # new_zeros inherits the input's dtype and device; a bare torch.zeros
    # would always be float32 on CPU and break for inputs living elsewhere.
    cur_hidden_tensor = input_tensor.new_zeros(batch_size, self.output_size)

    if seq_len == 0:
      # torch.stack rejects an empty list, so build the empty result directly
      # with the same hidden width that chunking the state would give.
      h0, _ = cur_hidden_tensor.chunk(2, -1)
      return h0.new_zeros(batch_size, 0, h0.shape[-1])

    h_list = []
    for time_step in range(seq_len):
      cur_input_tensor = input_tensor[:, time_step, :]
      cur_hidden_tensor = self.rnn_module(cur_hidden_tensor, cur_input_tensor)
      # Only the hidden half is exposed; the memory half stays in the state.
      h, _ = cur_hidden_tensor.chunk(2, -1)
      h_list.append(h)
    return torch.stack(h_list, dim=1)

# Smoke test: a random batch pushed through the unrolled LSTM must come out
# with exactly the same (batch, time, feature) shape it went in with.
batch_size, seq_len, hidden_size = 4, 8, 16
lstm = LSTM(hidden_size)
lstm_layer = RNNLayer(lstm)

input_tensor = torch.randn(batch_size, seq_len, hidden_size)
output_tensor = lstm_layer(input_tensor)
assert output_tensor.size() == input_tensor.size()

In [None]:
# Objective 2: Verify that upscaled (inverted) dropout keeps the same expected
# value as a layer without dropout
import torch
from torch import nn

# Fixed seed so the empirical means below are reproducible.
torch.manual_seed(605)

p = 0.3
bias = 2.1
num_examples = 1000
dim = 128
dropout = nn.Dropout(p=p)

# Shift the samples away from zero so the mean is clearly non-trivial.
input_ = bias + torch.randn(num_examples, dim)
output = dropout(input_)

# Surviving activations are rescaled, so the two sample means should agree
# up to sampling noise.
assert torch.abs(input_.mean() - output.mean()) < 0.01

In [None]:
# Objective 3: Create a named entity extractor given BIO tags
# Tag inventory: a begin ("B") and an inside ("I") tag per entity type, plus
# the outside tag "O".
all_tags = tuple(
  f"{prefix}_{kind}"
  for kind in ("person", "location", "organization")
  for prefix in ("B", "I")
) + ("O",)

text = "Joe Biden is the president of the United States"
tokens = text.split()
# One tag per whitespace-separated token of the sample sentence.
tags = ["B_person", "I_person", *["O"] * 5, "B_location", "I_location"]
print([(token, tag) for token, tag in zip(tokens, tags)])

[('Joe', 'B_person'), ('Biden', 'I_person'), ('is', 'O'), ('the', 'O'), ('president', 'O'), ('of', 'O'), ('the', 'O'), ('United', 'B_location'), ('States', 'I_location')]


In [None]:
def named_entity_extractor(tokens, tags):
  """Group tokens into named entities according to their BIO tags.

  A ``B_<type>`` tag opens a new entity, following ``I_<type>`` tags extend
  it, and an ``O`` tag (or the next ``B_`` tag, or the end of the input)
  closes it. Tokens of one entity are joined with single spaces.

  Args:
    tokens: Iterable of token strings.
    tags: Iterable of tag strings, paired with ``tokens`` position by
      position; pairing stops at the shorter of the two.

  Returns:
    A list of ``(entity_text, entity_type)`` tuples in order of appearance.

  Raises:
    ValueError: If a tag is not ``O``, ``B_<type>`` or ``I_<type>``, or if
      an ``I_<type>`` tag does not continue an open entity of that type.
  """
  named_entities = []
  entity_tokens, entity_type = [], None

  for token, tag in zip(tokens, tags):
    # Split on the first underscore only, so types that themselves contain
    # underscores are kept whole.
    prefix, _, tag_type = tag.partition('_')

    if tag == "O":
      if entity_tokens:
        named_entities.append((" ".join(entity_tokens), entity_type))
      entity_tokens, entity_type = [], None
    elif prefix == "B" and tag_type:
      # A new entity may start right after another one without an "O" between.
      if entity_tokens:
        named_entities.append((" ".join(entity_tokens), entity_type))
      entity_tokens, entity_type = [token], tag_type
    elif prefix == "I" and tag_type:
      # Raise instead of assert: asserts vanish under ``python -O`` and would
      # let an orphan "I" tag silently corrupt the output.
      if not entity_tokens or tag_type != entity_type:
        raise ValueError(
          f"tag {tag!r} for token {token!r} does not continue an open "
          f"entity of type {tag_type!r}"
        )
      entity_tokens.append(token)
    else:
      raise ValueError(f"malformed BIO tag {tag!r} for token {token!r}")

  # Flush an entity that runs up to the end of the sequence.
  if entity_tokens:
    named_entities.append((" ".join(entity_tokens), entity_type))
  return named_entities

# The sample sentence should yield one person and one location entity.
assert named_entity_extractor(tokens, tags) == [
  ('Joe Biden', 'person'),
  ('United States', 'location'),
]

[('Joe Biden', 'person'), ('United States', 'location')]
