# Environment Setup

In [None]:
# Mount Google Drive at /content/drive/ so that files stored there are reachable from this notebook.
# NOTE: google.colab is only importable inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive/')

In [None]:
"""
Change directory to where this file is located
"""
%cd 'COPY&PASTE FILE DIRECTORY HERE'

In [None]:
!pip install portalocker>=2.0.0 # 해당 셀 실행 이후 '런타임 > 런타임 다시 시작' 후 위에서부터 다시 실행

In [None]:
%load_ext tensorboard

In [None]:
import time
import math
import numpy as np
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
from typing import List, Tuple

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
import torchtext
from torchtext.vocab import build_vocab_from_iterator
from torchtext.data.utils import get_tokenizer
from torchtext.data.functional import to_map_style_dataset

In [None]:
"""
import modules you need
"""


In [None]:
# Pick the compute device once: CUDA when PyTorch can see a GPU, otherwise CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Report the library versions and the selected device.
print("Using PyTorch version: {}, Device: {}".format(torch.__version__, DEVICE))
print("Using torchtext version: {}".format(torchtext.__version__))

# Load Data

In [None]:
"""
Load AG_NEWS dataset and set up the tokenizer and encoder pipeline.

Do NOT modify.
"""

# Load the AG_NEWS train/test splits, using ./data as the dataset root directory.
train_data, test_data = torchtext.datasets.AG_NEWS(root='./data')

# torchtext's 'basic_english' tokenizer: raw string -> list of tokens.
tokenizer = get_tokenizer('basic_english')

def tokens(data_iter):
    """Yield the tokenized text of every (label, text) pair in data_iter; labels are ignored."""
    for _, text in data_iter:
        yield tokenizer(text)

# Build the vocabulary from the training split only, with "<unk>" as the single special token.
# Tokens that are not in the vocabulary are looked up as "<unk>" via the default index.
encoder = build_vocab_from_iterator(tokens(train_data), specials=["<unk>"])
encoder.set_default_index(encoder["<unk>"])

# text_pipeline: raw string -> tokens -> vocabulary lookup through `encoder`.
# label_pipeline: raw label -> int, shifted down by one.
text_pipeline = lambda x: encoder(tokenizer(x))
label_pipeline = lambda x: int(x) - 1

In [None]:
def collate_batch(
    batch: List[Tuple[int, str]]
    ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """
    Creates a batch of encoded text, label and token length tensors.

    Question (a)
    - The input texts in the batch have different lengths.
    - Every sequence is brought to the same length, which is the (floored)
      average token length of the sequences in this batch:
      longer sequences are truncated, shorter ones are right-padded with 0.
    - Text tensors are laid out as (TOKEN_LENGTH, BATCH) for easier processing
      in the RNN model.
    - Token length tensors are used to index the last valid hidden token for
      classification, so each length is clamped to [1, TOKEN_LENGTH]:
      a truncated sequence reports TOKEN_LENGTH, and an empty text reports 1
      so that `length - 1` is always a valid time index.

    Args:
      batch: list of tuples, each containing an integer label and a text input.
      - ex) [(3, "Wall St. Bears..."), (4, "Comets, Asteroids and ..."), ...]
      - number of tuples in the list is same as BATCH SIZE.

    Returns:
      text_list: batch of encoded long type text tensors with size (TOKEN_LENGTH, BATCH)
      label_list: batch of long type label tensors with size (BATCH)
      len_list: batch of long type token length tensors with size (BATCH)
    """

    ##### YOUR CODE #####
    labels = [label_pipeline(_label) for _label, _ in batch]
    encoded = [torch.tensor(text_pipeline(_text), dtype=torch.long) for _, _text in batch]

    # Common sequence length for this batch; at least 1 so the text tensor is never empty.
    avg_len = max(1, sum(seq.size(0) for seq in encoded) // len(encoded))

    # Start from an all-zero (padding) tensor and copy each sequence into its own column.
    text_tensor = torch.zeros((avg_len, len(encoded)), dtype=torch.long)
    valid_lens = []
    for col, seq in enumerate(encoded):
        keep = min(seq.size(0), avg_len)  # truncate sequences longer than the average
        text_tensor[:keep, col] = seq[:keep]
        valid_lens.append(max(keep, 1))

    label_tensor = torch.tensor(labels, dtype=torch.long)
    len_tensor = torch.tensor(valid_lens, dtype=torch.long)

    assert text_tensor.size(1) == len(batch)

    return (text_tensor, label_tensor, len_tensor)
    #####################

In [None]:
"""
Load the data loader.

Do NOT modify.
"""

BATCH_SIZE = 512

# Wrap the datasets with to_map_style_dataset before handing them to DataLoader.
train_dataset = to_map_style_dataset(train_data)
test_dataset = to_map_style_dataset(test_data)
# Training batches are shuffled; validation batches keep the dataset order.
# Both loaders build their batches with collate_batch.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, collate_fn=collate_batch)
valid_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                              shuffle=False, collate_fn=collate_batch)

In [None]:
"""
Print out the first batch in the train loader.
Check if the collate function is implemented correctly.

Do NOT modify.
"""

# Draw one batch from the training loader and show the first 10 entries along dim 0 of each tensor.
batch_x, batch_y, len_x = next(iter(train_dataloader))
print(batch_x[:10])
print(batch_y[:10])
print(len_x[:10])

In [None]:
"""
Plot the sequence length distribution of the batches in the train dataloader.
Make sure that the batches have different sequence lengths.

Do NOT modify.
"""

# Collect the size of dim 0 of the text tensor for every training batch ...
batch_len = []
for batch_x, _, _ in train_dataloader:
    seq_len = batch_x.size(0)
    batch_len.append(seq_len)
# ... and plot those sizes as a histogram.
plt.hist(batch_len)
plt.show()

# Model

In [None]:
class RNN(nn.Module):
    """
    Single-layer tanh RNN text classifier written with raw parameters.

    Tokens are embedded, fed through the recurrence
        h_t = tanh(x_t W_xh^T + b_xh + h_{t-1} W_hh^T + b_hh)
    and the hidden state of the last valid token of each example is projected
    to class probabilities with W_hy / b_hy followed by a softmax.

    Note: the weight/bias parameters are created directly on DEVICE, while
    nn.Embedding is created on the default device, so move the whole model with
    `.to(DEVICE)` before use.
    """

    def __init__(
        self,
        vocab_size: int,
        input_size: int,
        hidden_size: int,
        num_class: int,
        dropout_ratio: float,
    ):
        """
        Define the model weight parameters and initialize the weights.

        Question (b)
        - Complete the dimension and shape of the weights and biases.
        - Use the model parameters (vocab_size, input_size, hidden_size, num_class).

        Weights are stored as (out_features, in_features), so that the fan-in
        computed in init_parameters (the size of dim 1) is the input dimension
        of each transform.

        Args:
          vocab_size: size of dictionary of vocabularies.
          input_size: size of each embedding vector.
          hidden_size: size of hidden dimension.
          num_class: size of output classes.
          dropout_ratio: probability of an element to be zeroed.
        """
        super(RNN, self).__init__()

        ##### YOUR CODE #####
        whh_size = (hidden_size, hidden_size)
        wxh_size = (hidden_size, input_size)
        why_size = (num_class, hidden_size)
        bhh_size = (hidden_size,)
        bxh_size = (hidden_size,)
        bhy_size = (num_class,)
        #####################

        kwargs = {'device': DEVICE, 'dtype': torch.float}
        self.dropout = dropout_ratio
        self.hidden = hidden_size
        self.num_class = num_class
        self.embedding = nn.Embedding(vocab_size, input_size)
        self.W_hh = nn.parameter.Parameter(torch.empty(whh_size, **kwargs))
        self.W_xh = nn.parameter.Parameter(torch.empty(wxh_size, **kwargs))
        self.W_hy = nn.parameter.Parameter(torch.empty(why_size, **kwargs))
        self.b_hh = nn.parameter.Parameter(torch.empty(bhh_size, **kwargs))
        self.b_xh = nn.parameter.Parameter(torch.empty(bxh_size, **kwargs))
        self.b_hy = nn.parameter.Parameter(torch.empty(bhy_size, **kwargs))

        self.init_parameters()

    def init_parameters(self):
        """
        Initialize the parameters with Kaiming uniform initialization.

        Do NOT modify this method.
        """
        nn.init.kaiming_uniform_(self.W_hh, a=math.sqrt(5))
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.W_hh)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.b_hh, -bound, bound)
        nn.init.kaiming_uniform_(self.W_xh, a=math.sqrt(5))
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.W_xh)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.b_xh, -bound, bound)
        nn.init.kaiming_uniform_(self.W_hy, a=math.sqrt(5))
        fan_in, _ = nn.init._calculate_fan_in_and_fan_out(self.W_hy)
        bound = 1 / math.sqrt(fan_in)
        nn.init.uniform_(self.b_hy, -bound, bound)

    def forward(self, inputs: torch.Tensor, length: torch.Tensor) -> torch.Tensor:
        """
        Question (c)
        - Randomly initialize h_0 with appropriate shape.
        - Pass a sequence of tokens into the recurrent network.
        - Implement dropout to embedded tokens with the given probability (self.dropout).
          For example, if self.dropout is 0.3, 30% of the embedded tokens will be dropped out.
        - We do not want to use a hidden cell of a zero-padded token for classification!
        - Index the hidden cell of the last valid token (excluding the zero-padding)
          based on the token length of each example in the batch.
        - Do NOT use pre-defined PyTorch layers for this question. (e.g. nn.RNN, nn.Dropout)

        Dropout is element-wise, only active in training mode, and the kept
        elements are rescaled by 1 / (1 - self.dropout). Because h_0 is drawn
        randomly on every call, outputs are stochastic even in eval mode.

        Args:
          inputs: a batch of encoded token sequences with shape (SEQ_LEN, BATCH_SIZE)
          length: a batch of token lengths with shape (BATCH_SIZE)

        Returns:
          Softmax probabilities for each class with shape (BATCH_SIZE, NUM_CLASS)

        Raises:
          ValueError: if inputs has no time steps (SEQ_LEN == 0).
        """

        ##### YOUR CODE #####
        seq_len, batch_size = inputs.shape
        if seq_len == 0:
            raise ValueError("inputs must contain at least one time step")

        embedded = self.embedding(inputs)  # (SEQ_LEN, BATCH_SIZE, INPUT_SIZE)

        # Hand-written inverted dropout on the embedded tokens (training mode only).
        if self.training and self.dropout > 0:
            keep_prob = 1.0 - self.dropout
            if keep_prob <= 0:
                # Everything is dropped; avoid dividing by zero.
                embedded = torch.zeros_like(embedded)
            else:
                keep_mask = (torch.rand_like(embedded) < keep_prob).to(embedded.dtype)
                embedded = embedded * keep_mask / keep_prob

        # Random h_0 with shape (BATCH_SIZE, HIDDEN_SIZE).
        hidden = torch.randn(
            batch_size, self.hidden, device=embedded.device, dtype=embedded.dtype
        )

        # Unroll the recurrence over time, keeping every hidden state.
        hidden_states = []
        for step_input in embedded:
            hidden = torch.tanh(
                step_input @ self.W_xh.t() + self.b_xh
                + hidden @ self.W_hh.t() + self.b_hh
            )
            hidden_states.append(hidden)
        hidden_states = torch.stack(hidden_states, dim=0)  # (SEQ_LEN, BATCH_SIZE, HIDDEN_SIZE)

        # Pick, per example, the hidden state of its last valid (non-padding) token.
        # Lengths are clamped so the time index always lies inside [0, SEQ_LEN - 1].
        last_idx = length.to(hidden_states.device).clamp(min=1, max=seq_len) - 1
        batch_idx = torch.arange(batch_size, device=hidden_states.device)
        last_hidden = hidden_states[last_idx, batch_idx]  # (BATCH_SIZE, HIDDEN_SIZE)

        logits = last_hidden @ self.W_hy.t() + self.b_hy
        softmax_probs = torch.softmax(logits, dim=1)

        return softmax_probs
        #####################

    def compute_loss(
        self,
        prediction: torch.Tensor,
        label: torch.Tensor) -> Tuple[torch.Tensor, int]:
        """
        Question (d)

        - Compute the cross entropy loss and the number of correct predictions
        - Do NOT use loss function in torch.nn library (e.g. nn.CrossEntropyLoss())

        The loss is the mean over the batch of -log(p[true class]); probabilities
        are clamped away from zero before the log to keep the loss finite.

        Args:
          prediction: output(softmax probabilities) from self.forward function with shape (BATCH_SIZE, NUM_CLASS)
          label: integer labels of the batch inputs with shape (BATCH_SIZE)

        Returns:
          cross entropy loss of the batch (float tensor) and the number of correct predictions (integer)
        """
        ##### YOUR CODE #####
        batch_idx = torch.arange(label.size(0), device=prediction.device)
        true_class_prob = prediction[batch_idx, label]
        loss = -torch.log(true_class_prob.clamp_min(1e-12)).mean()
        correct = int((prediction.argmax(dim=1) == label).sum().item())

        return (loss, correct)
        #####################

# Training Modules

In [None]:
class ScheduledOptim():
    """
    Learning rate scheduler.

    Wraps an optimizer and rewrites the learning rate of all of its parameter
    groups on every call to update(): a linear warm-up towards the initial
    learning rate, followed by multiplicative decay.

    Do NOT modify.
    """

    def __init__(self, optimizer, n_warmup_steps, decay_rate):
        """
        Args:
          optimizer: wrapped optimizer; the 'lr' of its first parameter group is taken as the initial learning rate.
          n_warmup_steps: number of update() calls spent in the linear warm-up.
          decay_rate: factor the learning rate is multiplied by on every update() call after the warm-up.
        """
        self._optimizer = optimizer
        self.n_warmup_steps = n_warmup_steps
        self.decay = decay_rate
        self.n_steps = 0  # number of update() calls made so far
        self.initial_lr = optimizer.param_groups[0]['lr']
        self.current_lr = optimizer.param_groups[0]['lr']

    def zero_grad(self):
        """Forward to the wrapped optimizer's zero_grad()."""
        self._optimizer.zero_grad()

    def step(self):
        """Forward to the wrapped optimizer's step(); does not change the learning rate."""
        self._optimizer.step()

    def get_lr(self):
        """Return the learning rate set by the most recent update() (the initial one before any update)."""
        return self.current_lr

    def update(self):
        """Compute the next learning rate, write it to every parameter group and advance the step counter."""
        if self.n_steps < self.n_warmup_steps:
            # Linear warm-up; note that the very first update() (n_steps == 0) sets the rate to 0.
            lr = self.n_steps / self.n_warmup_steps * self.initial_lr
        elif self.n_steps == self.n_warmup_steps:
            lr = self.initial_lr
        else:
            # After the warm-up the rate shrinks by the decay factor on every call.
            lr = self.current_lr * self.decay

        self.current_lr = lr
        for param_group in self._optimizer.param_groups:
            param_group['lr'] = lr

        self.n_steps += 1

# Model Training

In [None]:
"""
Functions for training and evaluating the model.

Question (e)
- Compared to practice 3 (RNN Text Classification) covered in the lab session 3, there has been minor
  modification with scheduler and loss computation. Check what should have been changed, and complete
  the train and evaluate function that works for the current training pipeline
- Use the methods of the ScheduledOptim class above to perform necessary operations on the optimizer.
- Do NOT change the arguments given to the train, evaluate functions.
"""

def train(model, train_loader, scheduler):
    """
    Run one training epoch.

    Per batch: clear gradients, run the forward pass, get the loss and the
    number of correct predictions from model.compute_loss, back-propagate,
    refresh the learning rate with scheduler.update() and apply the optimizer
    step through the scheduler. update() is called before step() so that the
    step uses the freshly scheduled learning rate.

    Args:
      model: module called as model(text, length) that also provides compute_loss(prediction, label).
      train_loader: iterable of (text, label, length) batches with a `dataset` attribute.
      scheduler: object exposing zero_grad(), update() and step() (see ScheduledOptim).

    Returns:
      (average loss per example, accuracy in percent) over train_loader.dataset.
    """
    ##### YOUR CODE #####
    model.train()
    train_loss = 0
    correct = 0

    tqdm_bar = tqdm(train_loader)

    for text, label, length in tqdm_bar:
        text = text.to(DEVICE)
        label = label.to(DEVICE)
        length = length.to(DEVICE)

        scheduler.zero_grad()
        prediction = model(text, length)
        loss, batch_correct = model.compute_loss(prediction, label)
        loss.backward()
        scheduler.update()
        scheduler.step()

        # Assumes compute_loss returns the batch-mean loss (TODO confirm); weight it by the
        # batch size so that dividing by the dataset size below gives a per-example average.
        train_loss += loss.item() * label.size(0)
        correct += batch_correct

    train_loss /= len(train_loader.dataset)
    train_acc = 100. * correct / len(train_loader.dataset)

    return train_loss, train_acc
    #####################


def evaluate(model, test_loader):
    """
    Evaluate the model on every batch of test_loader without tracking gradients.

    Args:
      model: module called as model(text, length) that also provides compute_loss(prediction, label).
      test_loader: iterable of (text, label, length) batches with a `dataset` attribute.

    Returns:
      (average loss per example, accuracy in percent) over test_loader.dataset.
    """
    ##### YOUR CODE #####
    model.eval()
    test_loss = 0
    correct = 0

    tqdm_bar = tqdm(test_loader)

    with torch.no_grad():
        for text, label, length in tqdm_bar:
            text = text.to(DEVICE)
            label = label.to(DEVICE)
            length = length.to(DEVICE)

            prediction = model(text, length)
            loss, batch_correct = model.compute_loss(prediction, label)

            # Assumes compute_loss returns the batch-mean loss (TODO confirm); weight it by the
            # batch size so that dividing by the dataset size below gives a per-example average.
            test_loss += loss.item() * label.size(0)
            correct += batch_correct

    test_loss /= len(test_loader.dataset)
    test_acc = 100. * correct / len(test_loader.dataset)

    return test_loss, test_acc
    #####################

In [None]:
"""
Question (f)
- Train your RNN model and obtain the test accuracy of 70%.
- Select the input size, hidden size of your choice
- Try various optimizer type, learning rate and scheduler options for the best performance.
- Visualize your experiments with Tensorboard.
- Your TensorBoard results should include Train/Validation Loss and Accuracy.
"""

##### YOUR CODE #####
writer = SummaryWriter(log_dir="./logs")

# Hyperparameters -- starting values, tune them for the target accuracy.
EPOCHS = 10
BATCH_SIZE = 512
vocab_size = len(encoder)   # one embedding row per vocabulary entry
input_size = 128
hidden_size = 128
num_class = 4               # AG_NEWS has four news categories
dropout_ratio = 0.2
learning_rate = 1e-3
N_WARMUP_STEPS = 100        # number of scheduler.update() calls spent warming up
DECAY_RATE = 0.999          # multiplicative decay per scheduler.update() call after warm-up

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, collate_fn=collate_batch)
valid_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                              shuffle=False, collate_fn=collate_batch)

# .to(DEVICE) also moves the embedding table, which RNN creates on the default device.
model = RNN(vocab_size, input_size, hidden_size, num_class, dropout_ratio).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
scheduler = ScheduledOptim(optimizer, N_WARMUP_STEPS, DECAY_RATE)

for epoch in range(1, EPOCHS + 1):
    loss_train, accu_train = train(model, train_dataloader, scheduler)
    loss_val, accu_val = evaluate(model, valid_dataloader)
    lr = scheduler.get_lr()

    # Log train/validation loss and accuracy (plus the learning rate) for TensorBoard.
    writer.add_scalar('Loss/train', loss_train, epoch)
    writer.add_scalar('Loss/valid', loss_val, epoch)
    writer.add_scalar('Accuracy/train', accu_train, epoch)
    writer.add_scalar('Accuracy/valid', accu_val, epoch)
    writer.add_scalar('LearningRate', lr, epoch)

    print('-' * 83)
    print('| end of epoch {:2d} | lr: {:5.4f} | train loss: {:8.3f} | train accuracy: {:8.3f} | '
          'valid accuracy {:8.3f} '.format(epoch, lr, loss_train, accu_train, accu_val))
    print('-' * 83)

writer.flush()
writer.close()
#####################

In [None]:
# (f) Visualize the experimental logs with TensorBoard
# Submission of the visualization result is not required.
%tensorboard --logdir ./logs