In [None]:
import numpy as np
import pandas as pd
import random
import torch
import torch.nn as nn

In [None]:
SEED = 1234

In [None]:
def set_seeds(seed=1234):
    """Set seeds for reproducibility."""
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed) # multi-GPU

In [None]:
# Set seeds for reproducibility
set_seeds(seed=SEED)

In [None]:
# Set device
cuda = True
device = torch.device('cuda' if (
    torch.cuda.is_available() and cuda) else 'cpu')
torch.set_default_tensor_type('torch.FloatTensor')
if device.type == 'cuda':
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
print (device)

cpu


In [None]:
# Load data
url = "https://raw.githubusercontent.com/GokuMohandas/MadeWithML/main/datasets/news.csv"
df = pd.read_csv(url, header=0) # load
df = df.sample(frac=1).reset_index(drop=True) # shuffle
df.head()

Unnamed: 0,title,category
0,Sharon Accepts Plan to Reduce Gaza Army Operat...,World
1,Internet Key Battleground in Wildlife Crime Fight,Sci/Tech
2,July Durable Good Orders Rise 1.7 Percent,Business
3,Growing Signs of a Slowing on Wall Street,Business
4,The New Faces of Reality TV,World


In [None]:
import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
import re

In [None]:
nltk.download('stopwords')
STOPWORDS = stopwords.words('english')
print (STOPWORDS[:5])
porter = PorterStemmer()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
['i', 'me', 'my', 'myself', 'we']


In [None]:
def preprocess(text, stopwords=STOPWORDS):
    """Conditional preprocessing on our text unique to our task."""
    # Lower
    text = text.lower()

    # Remove stopwords
    pattern = re.compile(r'\b(' + r'|'.join(stopwords) + r')\b\s*')
    text = pattern.sub('', text)

    # Remove words in paranthesis
    text = re.sub(r'\([^)]*\)', '', text)

    # Spacing and filters
    text = re.sub(r"([-;;.,!?<=>])", r" \1 ", text)
    text = re.sub('[^A-Za-z0-9]+', ' ', text) # remove non alphanumeric chars
    text = re.sub(' +', ' ', text)  # remove multiple spaces
    text = text.strip()

    return text

In [None]:
# Sample
text = "Great week for the NYSE!"
preprocess(text=text)

'great week nyse'

In [None]:
# Apply to dataframe
preprocessed_df = df.copy()
preprocessed_df.title = preprocessed_df.title.apply(preprocess)
print (f"{df.title.values[0]}\n\n{preprocessed_df.title.values[0]}")

Sharon Accepts Plan to Reduce Gaza Army Operation, Haaretz Says

sharon accepts plan reduce gaza army operation haaretz says


In [None]:
import collections
from sklearn.model_selection import train_test_split

In [None]:
TRAIN_SIZE = 0.7
VAL_SIZE = 0.15
TEST_SIZE = 0.15

In [None]:
def train_val_test_split(X, y, train_size):
    """Split dataset into data splits."""
    X_train, X_, y_train, y_ = train_test_split(X, y, train_size=TRAIN_SIZE, stratify=y)
    X_val, X_test, y_val, y_test = train_test_split(X_, y_, train_size=0.5, stratify=y_)
    return X_train, X_val, X_test, y_train, y_val, y_test

In [None]:
# Data
X = preprocessed_df["title"].values
y = preprocessed_df["category"].values

In [None]:
# Create data splits
X_train, X_val, X_test, y_train, y_val, y_test = train_val_test_split(
    X=X, y=y, train_size=TRAIN_SIZE)
print (f"X_train: {X_train.shape}, y_train: {y_train.shape}")
print (f"X_val: {X_val.shape}, y_val: {y_val.shape}")
print (f"X_test: {X_test.shape}, y_test: {y_test.shape}")
print (f"Sample point: {X_train[0]} → {y_train[0]}")

X_train: (84000,), y_train: (84000,)
X_val: (18000,), y_val: (18000,)
X_test: (18000,), y_test: (18000,)
Sample point: extinct humans left louse legacy → Sci/Tech


In [None]:
import itertools

In [None]:
class LabelEncoder(object):
    def __init__(self, class_to_index={}):
        self.class_to_index = class_to_index
        self.index_to_class = {v:k for k,v in self.class_to_index.items()}
        self.classes = list(self.class_to_index.keys())

    def __len__(self):
        return len(self.class_to_index)

    def __str__(self):
        return f"<LabelEncoder(num_classes={len(self)})>"

    def fit(self, y):
        classes = np.unique(y)

        for i, class_ in enumerate(classes):
            self.class_to_index[class_] = i
        self.index_to_class = {v:k for k, v in self.class_to_index.items()}
        self.classes = list(self.class_to_index.keys())

        return self
    
    def encode(self, y):
        encoded = np.zeros(len(y), dtype=int)
        for i, item in enumerate(y):
            encoded[i] = self.class_to_index[item]
        return encoded

    def decode(self, y):
        classes = []
        for i, item in enumerate(y):
            classes.append(self.index_to_class[item])
        return classes

    def save(self, fp):
        with open(fp, 'w') as fp:
            contents = {'class_to_index': self.class_to_index}
            json.dump(contents, fp, indent=4, sort_keys=False)

    @classmethod
    def load(cls, fp):
        with open(fp, 'r') as fp:
            kwargs = json.load(fp=fp)
        return cls(**kwargs)

In [None]:
# Encode
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
NUM_CLASSES = len(label_encoder)
label_encoder.class_to_index

{'Business': 0, 'Sci/Tech': 1, 'Sports': 2, 'World': 3}

In [None]:
# Convert labels to tokens
print (f"y_train[0]: {y_train[0]}")
y_train = label_encoder.encode(y_train)
y_val = label_encoder.encode(y_val)
y_test = label_encoder.encode(y_test)
print (f"y_train[0]: {y_train[0]}")

y_train[0]: Sci/Tech
y_train[0]: 1


In [None]:
# Class weights
counts = np.bincount(y_train)
class_weights = {i: 1.0/count for i, count in enumerate(counts)}
print (f"counts: {counts}\nweights: {class_weights}")

counts: [21000 21000 21000 21000]
weights: {0: 4.761904761904762e-05, 1: 4.761904761904762e-05, 2: 4.761904761904762e-05, 3: 4.761904761904762e-05}


In [None]:
import json
from collections import Counter
from more_itertools import take

In [None]:
class Tokenizer(object):
    def __init__(self, char_level, num_tokens=None, pad_token='<PAD>', 
                 oov_token='<UNK>', token_to_index=None):
        self.char_level = char_level
        self.separator = '' if self.char_level else ' '
        if num_tokens: num_tokens -= 2
        self.num_tokens = num_tokens
        self.pad_token = pad_token
        self.oov_token = oov_token
        if not token_to_index:
            token_to_index = {pad_token: 0, oov_token: 1}
        self.token_to_index = token_to_index
        self.index_to_token = {v:k for k, v in self.token_to_index.items()}

    def __len__(self):
        return len(self.token_to_index)

    def __str__(self):
        return f"<Tokenizer(num_tokens={len(self)})>"

    def fit_on_texts(self, texts):
        if not self.char_level:
            texts = [text.split(" ") for text in texts]
        all_tokens = [token for text in texts for token in text]
        counts = Counter(all_tokens).most_common(self.num_tokens)
        self.min_token_freq = counts[-1][-1]
        for token, count in counts:
            index = len(self)
            self.token_to_index[token] = index
            self.index_to_token[index] = token

        return self

    def texts_to_sequences(self, texts):
        sequences = []
        for text in texts:
            if not self.char_level:
                text = text.split(' ')
            sequence = []
            for token in text:
                sequence.append(self.token_to_index.get(
                    token, self.token_to_index[self.oov_token]
                ))
            sequences.append(np.asarray(sequence))

        return sequences

    def sequences_to_texts(self, sequences):
        texts = []
        for sequence in sequences:
            text = []
            for index in sequence:
                text.append(self.index_to_token.get(
                    index, self.oov_token
                ))
            texts.append(self.separator.join([token for token in text]))
        return texts

    def save(self, fp):
        with open(fp, 'w') as fp:
            contents = {
                'char_level': self.char_level,
                'oov_token': self.oov_token,
                'token_to_index': self.token_to_index
            }
            json.dump(contents, fp, indent=4, sort_keys=False)
    
    @classmethod
    def load(cls, fp):
        with open(fp, 'r') as fp:
            kwargs = json.load(fp=fp)
        return cls(**kwargs)

In [None]:
# Tokenize
tokenizer = Tokenizer(char_level=False, num_tokens=5000)
tokenizer.fit_on_texts(texts=X_train)
VOCAB_SIZE = len(tokenizer)
print (tokenizer)

<Tokenizer(num_tokens=5000)>


In [None]:
# Sample of tokens
print (take(5, tokenizer.token_to_index.items()))
print (f"least freq token's freq: {tokenizer.min_token_freq}") # use this to adjust num_tokens

[('<PAD>', 0), ('<UNK>', 1), ('39', 2), ('b', 3), ('gt', 4)]
least freq token's freq: 14


In [None]:
# Convert texts to sequences of indices
X_train = tokenizer.texts_to_sequences(X_train)
X_val = tokenizer.texts_to_sequences(X_val)
X_test = tokenizer.texts_to_sequences(X_test)
preprocessed_text = tokenizer.sequences_to_texts([X_train[0]])[0]
print ("Text to indices:\n"
    f"  (preprocessed) → {preprocessed_text}\n"
    f"  (tokenized) → {X_train[0]}")

Text to indices:
  (preprocessed) → <UNK> humans left <UNK> legacy
  (tokenized) → [   1 2038  963    1 3257]


In [None]:
def pad_sequences(sequences, max_seq_len=0):
    """Pad sequences to max length in sequence."""
    max_seq_len = max(max_seq_len, max(len(sequence) for sequence in sequences))
    padded_sequences = np.zeros((len(sequences), max_seq_len))
    for i, sequence in enumerate(sequences):
        padded_sequences[i][:len(sequence)] = sequence
    return padded_sequences

In [None]:
# 2D sequences
padded = pad_sequences(X_train[0:3])
print (padded.shape)
print (padded)

(3, 7)
[[1.000e+00 2.038e+03 9.630e+02 1.000e+00 3.257e+03 0.000e+00 0.000e+00]
 [1.313e+03 2.680e+02 3.670e+02 9.860e+02 1.000e+00 2.680e+02 0.000e+00]
 [4.490e+02 7.000e+01 9.640e+02 1.000e+00 5.000e+01 1.341e+03 1.678e+03]]


[array([   1, 2038,  963,    1, 3257]),
 array([1313,  268,  367,  986,    1,  268]),
 array([ 449,   70,  964,    1,   50, 1341, 1678])]

In [None]:
FILTER_SIZES = list(range(1, 4)) # uni, bi and tri grams

In [None]:
class Dataset(torch.utils.data.Dataset):
    def __init__(self, X, y, max_filter_size):
        self.X = X
        self.y = y
        self.max_filter_size = max_filter_size

    def __len__(self):
        return len(self.y)

    def __str__(self):
        return f"<Dataset(N={len(self.y)})>"

    def __getitem__(self, index):
        X = self.X[index]
        y = self.y[index]

        return [X, len(X), y]

    def collate_fn(self, batch):
        batch = np.array(batch, dtype=object)
        X = batch[:, 0]
        seq_lens = batch[:, 1]
        y = np.stack(batch[:, 2], axis=0)

        X = pad_sequences(sequences=X)

        X = torch.LongTensor(X.astype(np.int32))
        seq_lens = torch.LongTensor(seq_lens.astype(np.int32))
        y = torch.LongTensor(y.astype(np.int32))

        return X, seq_lens, y

    def create_dataloader(self, batch_size, shuffle=False, drop_last=False):
        return torch.utils.data.DataLoader(
            dataset=self, batch_size=batch_size, collate_fn=self.collate_fn,
            shuffle=shuffle, drop_last=drop_last, pin_memory=True)

In [None]:
# Create datasets
max_filter_size = max(FILTER_SIZES)
train_dataset = Dataset(X=X_train, y=y_train, max_filter_size=max_filter_size)
val_dataset = Dataset(X=X_val, y=y_val, max_filter_size=max_filter_size)
test_dataset = Dataset(X=X_test, y=y_test, max_filter_size=max_filter_size)
print ("Datasets:\n"
    f"  Train dataset:{train_dataset.__str__()}\n"
    f"  Val dataset: {val_dataset.__str__()}\n"
    f"  Test dataset: {test_dataset.__str__()}\n"
    "Sample point:\n"
    f"  X: {train_dataset[0][0]}\n"
    f"  seq_len: {train_dataset[0][1]}\n"
    f"  y: {train_dataset[0][2]}")

Datasets:
  Train dataset:<Dataset(N=84000)>
  Val dataset: <Dataset(N=18000)>
  Test dataset: <Dataset(N=18000)>
Sample point:
  X: [   1 2038  963    1 3257]
  seq_len: 5
  y: 1


In [None]:
# Create dataloaders
batch_size = 64
train_dataloader = train_dataset.create_dataloader(
    batch_size=batch_size)
val_dataloader = val_dataset.create_dataloader(
    batch_size=batch_size)
test_dataloader = test_dataset.create_dataloader(
    batch_size=batch_size)
batch_X, batch_seq_lens, batch_y = next(iter(train_dataloader))
print ("Sample batch:\n"
    f"  X: {list(batch_X.size())}\n"
    f"  seq_lens: {list(batch_seq_lens.size())}\n"
    f"  y: {list(batch_y.size())}\n"
    "Sample point:\n"
    f"  X: {batch_X[0]}\n"
    f" seq_len: {batch_seq_lens[0]}\n"
    f"  y: {batch_y[0]}")

Sample batch:
  X: [64, 11]
  seq_lens: [64]
  y: [64]
Sample point:
  X: tensor([   1, 2038,  963,    1, 3257,    0,    0,    0,    0,    0,    0])
 seq_len: 5
  y: 1


In [None]:
import sys

In [None]:
class Trainer(object):
    def __init__(self, model, device, loss_fn=None, optimizer=None, scheduler=None):

        # Set params
        self.model = model
        self.device = device
        self.loss_fn = loss_fn
        self.optimizer = optimizer
        self.scheduler = scheduler

    def train_step(self, dataloader):
        """Train step."""
        # Set model to train mode
        self.model.train()
        loss = 0.0

        # Iterate over train batches
        for i, batch in enumerate(dataloader):

            # Step
            batch = [item.to(self.device) for item in batch]  # Set device
            inputs, targets = batch[:-1], batch[-1]
            self.optimizer.zero_grad()  # Reset gradients
            z = self.model(inputs)  # Forward pass
            J = self.loss_fn(z, targets)  # Define loss
            J.backward()  # Backward pass
            self.optimizer.step()  # Update weights

            # Cumulative Metrics
            loss += (J.detach().item() - loss) / (i + 1)

        return loss

    def eval_step(self, dataloader):
        """Validation or test step."""
        # Set model to eval mode
        self.model.eval()
        loss = 0.0
        y_trues, y_probs = [], []

        # Iterate over val batches
        with torch.no_grad():
            for i, batch in enumerate(dataloader):

                # Step
                batch = [item.to(self.device) for item in batch]  # Set device
                inputs, y_true = batch[:-1], batch[-1]
                z = self.model(inputs)  # Forward pass
                J = self.loss_fn(z, y_true).item()

                # Cumulative Metrics
                loss += (J - loss) / (i + 1)

                # Store outputs
                y_prob = torch.sigmoid(z).cpu().numpy()
                y_probs.extend(y_prob)
                y_trues.extend(y_true.cpu().numpy())

        return loss, np.vstack(y_trues), np.vstack(y_probs)

    def predict_step(self, dataloader):
        """Prediction step."""
        # Set model to eval mode
        self.model.eval()
        y_probs = []

        # Iterate over val batches
        with torch.no_grad():
            for i, batch in enumerate(dataloader):

                # Forward pass w/ inputs
                inputs, targets = batch[:-1], batch[-1]
                y_prob = self.model(inputs, apply_softmax=True)

                # Store outputs
                y_probs.extend(y_prob)

        return np.vstack(y_probs)

    def train(self, num_epochs, patience, train_dataloader, val_dataloader):
        best_val_loss = np.inf
        for epoch in range(num_epochs):
            # Steps
            train_loss = self.train_step(dataloader=train_dataloader)
            val_loss, _, _ = self.eval_step(dataloader=val_dataloader)
            self.scheduler.step(val_loss)

            # Early stopping
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = self.model
                _patience = patience  # reset _patience
            else:
                _patience -= 1
            if not _patience:  # 0
                print("Stopping early!")
                break

            # Logging
            print(
                f"Epoch: {epoch+1} | "
                f"train_loss: {train_loss:.5f}, "
                f"val_loss: {val_loss:.5f}, "
                f"lr: {self.optimizer.param_groups[0]['lr']:.2E}, "
                f"_patience: {_patience}"
            )
        return best_model

In [None]:
BATCH_SIZE = 64
EMBEDDING_DIM = 100

In [None]:
# Input
sequence_size = 8 # words per input
x = torch.rand((BATCH_SIZE, sequence_size, EMBEDDING_DIM))
seq_lens = torch.randint(high=sequence_size, size=(1, BATCH_SIZE))
print (x.shape)
print (seq_lens.shape)

torch.Size([64, 8, 100])
torch.Size([1, 64])


In [None]:
RNN_HIDDEN_DIM = 128
DROPOUT_P = 0.1
RNN_DROPOUT_P = 0.1

In [None]:
# Initialize hidden state
hidden_t = torch.zeros((BATCH_SIZE, RNN_HIDDEN_DIM))
print (hidden_t.size())

torch.Size([64, 128])


In [None]:
# Initialize RNN cell
rnn_cell = nn.RNNCell(EMBEDDING_DIM, RNN_HIDDEN_DIM)
print (rnn_cell)

RNNCell(100, 128)


In [None]:
# Forward pass through RNN
x = x.permute(1, 0, 2) # RNN needs batch_size to be at dim 1

# Loop through the inputs time steps
hiddens = []
for t in range(sequence_size):
    hidden_t = rnn_cell(x[t], hidden_t)
    hiddens.append(hidden_t)
hiddens = torch.stack(hiddens)
hiddens = hiddens.permute(1, 0, 2) # bring batch_size back to dim 0
print (hiddens.size())

torch.Size([64, 8, 128])


In [None]:
# We also could've used a more abstracted layer
x = torch.rand((BATCH_SIZE, sequence_size, EMBEDDING_DIM))
rnn = nn.RNN(EMBEDDING_DIM, RNN_HIDDEN_DIM, batch_first=True)
out, h_n = rnn(x) # h_n is the last hidden state
print ("out: ", out.shape)
print ("h_n: ", h_n.shape)

out:  torch.Size([64, 8, 128])
h_n:  torch.Size([1, 64, 128])


In [None]:
# The same tensors
print (out[:,-1,:])
print (h_n.squeeze(0))

tensor([[-0.5106,  0.1550, -0.1367,  ..., -0.2666, -0.1516, -0.1995],
        [-0.3352,  0.0989, -0.1955,  ..., -0.2617, -0.4543, -0.1326],
        [-0.6425,  0.2002, -0.1804,  ..., -0.3815, -0.4300, -0.2057],
        ...,
        [-0.4790,  0.1860,  0.1568,  ..., -0.4702, -0.3543, -0.2998],
        [-0.5783,  0.3212, -0.1418,  ...,  0.0799, -0.1650, -0.0704],
        [-0.4951,  0.4392, -0.4269,  ..., -0.2001, -0.3974, -0.1463]],
       grad_fn=<SliceBackward>)
tensor([[-0.5106,  0.1550, -0.1367,  ..., -0.2666, -0.1516, -0.1995],
        [-0.3352,  0.0989, -0.1955,  ..., -0.2617, -0.4543, -0.1326],
        [-0.6425,  0.2002, -0.1804,  ..., -0.3815, -0.4300, -0.2057],
        ...,
        [-0.4790,  0.1860,  0.1568,  ..., -0.4702, -0.3543, -0.2998],
        [-0.5783,  0.3212, -0.1418,  ...,  0.0799, -0.1650, -0.0704],
        [-0.4951,  0.4392, -0.4269,  ..., -0.2001, -0.3974, -0.1463]],
       grad_fn=<SqueezeBackward1>)


In [None]:
def gather_last_relevant_hidden(hiddens, seq_lens):
    """Extract and collect the last relevant
    hidden state based on the sequence length."""
    print(seq_lens)
    seq_lens = seq_lens.long().detach().cpu().numpy() - 1
    out = []
    for batch_index, column_index in enumerate(seq_lens):
        print(f"column_index:{column_index}")
        print(f"batch_index:{batch_index}")
        out.append(hiddens[batch_index, column_index])
    return torch.stack(out)

In [None]:
# Get the last relevant hidden state
gather_last_relevant_hidden(hiddens=out, seq_lens=seq_lens).squeeze(0).shape

tensor([[1, 4, 1, 6, 1, 0, 0, 6, 6, 1, 1, 6, 5, 2, 2, 2, 2, 0, 0, 3, 3, 4, 4, 0,
         3, 0, 4, 2, 3, 4, 4, 7, 4, 3, 1, 4, 1, 0, 3, 7, 1, 6, 5, 7, 4, 6, 5, 4,
         0, 3, 2, 2, 2, 1, 1, 4, 0, 3, 3, 5, 5, 0, 4, 1]])
column_index:[ 0  3  0  5  0 -1 -1  5  5  0  0  5  4  1  1  1  1 -1 -1  2  2  3  3 -1
  2 -1  3  1  2  3  3  6  3  2  0  3  0 -1  2  6  0  5  4  6  3  5  4  3
 -1  2  1  1  1  0  0  3 -1  2  2  4  4 -1  3  0]
batch_index:0


torch.Size([64, 256])

In [None]:
import torch.nn.functional as F

In [None]:
HIDDEN_DIM = 100

In [None]:
class RNN(nn.Module):
    def __init__(self, embedding_dim, vocab_size, rnn_hidden_dim,
                 hidden_dim, dropout_p, num_classes, padding_idx=0):
        super(RNN, self).__init__()

        # Initialize embeddings
        self.embeddings = nn.Embedding(
            embedding_dim=embedding_dim, num_embeddings=vocab_size,
            padding_idx=padding_idx)

        # RNN
        self.rnn = nn.RNN(embedding_dim, rnn_hidden_dim, batch_first=True)

        # FC weights
        self.dropout = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(rnn_hidden_dim, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, inputs, apply_softmax=False):
        # Embed
        x_in, seq_lens = inputs
        x_in = self.embeddings(x_in)

        # Rnn outputs
        out, h_n = self.rnn(x_in)
        z = gather_last_relevant_hidden(hiddens=out, seq_lens=seq_lens)
        print(z)
        print(h_n)
        sys.exit(0)

        # FC layers
        z = self.fc1(z)
        z = self.dropout(z)
        y_pred = self.fc2(z)

        if apply_softmax:
            y_pred = F.softmax(y_pred, dim=1)
        return y_pred

In [None]:
# Simple RNN cell
model = RNN(
    embedding_dim=EMBEDDING_DIM, vocab_size=VOCAB_SIZE,
    rnn_hidden_dim=RNN_HIDDEN_DIM, hidden_dim=HIDDEN_DIM,
    dropout_p=DROPOUT_P, num_classes=NUM_CLASSES)
model = model.to(device) # set device
print (model.named_parameters)

<bound method Module.named_parameters of RNN(
  (embeddings): Embedding(5000, 100, padding_idx=0)
  (rnn): RNN(100, 128, batch_first=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (fc1): Linear(in_features=128, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)>


In [None]:
from torch.optim import Adam

In [None]:
NUM_LAYERS = 1
LEARNING_RATE = 1e-4
PATIENCE = 10
NUM_EPOCHS = 1

In [None]:
# Define Loss
class_weights_tensor = torch.Tensor(list(class_weights.values())).to(device)
loss_fn = nn.CrossEntropyLoss(weight=class_weights_tensor)

In [None]:
# Define optimizer & scheduler
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=3)

In [None]:
# Trainer module
trainer = Trainer(
    model=model, device=device, loss_fn=loss_fn,
    optimizer=optimizer, scheduler=scheduler)

In [None]:
# Train
best_model = trainer.train(
    NUM_EPOCHS, PATIENCE, train_dataloader, val_dataloader)

tensor([[-0.6144,  0.1413,  0.2809,  ...,  0.4665,  0.5974,  0.2586],
        [-0.4076,  0.2020, -0.0709,  ...,  0.4942, -0.4969,  0.1077],
        [ 0.1707,  0.6015,  0.0212,  ..., -0.1275,  0.3425, -0.1844],
        ...,
        [ 0.1270,  0.6666,  0.8198,  ...,  0.1013, -0.1388,  0.6960],
        [ 0.1975, -0.4622, -0.6726,  ...,  0.1162, -0.5428, -0.8140],
        [-0.3054,  0.2000, -0.8098,  ...,  0.7265, -0.1572, -0.6151]],
       grad_fn=<StackBackward>)
tensor([[[-1.3210e-01,  1.5695e-01, -2.5950e-02,  ...,  9.4291e-02,
          -1.2255e-01, -1.6328e-02],
         [-1.0918e-01,  1.2790e-01, -6.0527e-02,  ...,  8.4810e-02,
          -1.4664e-01, -4.0609e-03],
         [-1.8552e-02,  7.4403e-02, -6.6307e-02,  ...,  2.1516e-01,
          -1.7411e-01,  1.2210e-02],
         ...,
         [-1.2856e-01,  1.2487e-01, -1.1161e-02,  ...,  5.1288e-02,
          -1.7624e-01, -5.5097e-06],
         [-1.1352e-01,  1.0907e-01, -6.2950e-02,  ...,  1.1151e-01,
          -9.5556e-02,  1.3124e-

SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
import json
from sklearn.metrics import precision_recall_fscore_support

In [None]:
def get_metrics(y_true, y_pred, classes):
    """Per-class performance metrics."""
    # Performance
    performance = {"overall": {}, "class": {}}

    # Overall performance
    metrics = precision_recall_fscore_support(y_true, y_pred, average="weighted")
    performance["overall"]["precision"] = metrics[0]
    performance["overall"]["recall"] = metrics[1]
    performance["overall"]["f1"] = metrics[2]
    performance["overall"]["num_samples"] = np.float64(len(y_true))

    # Per-class performance
    metrics = precision_recall_fscore_support(y_true, y_pred, average=None)
    for i in range(len(classes)):
        performance["class"][classes[i]] = {
            "precision": metrics[0][i],
            "recall": metrics[1][i],
            "f1": metrics[2][i],
            "num_samples": np.float64(metrics[3][i]),
        }

    return performance

In [None]:
# Get predictions
test_loss, y_true, y_prob = trainer.eval_step(dataloader=test_dataloader)
y_pred = np.argmax(y_prob, axis=1)

In [None]:
# Determine performance
performance = get_metrics(
    y_true=y_test, y_pred=y_pred, classes=label_encoder.classes)
print (json.dumps(performance['overall'], indent=2))

{
  "precision": 0.535014901521111,
  "recall": 0.5327222222222222,
  "f1": 0.5318451846429071,
  "num_samples": 18000.0
}


In [None]:
# Input
sequence_size = 8 # words per input
x = torch.rand((BATCH_SIZE, sequence_size, EMBEDDING_DIM))
print (x.shape)

torch.Size([64, 8, 100])


In [None]:
# GRU
gru = nn.GRU(input_size=EMBEDDING_DIM, hidden_size=RNN_HIDDEN_DIM, batch_first=True)

In [None]:
# Forward pass
out, h_n = gru(x)
print (f"out: {out.shape}")
print (f"h_n: {h_n.shape}")

out: torch.Size([64, 8, 128])
h_n: torch.Size([1, 64, 128])


In [None]:
# GRU
gru = nn.GRU(input_size=EMBEDDING_DIM, hidden_size=RNN_HIDDEN_DIM,
             batch_first=True, bidirectional=True)

In [None]:
# Forward pass
out, h_n = gru(x)
print (f"out: {out.shape}")
print (f"h_n: {h_n.shape}")

out: torch.Size([64, 8, 256])
h_n: torch.Size([2, 64, 128])


In [None]:
class GRU(nn.Module):
    def __init__(self, embedding_dim, vocab_size, rnn_hidden_dim,
                 hidden_dim, dropout_p, num_classes, padding_idx=0):
        super(GRU, self).__init__()

        # Initialize embeddings
        self.embeddings = nn.Embedding(embedding_dim=embedding_dim,
                                       num_embeddings=vocab_size,
                                       padding_idx=padding_idx)

        # RNN
        self.rnn = nn.GRU(embedding_dim, rnn_hidden_dim,
                          batch_first=True, bidirectional=True)

        # FC weights
        self.dropout = nn.Dropout(dropout_p)
        self.fc1 = nn.Linear(rnn_hidden_dim*2, hidden_dim)
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, inputs, apply_softmax=False):
        # Embed
        x_in, seq_lens = inputs
        x_in = self.embeddings(x_in)

        # Rnn outputs
        out, h_n = self.rnn(x_in)
        z = gather_last_relevant_hidden(hiddens=out, seq_lens=seq_lens)
        print(z)
        print(h_n)
        sys.exit(0)
        # FC layers
        z = self.fc1(z)
        z = self.dropout(z)
        y_pred = self.fc2(z)

        if apply_softmax:
            y_pred = F.softmax(y_pred, dim=1)
        return y_pred

In [None]:
# Simple RNN cell
model = GRU(
    embedding_dim=EMBEDDING_DIM, vocab_size=VOCAB_SIZE,
    rnn_hidden_dim=RNN_HIDDEN_DIM, hidden_dim=HIDDEN_DIM,
    dropout_p=DROPOUT_P, num_classes=NUM_CLASSES)
model = model.to(device) # set device
print (model.named_parameters)

<bound method Module.named_parameters of GRU(
  (embeddings): Embedding(5000, 100, padding_idx=0)
  (rnn): GRU(100, 128, batch_first=True, bidirectional=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (fc1): Linear(in_features=256, out_features=100, bias=True)
  (fc2): Linear(in_features=100, out_features=4, bias=True)
)>


In [None]:
# Define Loss
class_weights_tensor = torch.Tensor(list(class_weights.values())).to(device)
loss = nn.CrossEntropyLoss(weight=class_weights_tensor)

In [None]:
# Define optimizer & scheduler
optimizer = Adam(model.parameters(), lr=LEARNING_RATE)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer, mode='min', factor=0.1, patience=3)

In [None]:
# Trainer module
trainer = Trainer(
    model=model, device=device, loss_fn=loss,
    optimizer=optimizer, scheduler=scheduler)

In [None]:
# Train
best_model = trainer.train(
    NUM_EPOCHS, PATIENCE, train_dataloader, val_dataloader)

tensor([[-0.1076,  0.0203,  0.4070,  ..., -0.2434,  0.4046, -0.2906],
        [-0.0482, -0.3683,  0.0157,  ..., -0.1171,  0.0091, -0.1434],
        [-0.0730, -0.1345,  0.5942,  ..., -0.2555,  0.0346, -0.1389],
        ...,
        [ 0.2085,  0.0236,  0.2493,  ...,  0.1295, -0.0073, -0.3213],
        [-0.1409,  0.2499,  0.3724,  ..., -0.0078, -0.0047, -0.1569],
        [ 0.2241, -0.2549,  0.4227,  ...,  0.0763,  0.2559, -0.0822]],
       grad_fn=<StackBackward>)
tensor([[[-0.0306, -0.0049, -0.0881,  ..., -0.0214, -0.1131,  0.1141],
         [-0.0378, -0.0281, -0.1002,  ..., -0.0591, -0.0940,  0.0512],
         [-0.0304, -0.0214, -0.0334,  ..., -0.0550, -0.0926,  0.0420],
         ...,
         [-0.0232,  0.0061, -0.0393,  ..., -0.0387, -0.1295,  0.0571],
         [-0.0530, -0.0171, -0.0845,  ..., -0.0426, -0.1165,  0.0976],
         [-0.0521, -0.0198, -0.1019,  ..., -0.0458, -0.0954,  0.0683]],

        [[-0.1302, -0.4508, -0.1086,  ..., -0.0669,  0.0109, -0.0557],
         [ 0.2096,  0

SystemExit: ignored

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
#Testing cells

In [None]:
import torch
import numpy as np
import pandas as pd
import torch.nn as nn

In [None]:
def gather_last_relevant_hidden(hiddens, seq_lens):
    """Extract and collect the last relevant
    hidden state based on the sequence length."""
    seq_lens = seq_lens.long().detach().cpu().numpy() - 1
    out = []
    for batch_index, column_index in enumerate(seq_lens):
        out.append(hiddens[batch_index, column_index])
    return torch.stack(out)

In [None]:
BATCH_SIZE = 2
EMBEDDING_DIM = 4

In [None]:
# Input
sequence_size = 5 # words per input
x = torch.rand((BATCH_SIZE, sequence_size, EMBEDDING_DIM))
seq_lens = torch.randint(high=sequence_size, size=(1, BATCH_SIZE))
print (x.shape)
print (seq_lens.shape)

torch.Size([2, 5, 4])
torch.Size([1, 2])


In [None]:
seq_lens

tensor([[1, 2]])

In [None]:
x

tensor([[[0.3730, 0.8467, 0.4450, 0.9095],
         [0.0148, 0.7017, 0.3694, 0.6783],
         [0.5956, 0.6080, 0.3029, 0.9555],
         [0.7813, 0.1294, 0.5745, 0.1748],
         [0.5227, 0.9340, 0.7143, 0.7911]],

        [[0.5970, 0.3269, 0.2179, 0.8938],
         [0.0399, 0.7193, 0.7832, 0.2968],
         [0.1224, 0.7740, 0.2536, 0.1784],
         [0.0558, 0.6388, 0.8998, 0.9598],
         [0.2116, 0.9886, 0.9557, 0.8035]]])

In [None]:
RNN_HIDDEN_DIM = 5
DROPOUT_P = 0.1
RNN_DROPOUT_P = 0.1

In [None]:
# Initialize hidden state
hidden_t = torch.zeros((BATCH_SIZE, RNN_HIDDEN_DIM))
print (hidden_t.size())

torch.Size([2, 5])


In [None]:
# We also could've used a more abstracted layer
# x = torch.rand((BATCH_SIZE, sequence_size, EMBEDDING_DIM))
rnn = nn.RNN(EMBEDDING_DIM, RNN_HIDDEN_DIM, batch_first=True)
out, h_n = rnn(x) # h_n is the last hidden state
print ("out: ", out.shape)
print ("h_n: ", h_n.shape)

out:  torch.Size([2, 5, 5])
h_n:  torch.Size([1, 2, 5])


In [None]:
# Get the last relevant hidden state
z = gather_last_relevant_hidden(hiddens=out, seq_lens=seq_lens)

In [None]:
z

tensor([[[-0.4699, -0.2811,  0.3740, -0.1991, -0.4477],
         [-0.4313, -0.1389,  0.4519,  0.0634, -0.3792]]],
       grad_fn=<StackBackward>)

In [None]:
out

tensor([[[-0.4699, -0.2811,  0.3740, -0.1991, -0.4477],
         [-0.4313, -0.1389,  0.4519,  0.0634, -0.3792],
         [-0.3287, -0.2803,  0.5780, -0.0395, -0.2967],
         [-0.3167, -0.3641,  0.6104, -0.5065, -0.2157],
         [-0.4896, -0.3371,  0.5245, -0.2444, -0.6252]],

        [[-0.3406, -0.3013,  0.5843, -0.3599, -0.2522],
         [-0.4481, -0.2736,  0.3602, -0.2044, -0.5484],
         [-0.5782, -0.0377,  0.3846, -0.0162, -0.2145],
         [-0.1840, -0.4854,  0.2840,  0.0108, -0.5652],
         [-0.5505, -0.2334,  0.4232, -0.1274, -0.5333]]],
       grad_fn=<TransposeBackward1>)

In [None]:
h_n

tensor([[[-0.4896, -0.3371,  0.5245, -0.2444, -0.6252],
         [-0.5505, -0.2334,  0.4232, -0.1274, -0.5333]]],
       grad_fn=<StackBackward>)

In [None]:
z.shape

torch.Size([1, 4, 8])

In [None]:
h_n

tensor([[[-0.3996, -0.4355,  0.0680,  0.2319, -0.6096, -0.3701,  0.0888,
          -0.1354],
         [-0.6693, -0.5435,  0.2675,  0.0390, -0.3204, -0.2002,  0.4912,
          -0.0896],
         [-0.5305, -0.5099,  0.2141,  0.1776, -0.5199, -0.2458,  0.3208,
          -0.0570],
         [-0.3000, -0.5704,  0.1442, -0.0658, -0.5751, -0.5448,  0.2178,
           0.2336]]], grad_fn=<StackBackward>)

In [None]:
h_n.shape

torch.Size([1, 4, 8])