# Embedding Layers in RNN

In [18]:
#import libraries
import torch
import torch.nn as nn
from torch.autograd import Variable

#Recurrent, Embedding & Dropout Layers
inputs = [[1, 2, 3], [1, 0, 4], [1, 2, 4], [1, 4, 0], [1, 3, 3]]
x = Variable(torch.LongTensor(inputs))
embedding = nn.Embedding(num_embeddings=5, embedding_dim=20, padding_idx=1)
drop = nn.Dropout(p=0.5)
gru = nn.GRU(input_size=20, hidden_size=50, num_layers=2, batch_first=True,bidirectional=True, dropout=0.3)


In [19]:
emb = drop(embedding(x))
gru_h, gru_h_t = gru(emb)

print('Embedding size : ', emb.size())
print('GRU hidden states size : ', gru_h.size())
print('GRU last hidden state size : ', gru_h_t.size())

Embedding size :  torch.Size([5, 3, 20])
GRU hidden states size :  torch.Size([5, 3, 100])
GRU last hidden state size :  torch.Size([4, 5, 50])


In [20]:
emb

tensor([[[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [-0.0000, -0.0000, -0.0000, -0.1707,  2.6281, -2.1161, -0.0000,
           0.0000, -1.9245,  0.0000, -0.0000, -0.0000,  0.0000,  0.0000,
           0.8809, -4.3065,  0.0000, -0.3075,  0.0000, -0.0000],
         [ 0.0000, -0.4744, -0.0000,  0.0000,  0.9426, -0.4399,  0.6423,
           0.0000,  0.0000,  0.0000,  0.0000,  2.0492, -0.5493,  1.3244,
          -0.0000, -0.1863,  0.0000,  0.0000, -0.0000, -0.8903]],

        [[ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
           0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000],
         [-0.0000,  2.7458,  0.0000, -0.0000, -0.5961, -0.0000, -3.1967,
           0.0000, -0.0000, -2.3899,  0.0000,  0.0000, -0.0000, -0.7566,
        

In [21]:
!pip install pandas
!pip install tqdm



In [22]:
from collections import Counter, defaultdict

import numpy as np
import pandas as pd
import torch
from torch.nn import Embedding, Linear, LSTM, Module
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, SubsetRandomSampler
from tqdm import tqdm

In [23]:


class CharacterDataset(Dataset):
    """Custom dataset.

    Parameters
    ----------
    text : str
        Input text that will be used to create the entire database.

    window_size : int
        Number of characters to use as input features.

    vocab_size : int
        Number of characters in the vocabulary. Note that the last character
        is always reserved for a special "~" out-of-vocabulary character.

    Attributes
    ----------
    ch2ix : defaultdict
        Mapping from the character to the position of that character in the
        vocabulary. Note that all characters that are not in the vocabulary
        will get mapped into the index `vocab_size - 1`.

    ix2ch : dict
        Mapping from the character position in the vocabulary to the actual
        character.

    vocabulary : list
        List of all characters. `len(vocabulary) == vocab_size`.
    """
    def __init__(self, text, window_size=1, vocab_size=50):
        self.text = text.replace("\n", " ")
        self.window_size = window_size
        self.ch2ix = defaultdict(lambda: vocab_size - 1)

        most_common_ch2ix = {
            x[0]: i
            for i, x in enumerate(Counter(self.text).most_common()[: (vocab_size - 1)])
        }
        self.ch2ix.update(most_common_ch2ix)
        self.ch2ix["~"] = vocab_size - 1

        self.ix2ch = {v: k for k, v in self.ch2ix.items()}
        self.vocabulary = [self.ix2ch[i] for i in range(vocab_size)]

    def __len__(self):
        return len(self.text) - self.window_size

    def __getitem__(self, ix):
        X = torch.LongTensor(
            [self.ch2ix[c] for c in self.text[ix : ix + self.window_size]]
        )
        y = self.ch2ix[self.text[ix + self.window_size]]

        return X, y


class Network(Module):
    """Custom network predicting the next character of a string.

    Parameters
    ----------
    vocab_size : int
        The number of characters in the vocabulary.

    embedding_dim : int
        Dimension of the character embedding vectors.

    dense_dim : int
        Number of neurons in the linear layer that follows the LSTM.

    hidden_dim : int
        Size of the LSTM hidden state.

    max_norm : int
        If any of the embedding vectors has a higher L2 norm than `max_norm`
        it is rescaled.

    n_layers : int
        Number of the layers of the LSTM.
    """
    def __init__(
        self,
        vocab_size,
        embedding_dim=2,
        dense_dim=32,
        hidden_dim=8,
        max_norm=2,
        n_layers=1,
    ):
        super().__init__()

        self.embedding = Embedding(
                vocab_size,
                embedding_dim,
                padding_idx=vocab_size - 1,
                norm_type=2,
                max_norm=max_norm,
        )
        self.lstm = LSTM(
                embedding_dim, hidden_dim, batch_first=True, num_layers=n_layers
        )
        self.linear_1 = Linear(hidden_dim, dense_dim)
        self.linear_2 = Linear(dense_dim, vocab_size)


    def forward(self, x, h=None, c=None):
        """Run the forward pass.

        Parameters
        ----------
        x : torch.Tensor
            Input tensor of shape `(n_samples, window_size)` of dtype
            `torch.int64`.

        h, c : torch.Tensor or None
            Hidden states of the LSTM.

        Returns
        -------
        logits : torch.Tensor
            Tensor of shape `(n_samples, vocab_size)`.

        h, c : torch.Tensor or None
            Hidden states of the LSTM.
        """
        emb = self.embedding(x)  # (n_samples, window_size, embedding_dim)
        if h is not None and c is not None:
            _, (h, c) = self.lstm(emb, (h, c))
        else:
            _, (h, c) = self.lstm(emb)  # (n_layers, n_samples, hidden_dim)

        h_mean = h.mean(dim=0)  # (n_samples, hidden_dim)
        x = self.linear_1(h_mean)  # (n_samples, dense_dim)
        logits = self.linear_2(x)  # (n_samples, vocab_size)

        return logits, h, c


In [24]:

def compute_loss(cal, net, dataloader):
    """Computer average loss over a dataset."""
    net.eval()
    all_losses = []
    for X_batch, y_batch in dataloader:
        probs, _, _ = net(X_batch)

        all_losses.append(cal(probs, y_batch).item())

    return np.mean(all_losses)

def generate_text(n_chars, net, dataset, initial_text="Hello", random_state=None):
    """Generate text with the character-level model.

    Parameters
    ----------
    n_chars : int
        Number of characters to generate.

    net : Module
        Character-level model.

    dataset : CharacterDataset
        Instance of the `CharacterDataset`.

    initial_text : str
        The starting text to be used as the initial condition for the model.

    random_state : None or int
        If not None, then the result is reproducible.

    Returns
    -------
    res : str
        Generated text.
    """
    if not initial_text:
        raise ValueError("You need to specify the initial text")

    res = initial_text
    net.eval()
    h, c = None, None

    if random_state is not None:
        np.random.seed(random_state)

    for _ in range(n_chars):
        previous_chars = initial_text if res == initial_text else res[-1]
        features = torch.LongTensor([[dataset.ch2ix[c] for c in previous_chars]])
        logits, h, c = net(features, h, c)
        probs = F.softmax(logits[0], dim=0).detach().numpy()
        new_ch = np.random.choice(dataset.vocabulary, p=probs)
        res += new_ch

    return res


In [25]:

if __name__ == "__main__":
    with open("text.txt", "r") as f:
        text = "\n".join(f.readlines())

    # Hyperparameters model
    vocab_size = 70
    window_size = 10
    embedding_dim = 2
    hidden_dim = 16
    dense_dim = 32
    n_layers = 1
    max_norm = 2

    # Training config
    n_epochs = 25
    train_val_split = 0.8
    batch_size = 128
    random_state = 13

    torch.manual_seed(random_state)

    loss_f = torch.nn.CrossEntropyLoss()
    dataset = CharacterDataset(text, window_size=window_size, vocab_size=vocab_size)

    n_samples = len(dataset)
    split_ix = int(n_samples * train_val_split)

    train_indices, val_indices = np.arange(split_ix), np.arange(split_ix, n_samples)

    train_dataloader = DataLoader(
            dataset, sampler=SubsetRandomSampler(train_indices), batch_size=batch_size
    )
    val_dataloader = DataLoader(
            dataset, sampler=SubsetRandomSampler(val_indices), batch_size=batch_size
    )

    net = Network(
            vocab_size,
            hidden_dim=hidden_dim,
            n_layers=n_layers,
            dense_dim=dense_dim,
            embedding_dim=embedding_dim,
            max_norm=max_norm,
    )
    optimizer = torch.optim.Adam(
            net.parameters(),
            lr=1e-2,
    )

    emb_history = []

    for e in range(n_epochs + 1):
        net.train()
        for X_batch, y_batch in tqdm(train_dataloader):
            if e == 0:
                break

            optimizer.zero_grad()
            probs, _, _ = net(X_batch)
            loss = loss_f(probs, y_batch)
            loss.backward()

            optimizer.step()

        train_loss = compute_loss(loss_f, net, train_dataloader)
        val_loss = compute_loss(loss_f, net, val_dataloader)
        print(f"Epoch: {e}, {train_loss=:.3f}, {val_loss=:.3f}")

        # Generate one sentence
        initial_text = "I hope it works "
        generated_text = generate_text(
            100, net, dataset, initial_text=initial_text, random_state=random_state
        )
        print(generated_text)

        # Prepare DataFrame
        weights = net.embedding.weight.detach().clone().numpy()

        df = pd.DataFrame(weights, columns=[f"dim_{i}" for i in range(embedding_dim)])
        df["epoch"] = e
        df["character"] = dataset.vocabulary

        emb_history.append(df)

final_df = pd.concat(emb_history)
final_df.to_csv("res.csv", index=False)

  0%|          | 0/6405 [00:04<?, ?it/s]


Epoch: 0, train_loss=4.351, val_loss=4.351
I hope it works Ecz7QH:EJPtbo1"BgI v5f.UtoCF VRnqA€7 yP.,?Uyây"LEn?SrsydU?cDSn2'dw”P~F”sUvcFrc!D7HiOmu;k7r2Bsh7wngJS


100%|██████████| 6405/6405 [01:30<00:00, 71.01it/s] 


Epoch: 1, train_loss=1.792, val_loss=1.797
I hope it works  ad."  Shat  gorse  I  be  peny  so,  wond.  I  cen  thero  it  to  Crook  paoud.  Wood  that  to  d


100%|██████████| 6405/6405 [01:47<00:00, 59.67it/s] 


Epoch: 2, train_loss=1.759, val_loss=1.770
I hope it works  arbbislofe  come  ovenp  dlerlerte,  wondper  is  stet  ut  the  dikint,  ge  dusen  hem  we  a  lh


100%|██████████| 6405/6405 [02:05<00:00, 51.17it/s]


Epoch: 3, train_loss=1.740, val_loss=1.753
I hope it works  alfârtard  exad   nlerp  ruel,  of.  He  beoned  lone  bitht  on  wruin,  geade.  âr  exews  I  not


100%|██████████| 6405/6405 [01:44<00:00, 61.29it/s]


Epoch: 4, train_loss=1.736, val_loss=1.748
I hope it works  thry  mowe  for   nwt   epfelf  hav  was  the  meod  tother  for  ubkook  cangh.  Wo  hroy  tre  ro


100%|██████████| 6405/6405 [01:33<00:00, 68.64it/s]


Epoch: 5, train_loss=1.739, val_loss=1.750
I hope it works  thip  mice  you   nfe    sl  cest   as  with  is  see  but   gas  wliln.  Bethey  me  ap  u  I  ase


100%|██████████| 6405/6405 [01:44<00:00, 61.12it/s]


Epoch: 6, train_loss=1.726, val_loss=1.736
I hope it works  alcOnallies  of   his   ecd  mestec  wond,  and  woonet,  so  he  wlid  osc  in,  me  ap  leeve  lo


100%|██████████| 6405/6405 [01:40<00:00, 63.86it/s]


Epoch: 7, train_loss=1.749, val_loss=1.760
I hope it works  aly.  White  of   his    th  .tird'  word.  I  fert  told  it  to  Droagt   as  her  heg  we  tas  


100%|██████████| 6405/6405 [01:32<00:00, 68.95it/s]


Epoch: 8, train_loss=1.718, val_loss=1.727
I hope it works  alcOthurd  the  neab  be  perither"  woouper  is  seet  u  it  to  Mun  a  took.  Wothecech  Wan  s


100%|██████████| 6405/6405 [01:45<00:00, 60.77it/s]


Epoch: 9, train_loss=1.718, val_loss=1.730
I hope it works  thipesmome  cowe  aye   euw  feore.  Matty  and  woon  bitht  on  wris  anle  spoom  odSell  ve  do


100%|██████████| 6405/6405 [01:52<00:00, 57.05it/s]


Epoch: 10, train_loss=1.727, val_loss=1.738
I hope it works  adk,  mice  court  be    is  ferse,  wond,  and  when  binde  he  wlid  nale  ig  we  his  to,  as 


100%|██████████| 6405/6405 [01:47<00:00, 59.79it/s]


Epoch: 11, train_loss=1.718, val_loss=1.734
I hope it works  alm-  I   neptnen  kaic   bed,  hav  wost,  and  whean  waid  nettivlon,  be  ip  me  oveus  ve  un


100%|██████████| 6405/6405 [01:25<00:00, 75.24it/s]


Epoch: 12, train_loss=1.721, val_loss=1.734
I hope it works  alc'sed   tot  a  his     "  Dadouk  was  and  mead  a  waid  on  wliled  can  koof  orvelle  to  r


100%|██████████| 6405/6405 [01:24<00:00, 75.92it/s]


Epoch: 13, train_loss=1.725, val_loss=1.740
I hope it works  alc.  Muce  court  .ance  cell  som  unhokes  sitst  at,  so  on  wlilty  yastapt  at  I  leeke  da


100%|██████████| 6405/6405 [01:24<00:00, 75.52it/s]


Epoch: 14, train_loss=1.709, val_loss=1.721
I hope it works  arpker,   a  has  ovenp  lrews  sog  was  and  meson  time  exanted:,  qitce  ig  mandep  we  tast 


100%|██████████| 6405/6405 [01:20<00:00, 80.01it/s]


Epoch: 15, train_loss=1.719, val_loss=1.736
I hope it works  alp'erchuehelales  I  be  gell  so,  moal.  I  me  to  boous  he  wlire.  Bethago  to  I  letken  s


100%|██████████| 6405/6405 [01:22<00:00, 77.41it/s]


KeyboardInterrupt: 