# Importing Packages

In [1]:
!pip install torchtext





In [2]:
import torch
import torchtext

from collections import defaultdict

# Task 1 Create Dataset for Generative Learning

### Storing the haikus into strings.

In [3]:
# Five haikus; each one closes with the line "Embraced by the night."
haiku1 = "Tranquil waters flow, Whispering secrets of time, Embraced by the night."
haiku2 = "Moonlight dances soft, Through branches of ancient oak, Embraced by the night."
haiku3 = "Serene silence reigns, Stars shimmer in the night sky, Embraced by the night."
haiku4 = "Shadows dance gently, Across fields of golden wheat, Embraced by the night."
haiku5 = "Fireflies flicker bright, Illuminating the dark, Embraced by the night."

# Collected in reading order; later cells work on this list.
haikus = [haiku1, haiku2, haiku3, haiku4, haiku5]

# Show one haiku per output line.
print(*haikus, sep="\n")

Tranquil waters flow, Whispering secrets of time, Embraced by the night.
Moonlight dances soft, Through branches of ancient oak, Embraced by the night.
Serene silence reigns, Stars shimmer in the night sky, Embraced by the night.
Shadows dance gently, Across fields of golden wheat, Embraced by the night.
Fireflies flicker bright, Illuminating the dark, Embraced by the night.


### Tokenize haikus into words

In [4]:
# torchtext's "basic_english" tokenizer; as the output below shows, it
# lower-cases the text and emits punctuation marks as separate tokens.
tokenizer = torchtext.data.utils.get_tokenizer("basic_english")

# One token list per haiku, in the same order as `haikus`.
tokenized_haikus = list(map(tokenizer, haikus))
tokenized_haikus

[['tranquil',
  'waters',
  'flow',
  ',',
  'whispering',
  'secrets',
  'of',
  'time',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['moonlight',
  'dances',
  'soft',
  ',',
  'through',
  'branches',
  'of',
  'ancient',
  'oak',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['serene',
  'silence',
  'reigns',
  ',',
  'stars',
  'shimmer',
  'in',
  'the',
  'night',
  'sky',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['shadows',
  'dance',
  'gently',
  ',',
  'across',
  'fields',
  'of',
  'golden',
  'wheat',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['fireflies',
  'flicker',
  'bright',
  ',',
  'illuminating',
  'the',
  'dark',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.']]

### Tagging end of line with `</l>` and end of haiku with `</e>`

In [5]:
# Mark structure directly on the punctuation tokens (modifies the lists in place):
#   a token ending in ',' gets the end-of-line tag   "</l>"
#   a token ending in '.' gets the end-of-haiku tag  "</e>"
for line_tokens in tokenized_haikus:
    for position in range(len(line_tokens)):
        current = line_tokens[position]
        if current.endswith(','):
            line_tokens[position] = current + "</l>"
        elif current.endswith('.'):
            line_tokens[position] = current + "</e>"

tokenized_haikus

[['tranquil',
  'waters',
  'flow',
  ',</l>',
  'whispering',
  'secrets',
  'of',
  'time',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['moonlight',
  'dances',
  'soft',
  ',</l>',
  'through',
  'branches',
  'of',
  'ancient',
  'oak',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['serene',
  'silence',
  'reigns',
  ',</l>',
  'stars',
  'shimmer',
  'in',
  'the',
  'night',
  'sky',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['shadows',
  'dance',
  'gently',
  ',</l>',
  'across',
  'fields',
  'of',
  'golden',
  'wheat',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['fireflies',
  'flicker',
  'bright',
  ',</l>',
  'illuminating',
  'the',
  'dark',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>']]

### Flattening `tokenized_haikus` in place to build vocab

In [6]:
# Flatten tokenized_haikus in place: every nested list (at any depth) is
# replaced by its elements, preserving order. Non-list items are kept as-is,
# so running this on an already flat list leaves it unchanged.
pending = tokenized_haikus[::-1]  # used as a stack; the next item to visit is at the end
flat_tokens = []
while pending:
    item = pending.pop()
    if isinstance(item, list):
        # Push the children so that the first child is visited next.
        pending.extend(reversed(item))
    else:
        flat_tokens.append(item)

# Slice assignment keeps the same list object that earlier cells created.
tokenized_haikus[:] = flat_tokens

tokenized_haikus

['tranquil',
 'waters',
 'flow',
 ',</l>',
 'whispering',
 'secrets',
 'of',
 'time',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'moonlight',
 'dances',
 'soft',
 ',</l>',
 'through',
 'branches',
 'of',
 'ancient',
 'oak',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'serene',
 'silence',
 'reigns',
 ',</l>',
 'stars',
 'shimmer',
 'in',
 'the',
 'night',
 'sky',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'shadows',
 'dance',
 'gently',
 ',</l>',
 'across',
 'fields',
 'of',
 'golden',
 'wheat',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'fireflies',
 'flicker',
 'bright',
 ',</l>',
 'illuminating',
 'the',
 'dark',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>']

### Building vocab

In [7]:
# build_vocab_from_iterator consumes an iterable of token sequences, so the
# flat token list is wrapped in a one-element list (the corpus is treated as a
# single sequence of tokens).
vocabulary = torchtext.vocab.build_vocab_from_iterator([tokenized_haikus])
# `vocabulary` is torchtext's built-in Vocab object (its repr is just "Vocab()");
# see the torchtext documentation for its lookup methods.
vocabulary

Vocab()

### Index of each token within `vocabulary`

In [8]:
# Map every token of the flattened corpus to its integer id in `vocabulary`,
# using the Vocab object's batch lookup instead of per-token indexing.
indexed_tokens = vocabulary.lookup_indices(tokenized_haikus)
indexed_tokens

[35,
 36,
 17,
 0,
 38,
 25,
 6,
 34,
 0,
 5,
 4,
 1,
 2,
 3,
 22,
 12,
 31,
 0,
 33,
 9,
 6,
 8,
 23,
 0,
 5,
 4,
 1,
 2,
 3,
 26,
 29,
 24,
 0,
 32,
 28,
 21,
 1,
 2,
 30,
 0,
 5,
 4,
 1,
 2,
 3,
 27,
 11,
 18,
 0,
 7,
 14,
 6,
 19,
 37,
 0,
 5,
 4,
 1,
 2,
 3,
 15,
 16,
 10,
 0,
 20,
 1,
 13,
 0,
 5,
 4,
 1,
 2,
 3]

# Task 2 Create a Model that Implements MultiheadTransformer

### Get `torch.nn`

In [9]:
import torch.nn as nn

### Feed to an embedding layer

In [10]:
# Size of each token vector and number of distinct tokens.
embedding_dimension = 3
vocab_size = len(vocabulary)

# Lookup table with one row of `embedding_dimension` values per vocabulary entry.
embedding_layer = nn.Embedding(num_embeddings=vocab_size, embedding_dim=embedding_dimension)

# Embed the whole token-id sequence, then swap the two axes so the result is
# (embedding_dimension, number_of_tokens) — see the shape printed below.
token_ids = torch.tensor(indexed_tokens)
input_embeddings = embedding_layer(token_ids).transpose(0, 1)

input_embeddings.shape

torch.Size([3, 73])

### MultiHeadAttention class

In [11]:
class MultiHeadAttention(nn.Module):
    """Self-attention wrapper around ``nn.MultiheadAttention``.

    Args:
        num_embeddings: passed to ``nn.MultiheadAttention`` as its embedding
            dimension.
        num_heads: number of attention heads.
    """

    def __init__(self, num_embeddings, num_heads):
        super().__init__()
        self.num_embeddings, self.num_heads = num_embeddings, num_heads
        self.attention = nn.MultiheadAttention(
            embed_dim=self.num_embeddings,
            num_heads=self.num_heads,
        )

    def forward(self, x):
        """Swap the first two axes of ``x`` and run self-attention on the result.

        Returns:
            Whatever ``nn.MultiheadAttention`` returns for the call: a tuple
            ``(attention_output, attention_weights)``.
        """
        # Self-attention: the same tensor serves as query, key and value.
        query = key = value = x.transpose(0, 1)
        return self.attention(query, key, value)

### Instantiate MultiHeadAttention class

In [12]:
# Build the attention module with embedding size `embedding_dimension` and 3 heads.
# NOTE(review): this rebinds the name `MultiHeadAttention` from the class to an
# instance of it. After this cell runs the class is no longer reachable by that
# name, so running the cell a second time calls the instance instead of the
# constructor — re-run the class definition cell (or restart the kernel) first.
MultiHeadAttention = MultiHeadAttention(embedding_dimension, 3)

### Defining Linear Layer

In [13]:
# Output projection: maps each `embedding_dimension`-sized vector to one score
# per vocabulary entry. The output width is the number of distinct tokens
# (`vocab_size`), not the length of the corpus (`len(indexed_tokens)`), so the
# layer does not change size when the same vocabulary is used on longer text.
linear_layer = nn.Linear(embedding_dimension, vocab_size)

### Passing Into Linear Layer

In [14]:
# `MultiHeadAttention` is the module instance created above. Call it directly
# rather than through `.forward(...)`: nn.Module.__call__ is the supported entry
# point and also runs any registered hooks.
# The module returns (attention_output, attention_weights); only the attention
# output is fed to the linear layer.
attention_output, attention_weights = MultiHeadAttention(input_embeddings)
linear_output = linear_layer(attention_output)
linear_output

tensor([[ 0.5464,  0.0793, -0.4347,  ..., -0.2016,  0.5659,  0.0045],
        [ 0.5794,  0.1398, -0.4425,  ..., -0.1636,  0.6198, -0.0731],
        [ 0.3852,  0.0582, -0.4574,  ..., -0.3079,  0.4541,  0.1871],
        ...,
        [ 0.4311,  0.1274, -0.4646,  ..., -0.2597,  0.5208,  0.0888],
        [ 0.5986,  0.1100, -0.4334,  ..., -0.1594,  0.6153, -0.0688],
        [ 0.4398,  0.1634, -0.4712,  ..., -0.2437,  0.5461,  0.0545]],
       grad_fn=<AddmmBackward0>)

# Task 3 Create a Model that Implements MultiheadTransformer


In [17]:
!pip install torch
!pip install tqdm


[31mERROR: Could not find a version that satisfies the requirement library-name (from versions: none)[0m
[31mERROR: No matching distribution found for library-name[0m


In [22]:
import pickle
import torch
import datetime
import math

def get_batch_loader(data, context_size, batch_size):
    """Build a zero-argument function that samples random windows from ``data``.

    Args:
        data: sliceable sequence whose slices can be stacked with
            ``torch.stack`` (presumably a 1-D tensor of token ids — confirm
            against the caller).
        context_size: length of every sampled window.
        batch_size: number of windows per batch.

    Returns:
        A callable; each call draws ``batch_size`` start offsets uniformly
        from ``[0, len(data) - context_size]`` and returns the stacked windows
        ``data[start:start + context_size]``.
    """
    def closure():
        # Number of valid start offsets; evaluated on every call.
        num_starts = len(data) - context_size + 1
        starts = torch.randint(high=num_starts, size=(batch_size,))

        windows = []
        for start in starts:
            windows.append(data[start:start + context_size])
        return torch.stack(windows)

    return closure

In [24]:
class TransformerDecoder(nn.Module):
    def __init__(self, d_model=768, n_head=12, d_ffn=2048, dropout=0.1, device='cpu'):
        super(TransformerDecoder, self).__init__()
        self.d_model = d_model
        self.n_head = n_head
        self.d_ffn = d_ffn
        self.dropout = nn.Dropout(dropout)
        self.device = device

        self.norm_1 = nn.LayerNorm(d_model)
        self.norm_2 = nn.LayerNorm(d_model)

        self.ffn_1 = nn.Linear(d_model, d_ffn)
        self.ffn_2 = nn.Linear(d_ffn, d_model)

        self.gelu = nn.GELU()

        self.attention = nn.MultiheadAttention(d_model, n_head, dropout, batch_first=True, device=device)
        def forward(self, x):
        # x is of shape (N,L,d_model)

            x_1 = self.norm_1(x) # remember x for residual

        # generate mask for masked self-attention
        mask = torch.triu(torch.ones(x.shape[1], x.shape[1]), diagonal=1).bool().to(self.device)

        x_1, _ = self.attention(x_1, x_1, x_1, attn_mask=mask, need_weights=False)

        x_1 = self.dropout(x_1)
        x_1 = x_1 + x
        x_1 = self.norm_2(x_1)

        x_2 = self.ffn_1(x_1)
        x_2 = self.gelu(x_2)
        x_2 = self.ffn_2(x_2)
        x_2 = self.dropout(x_2)

        return x_2 + x_1

class Transformer(nn.Module):
    """Decoder-only Transformer that returns next-token logits.

    Token ids are embedded, sinusoidal positional encodings are added, the
    result passes through ``n_block`` ``TransformerDecoder`` blocks, and the
    representation of the last position is projected to ``vocab_size`` logits.

    Args:
        context_size: number of positions for which positional encodings are
            generated.
        vocab_size: number of distinct token ids.
        d_model: width of the token representations.
        dropout: dropout probability, applied after the embedding and passed
            to every decoder block.
        n_block: number of decoder blocks.
        device: device for the positional encodings and the attention modules.
        n_head: attention heads per decoder block.
        d_ffn: hidden width of the feed-forward network in every block.
    """

    def __init__(self, context_size, vocab_size, d_model=768, dropout=0.1, n_block=12, device='cpu',
                 n_head=12, d_ffn=2048):
        super().__init__()
        self.context_size = context_size
        self.vocab_size = vocab_size
        self.d_model = d_model
        self.n_head = n_head
        self.d_ffn = d_ffn
        self.dropout = nn.Dropout(dropout)
        self.n_block = n_block
        self.device = device

        self.embedding = nn.Embedding(vocab_size, d_model)

        # Every block receives the model's own hyper-parameters; relying on the
        # block defaults only works when d_model is 768 and dropout is 0.1.
        self.dec_blocks = nn.ModuleList([
            TransformerDecoder(d_model=d_model, n_head=n_head, d_ffn=d_ffn, dropout=dropout, device=device)
            for _ in range(n_block)
        ])

        # Registered as a buffer so `.to(...)` moves it with the parameters;
        # persistent=False keeps it out of state_dict, so its keys are unchanged.
        self.register_buffer("pe", self.gen_pe(context_size, d_model), persistent=False)

        # NOTE(review): `norm` is created but never used in forward — confirm
        # whether a final LayerNorm before `ffn` was intended.
        self.norm = nn.LayerNorm(d_model)
        self.ffn = nn.Linear(d_model, vocab_size)

    def gen_pe(self, r, c):
        """Return the (r, c) sinusoidal positional-encoding table on ``self.device``.

        For row k and column i, with ``j = i`` for even i and ``j = i - 1`` for
        odd i, and ``theta = exp(-(j / c) * ln(10000))``:
        ``pe[k, i] = sin(k * theta)`` for even i and ``cos(k * theta)`` for odd i.
        """
        positions = torch.arange(r, dtype=torch.float64).unsqueeze(1)  # (r, 1)
        columns = torch.arange(c)
        is_even = columns % 2 == 0
        # Each odd column shares the frequency of the even column before it.
        paired = (columns - columns % 2).to(torch.float64)
        # exp/log form of 1 / 10000 ** (j / c).
        theta = torch.exp(-(paired / c) * math.log(10_000))  # (c,)
        angles = positions * theta  # (r, c)
        pe = torch.where(is_even, torch.sin(angles), torch.cos(angles))
        # Computed in float64, stored in the default dtype like torch.zeros(r, c).
        return pe.to(dtype=torch.get_default_dtype(), device=self.device)

    def forward(self, x):
        """Return logits of shape (N, vocab_size) for token ids ``x`` of shape (N, L)."""
        x = self.embedding(x)  # (N, L, d_model)
        length = x.shape[1]
        x = x + self.pe[:length]
        x = self.dropout(x)

        for dec in self.dec_blocks:
            x = dec(x)

        # Only the last position is projected to vocabulary logits.
        x = self.ffn(x[:, -1])

        # no softmax as we use CELoss
        return x
    

IndentationError: expected an indented block (<ipython-input-24-2cc189119892>, line 22)

In [None]:
from model import Transformer
from torch import nn
from tqdm import tqdm



In [None]:
# Use the GPU when PyTorch reports one as available; otherwise run on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'