# Importing Packages

In [1]:
import torch
import torchtext

from collections import defaultdict

# Task 1 Create Dataset for Generative Learning

### Storing the haikus as strings

In [2]:
# The five training haikus; each one closes with the same final line.
haikus = [
    "Tranquil waters flow, Whispering secrets of time, Embraced by the night.",
    "Moonlight dances soft, Through branches of ancient oak, Embraced by the night.",
    "Serene silence reigns, Stars shimmer in the night sky, Embraced by the night.",
    "Shadows dance gently, Across fields of golden wheat, Embraced by the night.",
    "Fireflies flicker bright, Illuminating the dark, Embraced by the night.",
]
# Keep the individual names available alongside the list.
haiku1, haiku2, haiku3, haiku4, haiku5 = haikus

# One haiku per output line.
print(*haikus, sep="\n")

Tranquil waters flow, Whispering secrets of time, Embraced by the night.
Moonlight dances soft, Through branches of ancient oak, Embraced by the night.
Serene silence reigns, Stars shimmer in the night sky, Embraced by the night.
Shadows dance gently, Across fields of golden wheat, Embraced by the night.
Fireflies flicker bright, Illuminating the dark, Embraced by the night.


### Tokenize haikus into words

In [3]:
# torchtext's "basic_english" tokenizer; as the output below shows, it lower-cases
# the text and emits punctuation marks as separate tokens.
tokenizer = torchtext.data.utils.get_tokenizer("basic_english")

# One token list per haiku, in the same order as `haikus`.
tokenized_haikus = list(map(tokenizer, haikus))
tokenized_haikus

[['tranquil',
  'waters',
  'flow',
  ',',
  'whispering',
  'secrets',
  'of',
  'time',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['moonlight',
  'dances',
  'soft',
  ',',
  'through',
  'branches',
  'of',
  'ancient',
  'oak',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['serene',
  'silence',
  'reigns',
  ',',
  'stars',
  'shimmer',
  'in',
  'the',
  'night',
  'sky',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['shadows',
  'dance',
  'gently',
  ',',
  'across',
  'fields',
  'of',
  'golden',
  'wheat',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['fireflies',
  'flicker',
  'bright',
  ',',
  'illuminating',
  'the',
  'dark',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.']]

### Tagging end of line with `</l>` and end of haiku with `</e>`

In [4]:
# (token ending, marker to append): a comma closes a line, a period closes the haiku.
end_markers = ((",", "</l>"), (".", "</e>"))

# Tag tokens in place inside each haiku's token list.
for line_tokens in tokenized_haikus:
    for position, word in enumerate(line_tokens):
        for ending, marker in end_markers:
            if word.endswith(ending):
                line_tokens[position] = word + marker
                break  # at most one marker per token, comma checked first

tokenized_haikus

[['tranquil',
  'waters',
  'flow',
  ',</l>',
  'whispering',
  'secrets',
  'of',
  'time',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['moonlight',
  'dances',
  'soft',
  ',</l>',
  'through',
  'branches',
  'of',
  'ancient',
  'oak',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['serene',
  'silence',
  'reigns',
  ',</l>',
  'stars',
  'shimmer',
  'in',
  'the',
  'night',
  'sky',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['shadows',
  'dance',
  'gently',
  ',</l>',
  'across',
  'fields',
  'of',
  'golden',
  'wheat',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['fireflies',
  'flicker',
  'bright',
  ',</l>',
  'illuminating',
  'the',
  'dark',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>']]

### Flattening `tokenized_haikus` in place to build vocab

In [5]:
# Flatten tokenized_haikus in place: nested lists are replaced by their elements.
def _iter_flat(items):
    """Yield every non-list element of ``items``, descending into nested lists."""
    for item in items:
        if isinstance(item, list):
            yield from _iter_flat(item)
        else:
            yield item


# Slice assignment rewrites the contents of the existing list object rather than
# rebinding the name; the flat copy is materialized first so the list is not
# modified while it is being traversed.
tokenized_haikus[:] = list(_iter_flat(tokenized_haikus))

tokenized_haikus

['tranquil',
 'waters',
 'flow',
 ',</l>',
 'whispering',
 'secrets',
 'of',
 'time',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'moonlight',
 'dances',
 'soft',
 ',</l>',
 'through',
 'branches',
 'of',
 'ancient',
 'oak',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'serene',
 'silence',
 'reigns',
 ',</l>',
 'stars',
 'shimmer',
 'in',
 'the',
 'night',
 'sky',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'shadows',
 'dance',
 'gently',
 ',</l>',
 'across',
 'fields',
 'of',
 'golden',
 'wheat',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>',
 'fireflies',
 'flicker',
 'bright',
 ',</l>',
 'illuminating',
 'the',
 'dark',
 ',</l>',
 'embraced',
 'by',
 'the',
 'night',
 '.</e>']

### Building vocab

In [6]:
# build_vocab_from_iterator consumes an iterable of token sequences, so the
# flattened corpus is wrapped in a one-element list and treated as a single sequence.
token_sequences = [tokenized_haikus]
vocabulary = torchtext.vocab.build_vocab_from_iterator(token_sequences)
# `vocabulary` is torchtext's built-in Vocab object; see the torchtext.vocab
# documentation for the lookup methods it offers.
vocabulary

Vocab()

### Index of each token within `vocabulary`

In [7]:
# Map every token of the flattened corpus to its integer id in `vocabulary`,
# preserving corpus order.
indexed_tokens = vocabulary.lookup_indices(tokenized_haikus)
indexed_tokens

[35,
 36,
 17,
 0,
 38,
 25,
 6,
 34,
 0,
 5,
 4,
 1,
 2,
 3,
 22,
 12,
 31,
 0,
 33,
 9,
 6,
 8,
 23,
 0,
 5,
 4,
 1,
 2,
 3,
 26,
 29,
 24,
 0,
 32,
 28,
 21,
 1,
 2,
 30,
 0,
 5,
 4,
 1,
 2,
 3,
 27,
 11,
 18,
 0,
 7,
 14,
 6,
 19,
 37,
 0,
 5,
 4,
 1,
 2,
 3,
 15,
 16,
 10,
 0,
 20,
 1,
 13,
 0,
 5,
 4,
 1,
 2,
 3]

# Task 2 Create a Model that Implements MultiheadTransformer

### Get `torch.nn`

In [8]:
import torch.nn as nn

### Feed to an embedding layer

In [9]:
embedding_dimension = 3
vocab_size = len(vocabulary)

# Lookup table with one learnable `embedding_dimension`-sized vector per vocabulary entry.
embedding_layer = nn.Embedding(
    num_embeddings=vocab_size,
    embedding_dim=embedding_dimension,
)

# Embed the token ids, then swap the two axes so the result is
# (embedding_dimension, number_of_tokens), as the shape printed below shows.
token_ids = torch.tensor(indexed_tokens)
input_embeddings = embedding_layer(token_ids).t()

input_embeddings.shape

torch.Size([3, 73])

### MultiHeadAttention class

In [10]:
class MultiHeadAttention(nn.Module):
    """Self-attention block built on ``nn.MultiheadAttention``.

    Args:
        num_embeddings: Forwarded to ``nn.MultiheadAttention`` as its
            ``embed_dim`` (the size of each token's embedding vector).
        num_heads: Number of attention heads. ``nn.MultiheadAttention``
            requires ``embed_dim`` to be divisible by this value.
    """

    def __init__(self, num_embeddings: int, num_heads: int):
        # Zero-argument super() does not look the class up by its global name,
        # so construction keeps working even if the name `MultiHeadAttention`
        # is later rebound (for example to an instance of this class).
        super().__init__()
        self.num_embeddings = num_embeddings
        self.num_heads = num_heads
        self.attention = nn.MultiheadAttention(self.num_embeddings, self.num_heads)

    def forward(self, x: torch.Tensor):
        """Run self-attention over ``x``.

        Args:
            x: Input whose first two dimensions are swapped before attention
                is applied. For a 2-D input this means ``x`` is laid out as
                ``(embedding_dim, sequence_length)``.

        Returns:
            The tuple produced by ``nn.MultiheadAttention``:
            ``(attention_output, attention_weights)``. Index ``[0]`` to get the
            attended embeddings.
        """
        # nn.MultiheadAttention expects the embedding axis last, so swap the
        # first two dimensions of the incoming tensor.
        sequence = x.transpose(0, 1)

        # Self-attention: the same tensor serves as query, key and value.
        return self.attention(sequence, sequence, sequence)

### Instantiate MultiHeadAttention class

In [11]:
# Later cells use the name `MultiHeadAttention` for the *instance*, so the class
# name is deliberately rebound here. The guard keeps this cell re-runnable: once
# the name already refers to an instance, calling it again with these two
# arguments would be routed to forward() and raise a TypeError.
# 3 heads over a 3-dimensional embedding gives each head a single dimension.
if isinstance(MultiHeadAttention, type):
    MultiHeadAttention = MultiHeadAttention(embedding_dimension, 3)

### Defining Linear Layer

In [12]:
# Projects each embedding_dimension-sized vector onto one score per vocabulary entry.
linear_layer = nn.Linear(
    in_features=embedding_dimension,
    out_features=len(vocabulary),
)

### Passing Into Linear Layer

In [14]:
# Call the module itself rather than .forward() so nn.Module's __call__ machinery
# (hooks) runs. The module returns (attention_output, attention_weights); only the
# attended embeddings are fed to the linear layer.
attention_output = MultiHeadAttention(input_embeddings)[0]
linear_output = linear_layer(attention_output)
# NOTE(review): the RuntimeError recorded under this cell reports a (3, 73) input
# reaching the linear layer. With forward() transposing its input, the attention
# output should have the embedding axis last, so that traceback looks like stale
# output from an earlier kernel state (the execution count also skips In [13]).
# Confirm with Restart Kernel -> Run All.

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x73 and 3x39)