# Importing Packages

In [1]:
import torch
import torchtext

from collections import defaultdict

# Task 1: Create Dataset for Generative Learning

### Storing the haikus as strings

In [2]:
# The haiku corpus: five strings, one per haiku. Inside each string the
# lines of the haiku are separated by commas and the last line ends with a
# period.
haikus = [
    "Tranquil waters flow, Whispering secrets of time, Embraced by the night.",
    "Moonlight dances soft, Through branches of ancient oak, Embraced by the night.",
    "Serene silence reigns, Stars shimmer in the night sky, Embraced by the night.",
    "Shadows dance gently, Across fields of golden wheat, Embraced by the night.",
    "Fireflies flicker bright, Illuminating the dark, Embraced by the night.",
]

# Keep each haiku reachable under its own name as well.
haiku1, haiku2, haiku3, haiku4, haiku5 = haikus

# Show the corpus, one haiku per output line.
print(*haikus, sep="\n")

Tranquil waters flow, Whispering secrets of time, Embraced by the night.
Moonlight dances soft, Through branches of ancient oak, Embraced by the night.
Serene silence reigns, Stars shimmer in the night sky, Embraced by the night.
Shadows dance gently, Across fields of golden wheat, Embraced by the night.
Fireflies flicker bright, Illuminating the dark, Embraced by the night.


### Tokenize haikus into words

In [3]:
# torchtext's "basic_english" tokenizer. Judging by this cell's output, it
# lowercases the text and emits ',' and '.' as tokens of their own.
tokenizer = torchtext.data.utils.get_tokenizer("basic_english")

# One token list per haiku, in the same order as `haikus`.
tokenized_haikus = list(map(tokenizer, haikus))
tokenized_haikus

[['tranquil',
  'waters',
  'flow',
  ',',
  'whispering',
  'secrets',
  'of',
  'time',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['moonlight',
  'dances',
  'soft',
  ',',
  'through',
  'branches',
  'of',
  'ancient',
  'oak',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['serene',
  'silence',
  'reigns',
  ',',
  'stars',
  'shimmer',
  'in',
  'the',
  'night',
  'sky',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['shadows',
  'dance',
  'gently',
  ',',
  'across',
  'fields',
  'of',
  'golden',
  'wheat',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.'],
 ['fireflies',
  'flicker',
  'bright',
  ',',
  'illuminating',
  'the',
  'dark',
  ',',
  'embraced',
  'by',
  'the',
  'night',
  '.']]

### Tagging end of line with `</l>` and end of haiku with `</e>`

In [4]:
# Marker to append to a token, keyed by the character the token ends with:
# a trailing comma closes a line of the haiku, a trailing period closes the
# haiku itself. Tokens ending in anything else are left untouched.
end_markers = {',': "</l>", '.': "</e>"}

for haiku_tokens in tokenized_haikus:
    # Slice assignment rewrites the existing inner list in place, so
    # `tokenized_haikus` keeps referring to the same (now tagged) lists.
    haiku_tokens[:] = [
        token + end_markers.get(token[-1:], "")
        for token in haiku_tokens
    ]

tokenized_haikus

[['tranquil',
  'waters',
  'flow',
  ',</l>',
  'whispering',
  'secrets',
  'of',
  'time',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['moonlight',
  'dances',
  'soft',
  ',</l>',
  'through',
  'branches',
  'of',
  'ancient',
  'oak',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['serene',
  'silence',
  'reigns',
  ',</l>',
  'stars',
  'shimmer',
  'in',
  'the',
  'night',
  'sky',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['shadows',
  'dance',
  'gently',
  ',</l>',
  'across',
  'fields',
  'of',
  'golden',
  'wheat',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>'],
 ['fireflies',
  'flicker',
  'bright',
  ',</l>',
  'illuminating',
  'the',
  'dark',
  ',</l>',
  'embraced',
  'by',
  'the',
  'night',
  '.</e>']]

### Generating sequences

In [5]:
sequences = []
x = 3  # number of consecutive tokens in the first element of each pair
y = 1  # number of tokens that immediately follow, in the second element

# Total number of tokens consumed by one (x-token, y-token) pair.
window = x + y

for haiku_tokens in tokenized_haikus:
    # Slide the window across this haiku one token at a time. Windows are
    # built per haiku, so a pair never mixes tokens from two haikus; a haiku
    # shorter than the window produces an empty range and contributes nothing.
    for start in range(len(haiku_tokens) - window + 1):
        chunk = haiku_tokens[start:start + window]
        # First x tokens, then the y tokens that come right after them.
        sequences.append((chunk[:x], chunk[x:]))

# Display every generated (x-token, y-token) pair.
sequences

[(['tranquil', 'waters', 'flow'], [',</l>']),
 (['waters', 'flow', ',</l>'], ['whispering']),
 (['flow', ',</l>', 'whispering'], ['secrets']),
 ([',</l>', 'whispering', 'secrets'], ['of']),
 (['whispering', 'secrets', 'of'], ['time']),
 (['secrets', 'of', 'time'], [',</l>']),
 (['of', 'time', ',</l>'], ['embraced']),
 (['time', ',</l>', 'embraced'], ['by']),
 ([',</l>', 'embraced', 'by'], ['the']),
 (['embraced', 'by', 'the'], ['night']),
 (['by', 'the', 'night'], ['.</e>']),
 (['moonlight', 'dances', 'soft'], [',</l>']),
 (['dances', 'soft', ',</l>'], ['through']),
 (['soft', ',</l>', 'through'], ['branches']),
 ([',</l>', 'through', 'branches'], ['of']),
 (['through', 'branches', 'of'], ['ancient']),
 (['branches', 'of', 'ancient'], ['oak']),
 (['of', 'ancient', 'oak'], [',</l>']),
 (['ancient', 'oak', ',</l>'], ['embraced']),
 (['oak', ',</l>', 'embraced'], ['by']),
 ([',</l>', 'embraced', 'by'], ['the']),
 (['embraced', 'by', 'the'], ['night']),
 (['by', 'the', 'night'], ['.</e>'])

# Task 2: Create a Model that Implements MultiheadTransformer

In [7]:
import torch.nn