# Assignment II - Text Generation with RNNs (Without Punctuations)
Submitted by Arham Anwar

In [1]:
# seed and immports 

import urllib.request
import numpy as np
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
import pytorch_lightning as pl
from pytorch_lightning import Trainer
import random
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
import torch.nn.functional as F



# Set seeds for reproducibility
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    pl.seed_everything(seed)

# Setting the seed
SEED = 42
set_seed(SEED)

Global seed set to 42


## 1. Preprocess The Data

In [2]:
import random 
import numpy as np
import urllib.request


### 1.1. Loading the data

In [3]:
"""tiny shakespeare dataset"""

# Data preparation
url = 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'
filename = 'shakespeare.txt'
urllib.request.urlretrieve(url, filename)
print(f"File downloaded and saved as: {filename}")

File downloaded and saved as: shakespeare.txt


### 1.2. Lower casing the data

In [4]:
# Read and preprocess the text
text = open(filename, 'rb').read().decode(encoding='utf-8').lower()

### 1.3. Using 500K characters only due to compute resource limitations

In [5]:
# for compute restrictions we will use only 500000 characters
text = text[100000:800000]

# remove all punctuations and special characters
text = text.replace('\n', ' ').replace('\r', ' ')
text = text.replace('!', '').replace('?', '').replace(';', '').replace(':', '').replace(',', '')
text = text.replace('(', '').replace(')', '').replace('--', '').replace('?', '').replace('.', '')
text = text.replace('"', '').replace("'", '').replace('_', '').replace('-', '').replace('`', '')
text = text.replace('*', '').replace(']', '').replace('[', '').replace('}', '').replace('{', '')
text = text.replace('1', '').replace('2', '').replace('3', '').replace('4', '').replace('5', '')
text = text.replace('6', '').replace('7', '').replace('8', '').replace('9', '').replace('0', '')
text = text.replace('=', '').replace('+', '').replace('<', '').replace('>', '').replace('/', '')
text = text.replace('\\', '').replace('|', '').replace('@', '').replace('#', '').replace('$', '')
text = text.replace('%', '').replace('^', '').replace('&', '').replace('*', '').replace('~', '')
text = text.replace('`', '').replace('´', '').replace('§', '').replace('°', '').replace('¨', '')
text = text.replace('£', '').replace('€', '').replace('¥', '').replace('¢', '').replace('¬', '')
text = text.replace('µ', '').replace('¶', '').replace('©', '').replace('®', '').replace('™', '')


#text = text[300000:800000]

#### 1.4. Character Dictionary

In [6]:
# Get all unique characters
characters = sorted(set(text))
char_to_index = {c: i for i, c in enumerate(characters)}
index_to_char = {i: c for i, c in enumerate(characters)}

#### 1.5. Sequence Configuration

In [7]:
SEQ_LENGTH = 40
STEP_SIZE = 3
sentences = []
next_characters = []

# Assuming 'text' and 'char_to_index' are defined earlier in your code
for i in range(0, len(text) - SEQ_LENGTH, STEP_SIZE):
    sentences.append(text[i: i + SEQ_LENGTH])
    next_characters.append(text[i + SEQ_LENGTH])

# Convert data to indices
import numpy as np
X = np.zeros((len(sentences), SEQ_LENGTH), dtype=np.int32)
y = np.zeros((len(sentences)), dtype=np.int32)

for i, sentence in enumerate(sentences):
    X[i] = [char_to_index[char] for char in sentence]
    y[i] = char_to_index[next_characters[i]]


In [8]:
# Custom Dataset
class ShakespeareDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.long)
        self.y = torch.tensor(y, dtype=torch.long)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

# Set a larger batch size
BATCH_SIZE = 256

dataset = ShakespeareDataset(X, y)

# split to train and validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
# dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

#### 2. RNN Model

In [9]:
class ShakespeareModel(pl.LightningModule):
    def __init__(self, n_chars, hidden_size, num_layers, lr, dropout=0.3):
        super().__init__()
        self.save_hyperparameters()
        self.lstm = nn.LSTM(self.hparams.n_chars, self.hparams.hidden_size, self.hparams.num_layers, batch_first=True, dropout=self.hparams.dropout)
        self.fc = nn.Linear(self.hparams.hidden_size, self.hparams.n_chars)

    def forward(self, x):
        x = nn.functional.one_hot(x, num_classes=self.hparams.n_chars).float()
        h0 = torch.zeros(self.hparams.num_layers, x.size(0), self.hparams.hidden_size, device=self.device)
        c0 = torch.zeros(self.hparams.num_layers, x.size(0), self.hparams.hidden_size, device=self.device)
        out, _ = self.lstm(x, (h0, c0))
        out = self.fc(out[:, -1, :])
        return out

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = nn.CrossEntropyLoss()(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        val_loss = nn.CrossEntropyLoss()(y_hat, y) 
        self.log('val_loss', val_loss)
        return {'val_loss': val_loss}

    def validation_epoch_end(self, outputs):
        if outputs:
            avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
            self.log('val_loss', avg_loss)
        else:
            self.log('val_loss', torch.tensor(float('nan')))
            print("Warning: No validation outputs were generated. Check your data.")

    def configure_optimizers(self):
        return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)

    def generate_text(self, seed_text, max_length=100, temperature=1.0):
        self.eval()
        generated_text = seed_text
        input_ids = torch.tensor([char_to_index[c] for c in seed_text], dtype=torch.long).unsqueeze(0).to(self.device)

        with torch.no_grad():
            for _ in range(max_length - len(seed_text)):
                logits = self(input_ids)
                logits = logits[0, :] / temperature  # Apply temperature correctly to logits
                probabilities = F.softmax(logits, dim=-1)  # Convert logits to probabilities
                predicted_char_index = torch.multinomial(probabilities, 1).item()  # Sample from the distribution
                predicted_char = index_to_char[predicted_char_index]
                generated_text += predicted_char
                next_input = torch.tensor([[predicted_char_index]], dtype=torch.long).to(self.device)
                input_ids = torch.cat([input_ids[:, 1:], next_input], dim=1)  # Shift and append

        return generated_text

# Instantiate the model with recommended hyperparameters
n_chars = len(char_to_index)  # Ensure this is set based on your dataset
hidden_size = 128  # Increased hidden size
num_layers = 3  # Increased number of layers
lr = 0.001  # Reduced learning rate
dropout = 0.3  # Added dropout for regularization

model = ShakespeareModel(n_chars, hidden_size, num_layers, lr, dropout)

# Define the ModelCheckpoint callback
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss',
    dirpath='checkpoints/',
    filename='shakespeare-{epoch:02d}-{val_loss:.2f}',
    save_top_k=1,
    mode='min',
    save_weights_only=True,
    verbose=True
)

# Define the EarlyStopping callback
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    patience=3,
    mode='min',
    verbose=True
)

# Logger
logger = TensorBoardLogger("tb_logs", name="shakespeare_model")

# Define the Trainer with the checkpoint and early stopping callbacks
trainer = Trainer(
    max_epochs=6,
    gpus=1 if torch.cuda.is_available() else 0,
    callbacks=[checkpoint_callback, early_stopping_callback],
    logger=logger
)

# Assuming train_dataloader and val_dataloader are defined
# Train the model
trainer.fit(model, train_dataloader, val_dataloader)

  rank_zero_deprecation(
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name | Type   | Params
--------------------------------
0 | lstm | LSTM   | 344 K 
1 | fc   | Linear | 3.5 K 
--------------------------------
348 K     Trainable params
0         Non-trainable params
348 K     Total params
1.392     Total estimated model params size (MB)
2024-05-27 00:16:33.768777: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Epoch 0: 100%|██████████| 869/869 [06:39<00:00,  2.18it/s, loss=2.21, v_num=17]

Metric val_loss improved. New best score: 2.175


Epoch 0: 100%|██████████| 869/869 [06:39<00:00,  2.17it/s, loss=2.21, v_num=17]

Epoch 0, global step 695: 'val_loss' reached 2.17461 (best 2.17461), saving model to 'checkpoints/shakespeare-epoch=00-val_loss=2.17.ckpt' as top 1


Epoch 1: 100%|██████████| 869/869 [06:29<00:00,  2.23it/s, loss=2.01, v_num=17]

Metric val_loss improved by 0.195 >= min_delta = 0.0. New best score: 1.979


Epoch 1: 100%|██████████| 869/869 [06:30<00:00,  2.23it/s, loss=2.01, v_num=17]

Epoch 1, global step 1390: 'val_loss' reached 1.97925 (best 1.97925), saving model to 'checkpoints/shakespeare-epoch=01-val_loss=1.98.ckpt' as top 1


Epoch 2: 100%|██████████| 869/869 [06:42<00:00,  2.16it/s, loss=1.88, v_num=17]

Metric val_loss improved by 0.137 >= min_delta = 0.0. New best score: 1.842


Epoch 2: 100%|██████████| 869/869 [06:43<00:00,  2.16it/s, loss=1.88, v_num=17]

Epoch 2, global step 2085: 'val_loss' reached 1.84201 (best 1.84201), saving model to 'checkpoints/shakespeare-epoch=02-val_loss=1.84.ckpt' as top 1


Epoch 3: 100%|██████████| 869/869 [06:15<00:00,  2.31it/s, loss=1.78, v_num=17]

Metric val_loss improved by 0.096 >= min_delta = 0.0. New best score: 1.746


Epoch 3: 100%|██████████| 869/869 [06:16<00:00,  2.31it/s, loss=1.78, v_num=17]

Epoch 3, global step 2780: 'val_loss' reached 1.74581 (best 1.74581), saving model to 'checkpoints/shakespeare-epoch=03-val_loss=1.75.ckpt' as top 1


Epoch 4: 100%|██████████| 869/869 [06:28<00:00,  2.23it/s, loss=1.74, v_num=17]

Metric val_loss improved by 0.069 >= min_delta = 0.0. New best score: 1.677


Epoch 4: 100%|██████████| 869/869 [06:29<00:00,  2.23it/s, loss=1.74, v_num=17]

Epoch 4, global step 3475: 'val_loss' reached 1.67730 (best 1.67730), saving model to 'checkpoints/shakespeare-epoch=04-val_loss=1.68.ckpt' as top 1


Epoch 5: 100%|██████████| 869/869 [07:59<00:00,  1.81it/s, loss=1.67, v_num=17]

Metric val_loss improved by 0.044 >= min_delta = 0.0. New best score: 1.633


Epoch 5: 100%|██████████| 869/869 [07:59<00:00,  1.81it/s, loss=1.67, v_num=17]

Epoch 5, global step 4170: 'val_loss' reached 1.63346 (best 1.63346), saving model to 'checkpoints/shakespeare-epoch=05-val_loss=1.63.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=6` reached.


Epoch 5: 100%|██████████| 869/869 [07:59<00:00,  1.81it/s, loss=1.67, v_num=17]


In [11]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # You can start with any seed text
generated_text = model.generate_text(seed_text.lower(), max_length=200, temperature=0.2)
print(generated_text)


is deep learning deep enough or not is the world the see the will the world the see the rese and the porith thou shall the come the warwick the for the come the world i would not the will the good the


In [17]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # You can start with any seed text
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=0.5)
# print such that text is new line after 12 words each
print(' '.join(generated_text.split()[:12]))
print(' '.join(generated_text.split()[12:24]))
print(' '.join(generated_text.split()[24:36]))
print(' '.join(generated_text.split()[36:48]))
print(' '.join(generated_text.split()[48:60]))
print(' '.join(generated_text.split()[60:72]))
print(' '.join(generated_text.split()[72:84]))
print(' '.join(generated_text.split()[84:96]))
print(' '.join(generated_text.split()[96:108]))
print(' '.join(generated_text.split()[108:120]))




is deep learning deep enough or not is i with the was
the farewell courten in the day than there to he see thee
for the warwick be i to the dount then the for the
will the say for shall richard in the that or be the
geart to thus see the ban should the come the serviles do
and the prove in the to anther moster and well the soul
him the since a come in the comes king henry with the
parse i with are to fight that i well be the son
her the fear be on the warwond and the garis and with
the dead father the death the sware be such that the good


#### Temperature 0.2

In [18]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # You can start with any seed text
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=0.2)
# print such that text is new line after 12 words each
print(' '.join(generated_text.split()[:12]))
print(' '.join(generated_text.split()[12:24]))
print(' '.join(generated_text.split()[24:36]))
print(' '.join(generated_text.split()[36:48]))
print(' '.join(generated_text.split()[48:60]))
print(' '.join(generated_text.split()[60:72]))
print(' '.join(generated_text.split()[72:84]))
print(' '.join(generated_text.split()[84:96]))
print(' '.join(generated_text.split()[96:108]))
print(' '.join(generated_text.split()[108:120]))




is deep learning deep enough or not is the seat the see
the such my lord the world what the for the see the
death the will shall the heart the good the will the present
the death the come the say the will the soul the see
the soul my lord and the were the word the see the
will so the soul the soul the more the warwick the lord
in the see the see the world the see the warwick the
grom the soul the god the wall the deep the world with
the see the death the prese the bear the see the world
the soul the soul the will the counter the beather the fore


#### Temperature 0.4

In [26]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # You can start with any seed text
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=0.4)

def print_text_in_chunks(text, chunk_size=12):
    words = text.split()
    for i in range(0, len(words), chunk_size):
        print(' '.join(words[i:i + chunk_size]))

print_text_in_chunks(generated_text)


is deep learning deep enough or not is this seet the say
and the count the king and see my lord and the warwick
the bead the the pear the prine the world the courter the
bear my lord and sir thee and the earth the say the
man be the world be the dake the conter son well the
dead the rebored is the cate in the death the seat the
warwick the recest the hast of the hast the prother the pore
the dosh with the will come the death the seat the warwick
i the nother so be the ant the dead the court that
your be the sound the warwick the sepore in he pray the
not see the for i would nor the will houd hath the
gaunter and the grow me the death of the come the and
the rest the warwick with the world of more king i me
the see the say the for the will the had the duke
the house and so the prince and with i tear i the
man a two beath god the was this i be the can
it you are the prese the noble us me that and shall
not the make the beather my come not see there the sent
the bear the service and my l


#### Temperature 0.6

In [27]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # You can start with any seed text
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=0.6)

def print_text_in_chunks(text, chunk_size=12):
    words = text.split()
    for i in range(0, len(words), chunk_size):
        print(' '.join(words[i:i + chunk_size]))

print_text_in_chunks(generated_text)


is deep learning deep enough or not is for the too shall
spould romeo grutt i death the king then our we plant and
the ban peine look i for hath more the world thou my
roke my lord king recordy that me for the come shall sole
i will that and the have make as my lord the seat
the seak the nent he down the farese the bingh the made
the some as nom it thy cail not more were what the
for a honour the do not staul with his parse duchess be
he life the soul seal be the bear the not the neth
your spould the fore the count in then seal i do the
courtens but thou the sell thou have tear i leave and on
our mont bound to they with some the lear thus dear the
romeo shall why so me the come and king and him that
is the some the beath and where that word is be the
were the exent the engole and well see i wend the deesing
to me more me i am may the canunt the seat the
brink the were a come at this preven there the dool thou
art the want and the deep then must so the grace the
lord go and what seat


#### Temperature 0.8

In [28]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # You can start with any seed text
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=0.8)

def print_text_in_chunks(text, chunk_size=12):
    words = text.split()
    for i in range(0, len(words), chunk_size):
        print(' '.join(words[i:i + chunk_size]))

print_text_in_chunks(generated_text)


is deep learning deep enough or not is your senving and seeven
my saul not my lord i foul my would a kind them
fathers a pourt of there to that here what margo me i
wail way and the sike this the lorded parst the will shall
to the the counton have teseess gloucester amay mone thou such pronn
and bine draw that not then have save and who thee seatel
of your and have on to tweo shall to it edward is
therefore comnot be feor shall lead leaveming te heart nor which thi
see the backoring of with me i come of aglord that speak
chate bes iw not mother i come i foot such richaldbing he
may iut thou art eovinglan toight i were all that but edward
now her with see their frole the gester be theme mard and
for love of yorks soul some dast not wour head be enfere
god and than then seed this breath is gonour shall a good
by mach is he well come that siin more a bester to
the shall frether and now been to make be though moue that
how this like good and that the romeo and the our so
lord uram the


#### Temperature 1.0

In [29]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # change seed here
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=1.0)

def print_text_in_chunks(text, chunk_size=12):
    words = text.split()
    for i in range(0, len(words), chunk_size):
        print(' '.join(words[i:i + chunk_size]))

print_text_in_chunks(generated_text)


is deep learning deep enough or not is me hen thou the
boy fasces to geword shole puaring his mein supand flow i poor
themh word of galing shall engle prinn of she eay her again
time i would pref and bourse thing hath to crevack ge plkiend
you ridester comvers on mown vave before to therefore beages the and
frem sour seal he seallw pestering all thee werly i low not
me preskd of wesniot to yes see shall slave boonage to us
out sise is me fall stare cherelitn thou bolingbrokes we than is
nok weir my all beling of lord be your as should piges
rest hin heavy stardd king store in fur hath for midel is
sead hid make twem theme which king will the tear with lords
guapts of the here sufs i pefore truids than the right not
your be are death the queen to thou beather thou me with
me no remeid o rasp vise thus my list saiter to your
god wyrfas be nobe of say delils land is sucs and i
wan newer that the ris i how prieis then i wie theres
with thee for myter wo moul treas is too thou pronath


In [23]:
# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text
seed_text = "Is deep learning deep enough or not is"  # You can start with any seed text
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=1)
# print such that text is new line after 12 words each
print(' '.join(generated_text.split()[:12]))
print(' '.join(generated_text.split()[12:24]))
print(' '.join(generated_text.split()[24:36]))
print(' '.join(generated_text.split()[36:48]))
print(' '.join(generated_text.split()[48:60]))
print(' '.join(generated_text.split()[60:72]))
print(' '.join(generated_text.split()[72:84]))
print(' '.join(generated_text.split()[84:96]))
print(' '.join(generated_text.split()[96:108]))
print(' '.join(generated_text.split()[108:120]))




is deep learning deep enough or not is your no the fakeily
on my mald vitle upfer my to nor onluy and no recommy
is now sleep never my gave a durn ord thee edwleds my
duch i cree before comeson a cameland be suit un enour kand
hele fear and she jess thy god strake a sert it a
man he aruthine me then foor ale a with hereful stould her
in forghat kole while gloucester that mesters i hnop this to speaks
our some preatan o doth thee apal appirst nor mowent thou gongh
whis samen to stoucome and verooy with his world aldfppen a were
men to wity ged may i mauntt would loves gast he tell
