# Assignment II - Text Generation with RNNs
Submitted by Arham Anwar

In [20]:
# Seed and imports

import os
import random
import urllib.request

import numpy as np
import pytorch_lightning as pl
import torch
import torch.nn.functional as F
from pytorch_lightning import Trainer
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
from pytorch_lightning.loggers import TensorBoardLogger
from torch import nn
from torch.utils.data import Dataset, DataLoader



# Reproducibility: seed every random number generator the notebook touches
def set_seed(seed):
    """Seed Python's `random`, NumPy, PyTorch (CPU and CUDA) and Lightning.

    Args:
        seed: Integer seed handed to every generator.
    """
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    pl.seed_everything(seed)

# Seed used throughout the notebook
SEED = 42
set_seed(SEED)

Global seed set to 42


## 1. Preprocess The Data

In [21]:
import random 
import numpy as np
import urllib.request


### 1.1. Loading the data

In [22]:
"""tiny shakespeare dataset"""

# Data preparation
url = 'https://storage.googleapis.com/download.tensorflow.org/data/shakespeare.txt'
filename = 'shakespeare.txt'

# Re-use an existing copy so that re-running the notebook needs no network access.
if os.path.exists(filename):
    print(f"File already present, skipping download: {filename}")
else:
    # Download to a temporary name first: an interrupted transfer must not leave
    # a truncated file behind under the final name.
    partial_filename = filename + '.part'
    urllib.request.urlretrieve(url, partial_filename)
    os.replace(partial_filename, filename)
    print(f"File downloaded and saved as: {filename}")

File downloaded and saved as: shakespeare.txt


### 1.2. Lower casing the data

In [23]:
# Read the corpus as UTF-8 and lower-case it; the context manager closes the
# file handle as soon as the bytes have been read.
with open(filename, 'rb') as source_file:
    text = source_file.read().decode(encoding='utf-8').lower()

### 1.3. Using 700K characters only due to compute resource limitations

In [24]:
# Compute restrictions: keep only a 700,000-character slice of the corpus
# (set TEXT_START = 300000 for a smaller 500,000-character slice).
# NOTE: this cell overwrites `text`; re-running it without first re-running the
# loading cell slices the already-sliced text again.
TEXT_START = 100000
TEXT_END = 800000
text = text[TEXT_START:TEXT_END]

### 1.4. Character Dictionary

In [25]:
# Vocabulary: every distinct character of the corpus, in sorted order
characters = sorted(set(text))

# Two-way lookup between a character and its position in the vocabulary
index_to_char = dict(enumerate(characters))
char_to_index = {symbol: position for position, symbol in index_to_char.items()}

### 1.5. Sequence Configuration

In [26]:
SEQ_LENGTH = 40  # number of characters the model sees per sample
STEP_SIZE = 3    # distance between the starts of two consecutive windows

# Start offset of every window. The upper bound leaves one character after the
# last window so that each window has a "next character" target.
window_starts = np.arange(0, len(text) - SEQ_LENGTH, STEP_SIZE)

# Raw string views of the samples (kept for inspection)
sentences = [text[start: start + SEQ_LENGTH] for start in range(0, len(text) - SEQ_LENGTH, STEP_SIZE)]
next_characters = [text[start + SEQ_LENGTH] for start in range(0, len(text) - SEQ_LENGTH, STEP_SIZE)]

# Convert data to indices: encode the corpus once, then gather the windows by
# index instead of re-encoding every overlapping character in a Python loop.
encoded_text = np.array([char_to_index[char] for char in text], dtype=np.int32)
X = encoded_text[window_starts[:, None] + np.arange(SEQ_LENGTH)]  # (n_windows, SEQ_LENGTH)
y = encoded_text[window_starts + SEQ_LENGTH]                      # (n_windows,)


In [27]:
# Custom Dataset
class ShakespeareDataset(Dataset):
    """Map-style dataset pairing an index sequence with its target index.

    Both inputs are copied into `torch.long` tensors on construction.
    """

    def __init__(self, X, y):
        """Store the samples `X` and the targets `y` as long tensors."""
        as_long = dict(dtype=torch.long)
        self.X = torch.tensor(X, **as_long)
        self.y = torch.tensor(y, **as_long)

    def __len__(self):
        """Return the number of samples."""
        n_samples = len(self.X)
        return n_samples

    def __getitem__(self, idx):
        """Return the `(sample, target)` pair stored at position `idx`."""
        sample = self.X[idx]
        target = self.y[idx]
        return sample, target

# Mini-batch size shared by the training and validation loaders
BATCH_SIZE = 256

dataset = ShakespeareDataset(X, y)

# 80/20 train/validation split. The explicitly seeded generator makes the split
# reproducible even when this cell is re-run, instead of depending on how much
# global RNG state was consumed beforehand.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
split_generator = torch.Generator().manual_seed(SEED)
train_dataset, val_dataset = torch.utils.data.random_split(
    dataset, [train_size, val_size], generator=split_generator
)
# NOTE(review): consecutive windows overlap (STEP_SIZE < SEQ_LENGTH), so a random
# split places near-identical windows in both subsets; val_loss is likely
# optimistic. Consider a contiguous split if an unbiased estimate is needed.

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

## 2. RNN Model

In [28]:
class ShakespeareModel(pl.LightningModule):
    """Character-level LSTM that predicts the next character of a sequence.

    The input indices are one-hot encoded, passed through a stacked LSTM, and
    the output of the last time step is projected to one logit per character.

    Args:
        n_chars: Vocabulary size (one-hot width and number of output logits).
        hidden_size: Hidden units per LSTM layer.
        num_layers: Number of stacked LSTM layers.
        lr: Learning rate passed to Adam.
        dropout: Dropout probability passed to `nn.LSTM`.
    """

    def __init__(self, n_chars, hidden_size, num_layers, lr, dropout=0.3):
        super().__init__()
        # Stores the constructor arguments under self.hparams
        self.save_hyperparameters()
        self.lstm = nn.LSTM(
            self.hparams.n_chars,
            self.hparams.hidden_size,
            self.hparams.num_layers,
            batch_first=True,
            dropout=self.hparams.dropout,
        )
        self.fc = nn.Linear(self.hparams.hidden_size, self.hparams.n_chars)

    def forward(self, x):
        """Return next-character logits for a batch of index sequences.

        Args:
            x: Long tensor of character indices, indexed as (batch, time).

        Returns:
            Tensor of logits with one row per batch element and `n_chars` columns.
        """
        x = nn.functional.one_hot(x, num_classes=self.hparams.n_chars).float()
        # nn.LSTM starts from all-zero hidden/cell states when none are given,
        # so no explicit h0/c0 tensors are needed.
        out, _ = self.lstm(x)
        # Only the last time step is used to predict the following character.
        return self.fc(out[:, -1, :])

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one training batch."""
        x, y = batch
        y_hat = self(x)
        loss = F.cross_entropy(y_hat, y)
        self.log('train_loss', loss)
        return loss

    def validation_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one validation batch."""
        x, y = batch
        y_hat = self(x)
        val_loss = F.cross_entropy(y_hat, y)
        self.log('val_loss', val_loss)
        return {'val_loss': val_loss}

    def validation_epoch_end(self, outputs):
        """Log the mean of the per-batch validation losses for the epoch.

        NOTE(review): `validation_step` already logs 'val_loss' for every batch;
        confirm which of the two values the callbacks are meant to monitor.
        """
        if outputs:
            avg_loss = torch.stack([x['val_loss'] for x in outputs]).mean()
            self.log('val_loss', avg_loss)
        else:
            self.log('val_loss', torch.tensor(float('nan')))
            print("Warning: No validation outputs were generated. Check your data.")

    def configure_optimizers(self):
        """Return an Adam optimizer over all parameters with the stored learning rate."""
        return torch.optim.Adam(self.parameters(), lr=self.hparams.lr)

    def generate_text(self, seed_text, max_length=100, temperature=1.0, context_length=None):
        """Sample text character by character, starting from `seed_text`.

        Relies on the notebook-level `char_to_index` / `index_to_char` lookups.
        The module is switched to eval mode while sampling and its previous
        train/eval mode is restored afterwards.

        Args:
            seed_text: Initial text; every character must be in `char_to_index`.
            max_length: Total length of the returned string, seed included.
            temperature: Positive divisor applied to the logits before softmax;
                smaller values make sampling more conservative.
            context_length: Maximum number of trailing characters fed to the
                model. `None` (default) keeps the window at the seed's length.
                Passing the training window length lets a short seed grow to
                the context size the model was trained on.

        Returns:
            `seed_text` followed by the sampled characters.

        Raises:
            ValueError: If `temperature` is not positive, `context_length` is
                smaller than 1, or text must be generated from an empty seed.
            KeyError: If `seed_text` contains characters outside the vocabulary.
        """
        if temperature <= 0:
            raise ValueError(f"temperature must be positive, got {temperature}")
        if context_length is not None and context_length < 1:
            raise ValueError(f"context_length must be at least 1, got {context_length}")
        unknown_chars = sorted(set(seed_text) - set(char_to_index))
        if unknown_chars:
            raise KeyError(f"seed_text contains characters outside the vocabulary: {unknown_chars}")

        n_new_chars = max_length - len(seed_text)
        if n_new_chars <= 0:
            return seed_text
        if not seed_text:
            raise ValueError("seed_text must contain at least one character")

        window = len(seed_text) if context_length is None else context_length
        input_ids = torch.tensor(
            [[char_to_index[c] for c in seed_text[-window:]]],
            dtype=torch.long,
            device=self.device,
        )

        pieces = [seed_text]
        was_training = self.training
        self.eval()
        try:
            with torch.no_grad():
                for _ in range(n_new_chars):
                    logits = self(input_ids)[0, :] / temperature  # temperature-scaled logits
                    probabilities = F.softmax(logits, dim=-1)
                    predicted_char_index = torch.multinomial(probabilities, 1).item()
                    pieces.append(index_to_char[predicted_char_index])
                    next_input = torch.tensor([[predicted_char_index]], dtype=torch.long, device=self.device)
                    # Append the new character and keep at most `window` trailing
                    # characters, so the oldest one is dropped once the window is full.
                    input_ids = torch.cat([input_ids, next_input], dim=1)[:, -window:]
        finally:
            # Do not leave the module in eval mode if the caller was training it.
            self.train(was_training)

        return "".join(pieces)

# Model hyperparameters
n_chars = len(char_to_index)  # vocabulary size, derived from the dataset
hidden_size = 128             # hidden units per LSTM layer
num_layers = 3                # stacked LSTM layers
lr = 0.001                    # Adam learning rate
dropout = 0.3                 # dropout between LSTM layers

model = ShakespeareModel(
    n_chars=n_chars,
    hidden_size=hidden_size,
    num_layers=num_layers,
    lr=lr,
    dropout=dropout,
)

# Keep only the weights of the epoch with the lowest validation loss
checkpoint_callback = ModelCheckpoint(
    dirpath='checkpoints/',
    filename='shakespeare-{epoch:02d}-{val_loss:.2f}',
    monitor='val_loss',
    mode='min',
    save_top_k=1,
    save_weights_only=True,
    verbose=True,
)

# Stop training once the validation loss has not improved for 3 epochs
early_stopping_callback = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=3,
    verbose=True,
)

# TensorBoard logger
logger = TensorBoardLogger("tb_logs", name="shakespeare_model")

# Trainer wired with the checkpoint and early-stopping callbacks;
# uses one GPU when CUDA is available and the CPU otherwise
n_gpus = int(torch.cuda.is_available())
trainer = Trainer(
    max_epochs=6,
    gpus=n_gpus,
    callbacks=[checkpoint_callback, early_stopping_callback],
    logger=logger,
)

# Train the model
trainer.fit(model, train_dataloader, val_dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name | Type   | Params
--------------------------------
0 | lstm | LSTM   | 350 K 
1 | fc   | Linear | 5.0 K 
--------------------------------
355 K     Trainable params
0         Non-trainable params
355 K     Total params
1.423     Total estimated model params size (MB)


Epoch 0:   7%|▋         | 43/652 [13:35<3:12:31, 18.97s/it, loss=3.08, v_num=14]
Epoch 0: 100%|██████████| 913/913 [06:30<00:00,  2.34it/s, loss=2.35, v_num=16]

Metric val_loss improved. New best score: 2.350


Epoch 0: 100%|██████████| 913/913 [06:30<00:00,  2.34it/s, loss=2.35, v_num=16]

Epoch 0, global step 730: 'val_loss' reached 2.34978 (best 2.34978), saving model to 'checkpoints/shakespeare-epoch=00-val_loss=2.35.ckpt' as top 1


Epoch 1: 100%|██████████| 913/913 [05:29<00:00,  2.77it/s, loss=2.16, v_num=16]

Metric val_loss improved by 0.189 >= min_delta = 0.0. New best score: 2.161


Epoch 1: 100%|██████████| 913/913 [05:29<00:00,  2.77it/s, loss=2.16, v_num=16]

Epoch 1, global step 1460: 'val_loss' reached 2.16053 (best 2.16053), saving model to 'checkpoints/shakespeare-epoch=01-val_loss=2.16.ckpt' as top 1


Epoch 2: 100%|██████████| 913/913 [05:27<00:00,  2.79it/s, loss=2.07, v_num=16]

Metric val_loss improved by 0.136 >= min_delta = 0.0. New best score: 2.024


Epoch 2: 100%|██████████| 913/913 [05:27<00:00,  2.79it/s, loss=2.07, v_num=16]

Epoch 2, global step 2190: 'val_loss' reached 2.02431 (best 2.02431), saving model to 'checkpoints/shakespeare-epoch=02-val_loss=2.02.ckpt' as top 1


Epoch 3: 100%|██████████| 913/913 [05:32<00:00,  2.74it/s, loss=1.98, v_num=16]

Metric val_loss improved by 0.117 >= min_delta = 0.0. New best score: 1.907


Epoch 3: 100%|██████████| 913/913 [05:32<00:00,  2.74it/s, loss=1.98, v_num=16]

Epoch 3, global step 2920: 'val_loss' reached 1.90748 (best 1.90748), saving model to 'checkpoints/shakespeare-epoch=03-val_loss=1.91.ckpt' as top 1


Epoch 4: 100%|██████████| 913/913 [06:46<00:00,  2.24it/s, loss=1.88, v_num=16]

Metric val_loss improved by 0.074 >= min_delta = 0.0. New best score: 1.833


Epoch 4: 100%|██████████| 913/913 [06:46<00:00,  2.24it/s, loss=1.88, v_num=16]

Epoch 4, global step 3650: 'val_loss' reached 1.83333 (best 1.83333), saving model to 'checkpoints/shakespeare-epoch=04-val_loss=1.83.ckpt' as top 1


Epoch 5: 100%|██████████| 913/913 [06:20<00:00,  2.40it/s, loss=1.8, v_num=16] 

Metric val_loss improved by 0.070 >= min_delta = 0.0. New best score: 1.764


Epoch 5: 100%|██████████| 913/913 [06:20<00:00,  2.40it/s, loss=1.8, v_num=16]

Epoch 5, global step 4380: 'val_loss' reached 1.76364 (best 1.76364), saving model to 'checkpoints/shakespeare-epoch=05-val_loss=1.76.ckpt' as top 1
`Trainer.fit` stopped: `max_epochs=6` reached.


Epoch 5: 100%|██████████| 913/913 [06:20<00:00,  2.40it/s, loss=1.8, v_num=16]


In [46]:
# Restore the model from the best checkpoint recorded during training
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(
    best_model_path,
    n_chars=n_chars,
    hidden_size=hidden_size,
    num_layers=num_layers,
    lr=lr,
    dropout=dropout,
)

# Sample a continuation of the (lower-cased) seed text
seed_text = "Is deep learning deep enough or not is"
generated_text = model.generate_text(
    seed_text.lower(),
    max_length=200,
    temperature=0.2,
)
print(generated_text)


is deep learning deep enough or not is the will the some
the thou the shord the stall the shall the stall the senter:
the mard the have the with the come the bord.

canilice:
the some the will the wit


#### Temperature 0.2

In [37]:
# Hyperparameters of the trained model (must match the checkpoint)
n_chars = len(char_to_index)  # derived from the vocabulary instead of a hard-coded 39
hidden_size = 128
num_layers = 3
lr = 0.001
dropout = 0.3

# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text (the seed is lower-cased because the vocabulary is lower-case only)
seed_text = "Is deep learning deep enough or not is"
generated_text = model.generate_text(seed_text.lower(), max_length=200, temperature=0.2)
print(generated_text)


is deep learning deep enough or not is the come
the shall the come the shall the shord,
and the sace the will the shall the forth and the best the stord
the dichard the did the shall the shall the sha


In [44]:
# Hyperparameters of the trained model (must match the checkpoint)
n_chars = len(char_to_index)  # derived from the vocabulary instead of a hard-coded 39
hidden_size = 128
num_layers = 3
lr = 0.001
dropout = 0.3

# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text from a short seed (lower-cased because the vocabulary is lower-case only)
seed_text = "stability"
generated_text = model.generate_text(seed_text.lower(), max_length=200, temperature=0.2)
print(generated_text)


stability:
the fares the done the come:
and the will be the world be the consent the death,
the shall the words the shall the shall the done the with the shord the present and the dichard the bectord 


#### Temperature 0.4

In [38]:
# Hyperparameters of the trained model (must match the checkpoint)
n_chars = len(char_to_index)  # derived from the vocabulary instead of a hard-coded 39
hidden_size = 128
num_layers = 3
lr = 0.001
dropout = 0.3

# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text (the seed is lower-cased because the vocabulary is lower-case only)
seed_text = "Is deep learning deep enough or not is"
generated_text = model.generate_text(seed_text.lower(), max_length=200, temperature=0.4)
print(generated_text)


is deep learning deep enough or not is you
have and my edward the pare the to the king the wird
the dome and the have lath the ward,
and the come the from the court thee with the jole,
and the will be


#### Temperature 0.6

In [39]:
# Hyperparameters of the trained model (must match the checkpoint)
n_chars = len(char_to_index)  # derived from the vocabulary instead of a hard-coded 39
hidden_size = 128
num_layers = 3
lr = 0.001
dropout = 0.3

# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text (the seed is lower-cased because the vocabulary is lower-case only)
seed_text = "Is deep learning deep enough or not is"
generated_text = model.generate_text(seed_text.lower(), max_length=200, temperature=0.6)
print(generated_text)


is deep learning deep enough or not is the lirg dather
where-to my what the werper thy sheard.
thy court this not the wilt the bunker.

king rothard:
i bust the sharl my than do and compont,
and the l


#### Temperature 0.8

In [40]:
# Hyperparameters of the trained model (must match the checkpoint)
n_chars = len(char_to_index)  # derived from the vocabulary instead of a hard-coded 39
hidden_size = 128
num_layers = 3
lr = 0.001
dropout = 0.3

# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate text (the seed is lower-cased because the vocabulary is lower-case only)
seed_text = "Is deep learning deep enough or not is"
generated_text = model.generate_text(seed_text.lower(), max_length=200, temperature=0.8)
print(generated_text)


is deep learning deep enough or not is his his
thought inseld with my, destace in the beers me so heride
stere; the still to mam blow a dike,
sweee rave the rards thi greve the hop
say thee to the for


#### Temperature 1.0

In [73]:
# Hyperparameters of the trained model (must match the checkpoint)
n_chars = len(char_to_index)  # derived from the vocabulary instead of a hard-coded 39
hidden_size = 128
num_layers = 3
lr = 0.001
dropout = 0.3

# Load the best checkpoint
best_model_path = checkpoint_callback.best_model_path
model = ShakespeareModel.load_from_checkpoint(best_model_path, n_chars=n_chars, hidden_size=hidden_size, num_layers=num_layers, lr=lr, dropout=dropout)

# Generate a longer sample (the seed is lower-cased because the vocabulary is lower-case only)
seed_text = "Is deep learning deep enough or not is"
generated_text = model.generate_text(seed_text.lower(), max_length=1000, temperature=1)
print(generated_text)


is deep learning deep enough or not is it of's,
and bight op you, so live, i not the burth:
you!

delove:
nhor lomcy of ey: is our powers is that dir,
beave usvan your leds, and ut him:
is tere qreen on the gented:
when thy; swould will that their better:
iner unthink sild for desty plices,
apon he dues combloutss his mikalt hen's os witing:
and fool be this the for the picnte, mean my.
-
gry padiced: it reapherer burd, preastery.

lincy ud he one:
a stake carse: whree 'ting noight:
the pursquet for elcked leqvors, but a twriens.
-radsiens nebty amour nand sowled fortay
to wind for the briy burthour is aphers
the mich so digtther tong aalks lewist.

mroriul:
o's on stome shenp sharl, the lady,
my homan, for soutunh some shy whow earth?

gaist:
i hame?
and i'parculidferp that pray.
bewlowl and bretchand the jeging deigts.
hoth!

kheng man
morsed:
the daven all you that no and me bege.
betirienes with to thass saed, the shurg,
my lord, beel of will that i eesons erest,
and will to thy en