# Text generation 

In [32]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
from torch.utils.data import Dataset, DataLoader,TensorDataset

# Data preprocessing 

In [None]:
import os
import string
# Path of the raw corpus, resolved relative to the current working directory.
input_file = "my.txt"
# Every ASCII punctuation mark except '.' and '?' is removed from the corpus.
punctuation = string.punctuation.replace('.', '').replace('?', '')
# Translation table that deletes all characters listed in `punctuation`.
_strip_punctuation = str.maketrans('', '', punctuation)

text_data = ""
if os.path.exists(input_file):
    # Explicit encoding: without it open() falls back to the platform default,
    # which can mis-decode (or fail on) a UTF-8 corpus.
    with open(input_file, 'r', encoding='utf-8') as infile:
        # Whitespace-only lines are skipped; joining once avoids rebuilding the
        # string on every line.
        text_data = "".join(
            line.translate(_strip_punctuation) for line in infile if line.strip()
        )
else:
    # Keep going with empty text as before, but say so instead of staying silent.
    print(f"Warning: {input_file!r} not found; continuing with empty text.")

print("Processed Text:")
print(text_data)

In [None]:
import nltk
# Lower-case the cleaned corpus and split it into tokens with NLTK.
words = nltk.word_tokenize(text_data.lower())
# Display only a short preview; dumping every token floods the cell output.
words[:20]

In [None]:
# Sort the vocabulary so the word <-> id mapping is the same on every run.
# Iterating a bare set of strings follows hash order, which changes between
# Python processes; a model saved in one session would then be decoded with
# different ids after a kernel restart.
word_to_int = {w: i for i, w in enumerate(sorted(set(words)))}
# Inverse mapping, used to turn predicted ids back into words.
int_to_word = {i: w for w, i in word_to_int.items()}
len(word_to_int)


In [36]:
def create_sequences(input_data, seq_length):
    """Slide a fixed-size window over ``input_data`` to build training pairs.

    Returns a list of ``(window, next_item)`` tuples where ``window`` is
    ``input_data[i:i + seq_length]`` and ``next_item`` is the element that
    immediately follows it. An input that is not longer than ``seq_length``
    produces an empty list.
    """
    window_count = len(input_data) - seq_length
    return [
        (input_data[start:start + seq_length], input_data[start + seq_length])
        for start in range(window_count)
    ]

# Context size: each training example is 5 token ids followed by the next id.
seq_length = 5
# Replace every token by its vocabulary id.
encoded_text = np.array(list(map(word_to_int.__getitem__, words)))
sequences = create_sequences(encoded_text, seq_length)

# Models 

## RNN

In [38]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

class RNNModel(nn.Module):
    """Next-token scorer: embedding -> two chained ``nn.RNN`` layers -> linear head.

    Both recurrent layers are batch-first. ``forward`` scores only the last
    time step and also returns the final hidden state of each layer.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.rnn1 = nn.RNN(embedding_dim, hidden_dim, batch_first=True)
        self.rnn2 = nn.RNN(hidden_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, hidden1, hidden2):
        """Return ``(scores, hidden1, hidden2)`` for a batch of id sequences."""
        first_out, hidden1 = self.rnn1(self.embedding(x), hidden1)
        second_out, hidden2 = self.rnn2(first_out, hidden2)
        # Only the output at the final time step feeds the classifier.
        last_step = second_out[:, -1, :]
        return self.fc(last_step), hidden1, hidden2

    def init_hidden(self, batch_size, device):
        """Return one all-zero ``(1, batch_size, hidden_dim)`` state per layer."""
        state_shape = (1, batch_size, self.hidden_dim)
        return tuple(torch.zeros(state_shape, device=device) for _ in range(2))

In [39]:
# Train the RNN model on the (context window, next word) pairs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = len(word_to_int)
output_size = len(word_to_int)
embedding_dim = 256
hidden_dim = 512
batch_size = 32
epochs = 100

if len(sequences) == 0:
    # Without this check an empty dataset only surfaces later as a
    # ZeroDivisionError when the epoch loss is averaged over zero batches.
    raise ValueError("No training sequences: the corpus needs more than seq_length tokens.")

# Seed before building the model so weight initialisation and batch shuffling
# start from the same random state on every run.
torch.manual_seed(42)

model = RNNModel(input_size, embedding_dim, hidden_dim, output_size).to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Full training arrays: one row of context ids per example, one target id each.
inputs = np.array([item[0] for item in sequences])
targets = np.array([item[1] for item in sequences])

inputs_tensor = torch.tensor(inputs, dtype=torch.long)
targets_tensor = torch.tensor(targets, dtype=torch.long)

dataset = TensorDataset(inputs_tensor, targets_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Explicit training mode, matching the Transformer training cell.
model.train()
for epoch in range(epochs):
    epoch_loss = 0

    # Separate batch names keep the full `inputs` / `targets` arrays intact
    # instead of overwriting them with the last mini-batch.
    for batch_inputs, batch_targets in data_loader:
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
        # Every window starts from a fresh zero hidden state.
        hidden1, hidden2 = model.init_hidden(batch_inputs.size(0), device)

        optimizer.zero_grad()
        output, hidden1, hidden2 = model(batch_inputs, hidden1, hidden2)
        loss = loss_function(output, batch_targets)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch {epoch + 1}, Loss: {epoch_loss / len(data_loader)}')

# The whole module object is pickled (not just its state_dict), so loading it
# later needs the RNNModel class to be defined under the same name.
torch.save(model, 'my_rnn_entire_model.pth')


Epoch 1, Loss: 4.955714821815491
Epoch 2, Loss: 4.740542113780975
Epoch 3, Loss: 4.561698734760284
Epoch 4, Loss: 4.388520300388336
Epoch 5, Loss: 4.210253298282623
Epoch 6, Loss: 4.03250116109848
Epoch 7, Loss: 3.841259628534317
Epoch 8, Loss: 3.654032737016678
Epoch 9, Loss: 3.4553170204162598
Epoch 10, Loss: 3.2537152469158173
Epoch 11, Loss: 3.062666267156601
Epoch 12, Loss: 2.858065515756607
Epoch 13, Loss: 2.6713258028030396
Epoch 14, Loss: 2.4796840250492096
Epoch 15, Loss: 2.2930074632167816
Epoch 16, Loss: 2.112635850906372
Epoch 17, Loss: 1.9403085857629776
Epoch 18, Loss: 1.7764658778905869
Epoch 19, Loss: 1.6279776841402054
Epoch 20, Loss: 1.4831965863704681
Epoch 21, Loss: 1.3467533439397812
Epoch 22, Loss: 1.2309203371405602
Epoch 23, Loss: 1.1150882691144943
Epoch 24, Loss: 1.015949808061123
Epoch 25, Loss: 0.918430432677269
Epoch 26, Loss: 0.8321503773331642
Epoch 27, Loss: 0.7579859718680382
Epoch 28, Loss: 0.6903301775455475
Epoch 29, Loss: 0.6271351650357246
Epoch 30

In [40]:
import torch
import numpy as np

# Restore the pickled RNN module on the CPU and switch it to inference mode.
# weights_only=False is needed because the file holds a whole nn.Module, and
# torch.load defaults to weights-only unpickling from PyTorch 2.6 onwards.
# Full unpickling can execute arbitrary code: only load files you created.
model = torch.load('my_rnn_entire_model.pth', map_location=torch.device('cpu'), weights_only=False)
model.eval()

def words_to_tensor(words, word_to_int):
    """Encode ``words`` as a ``(1, len(words))`` int64 tensor of vocabulary ids.

    Raises ``KeyError`` when a word is missing from ``word_to_int``.
    """
    id_row = np.array([[word_to_int[token] for token in words]])
    return torch.tensor(id_row, dtype=torch.long)

def generate_sequence(start_words, model, word_to_int, int_to_word, seq_length=5, length=2):
    """Greedily extend ``start_words`` by ``length`` predicted words.

    The prompt is lower-cased and tokenized with NLTK; at every step the last
    ``seq_length`` tokens are encoded, scored by ``model`` and the highest
    scoring word is appended.

    Args:
        start_words: Prompt text; newlines are treated as spaces.
        model: Module exposing ``init_hidden(batch_size, device)`` and a
            ``forward(x, hidden1, hidden2)`` that returns the scores first.
        word_to_int: Word -> id mapping; unknown words raise ``KeyError``.
        int_to_word: Id -> word mapping used to decode predictions.
        seq_length: Number of trailing tokens fed to the model per step.
        length: Number of words to generate.

    Returns:
        The prompt tokens and the generated words joined by single spaces.
    """
    start_words = start_words.replace('\n', ' ')
    words = nltk.word_tokenize(start_words.lower())
    # Run on whatever device the model lives on instead of assuming the CPU.
    device = next(model.parameters()).device
    input_tensor = words_to_tensor(words[-seq_length:], word_to_int).to(device)

    for _ in range(length):
        # Start each step from a zero state, as the training loop does for
        # every window. Carrying the state over while feeding the whole
        # context window again would make the model see the overlapping
        # tokens twice.
        hidden1, hidden2 = model.init_hidden(batch_size=1, device=device)
        with torch.no_grad():
            output, hidden1, hidden2 = model(input_tensor, hidden1, hidden2)
        predicted_index = torch.argmax(output, dim=1).item()
        predicted_word = int_to_word[predicted_index]
        words.append(predicted_word)
        input_tensor = words_to_tensor(words[-seq_length:], word_to_int).to(device)

    return ' '.join(words)

# Prompt for the RNN model; every word must exist in word_to_int, otherwise
# encoding the prompt raises KeyError.
start_words = "Alice was beginning to get very tired of "
predicted_sequence = generate_sequence(
    start_words=start_words,
    model=model,
    word_to_int=word_to_int,
    int_to_word=int_to_word,
)
print(f"Generated sequence: {predicted_sequence}")


Generated sequence: alice was beginning to get very tired of sitting by


## GRU

In [41]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

class GRUModel(nn.Module):
    """Next-token scorer: embedding -> two chained ``nn.GRU`` layers -> linear head.

    Both recurrent layers are batch-first. ``forward`` scores only the last
    time step and also returns the final hidden state of each layer.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.gru1 = nn.GRU(embedding_dim, hidden_dim, batch_first=True)
        self.gru2 = nn.GRU(hidden_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, hidden1, hidden2):
        """Return ``(scores, hidden1, hidden2)`` for a batch of id sequences."""
        lower_out, hidden1 = self.gru1(self.embedding(x), hidden1)
        upper_out, hidden2 = self.gru2(lower_out, hidden2)
        # Only the output at the final time step feeds the classifier.
        final_step = upper_out[:, -1, :]
        return self.fc(final_step), hidden1, hidden2

    def init_hidden(self, batch_size, device):
        """Return one all-zero ``(1, batch_size, hidden_dim)`` state per layer."""
        state_shape = (1, batch_size, self.hidden_dim)
        return tuple(torch.zeros(state_shape, device=device) for _ in range(2))

In [42]:


# Train the GRU model on the (context window, next word) pairs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size = len(word_to_int)
output_size = len(word_to_int)
embedding_dim = 256
hidden_dim = 512
batch_size = 32
epochs = 100

if len(sequences) == 0:
    # Without this check an empty dataset only surfaces later as a
    # ZeroDivisionError when the epoch loss is averaged over zero batches.
    raise ValueError("No training sequences: the corpus needs more than seq_length tokens.")

# Seed before building the model so weight initialisation and batch shuffling
# start from the same random state on every run.
torch.manual_seed(42)

model = GRUModel(input_size, embedding_dim, hidden_dim, output_size).to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Full training arrays: one row of context ids per example, one target id each.
inputs = np.array([item[0] for item in sequences])
targets = np.array([item[1] for item in sequences])

inputs_tensor = torch.tensor(inputs, dtype=torch.long)
targets_tensor = torch.tensor(targets, dtype=torch.long)

dataset = TensorDataset(inputs_tensor, targets_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Explicit training mode, matching the Transformer training cell.
model.train()
for epoch in range(epochs):
    epoch_loss = 0

    # Separate batch names keep the full `inputs` / `targets` arrays intact
    # instead of overwriting them with the last mini-batch.
    for batch_inputs, batch_targets in data_loader:
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
        # Every window starts from a fresh zero hidden state.
        hidden1, hidden2 = model.init_hidden(batch_inputs.size(0), device)

        optimizer.zero_grad()
        output, hidden1, hidden2 = model(batch_inputs, hidden1, hidden2)
        loss = loss_function(output, batch_targets)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch {epoch + 1}, Loss: {epoch_loss / len(data_loader)}')

# The whole module object is pickled (not just its state_dict), so loading it
# later needs the GRUModel class to be defined under the same name.
torch.save(model, 'my_gru_entire_model.pth')


Epoch 1, Loss: 4.945730566978455
Epoch 2, Loss: 4.872101783752441
Epoch 3, Loss: 4.811259031295776
Epoch 4, Loss: 4.75011020898819
Epoch 5, Loss: 4.684012532234192
Epoch 6, Loss: 4.611028492450714
Epoch 7, Loss: 4.52817839384079
Epoch 8, Loss: 4.432860374450684
Epoch 9, Loss: 4.312912046909332
Epoch 10, Loss: 4.1786646246910095
Epoch 11, Loss: 4.033781945705414
Epoch 12, Loss: 3.8974643647670746
Epoch 13, Loss: 3.762934535741806
Epoch 14, Loss: 3.6094580590724945
Epoch 15, Loss: 3.450372636318207
Epoch 16, Loss: 3.284603625535965
Epoch 17, Loss: 3.1213663816452026
Epoch 18, Loss: 2.9479028284549713
Epoch 19, Loss: 2.782639741897583
Epoch 20, Loss: 2.6215376555919647
Epoch 21, Loss: 2.4575088024139404
Epoch 22, Loss: 2.2981907725334167
Epoch 23, Loss: 2.1519567370414734
Epoch 24, Loss: 1.9998178631067276
Epoch 25, Loss: 1.8665757775306702
Epoch 26, Loss: 1.729377418756485
Epoch 27, Loss: 1.6064624786376953
Epoch 28, Loss: 1.487868919968605
Epoch 29, Loss: 1.376137763261795
Epoch 30, Los

In [43]:
import torch
import numpy as np

# Restore the pickled GRU module on the CPU and switch it to inference mode.
# weights_only=False is needed because the file holds a whole nn.Module, and
# torch.load defaults to weights-only unpickling from PyTorch 2.6 onwards.
# Full unpickling can execute arbitrary code: only load files you created.
model = torch.load('my_gru_entire_model.pth', map_location=torch.device('cpu'), weights_only=False)
model.eval()

def words_to_tensor(words, word_to_int):
    """Look up each word's id and return them as a single-row int64 tensor.

    The result has shape ``(1, len(words))``. A word that is absent from
    ``word_to_int`` raises ``KeyError``.
    """
    batch_of_one = [[word_to_int[token] for token in words]]
    return torch.tensor(np.array(batch_of_one), dtype=torch.long)

def generate_sequence(start_words, model, word_to_int, int_to_word, seq_length=5, length=2):
    """Greedily extend ``start_words`` by ``length`` predicted words.

    The prompt is lower-cased and tokenized with NLTK; at every step the last
    ``seq_length`` tokens are encoded, scored by ``model`` and the highest
    scoring word is appended.

    Args:
        start_words: Prompt text; newlines are treated as spaces.
        model: Module exposing ``init_hidden(batch_size, device)`` and a
            ``forward(x, hidden1, hidden2)`` that returns the scores first.
        word_to_int: Word -> id mapping; unknown words raise ``KeyError``.
        int_to_word: Id -> word mapping used to decode predictions.
        seq_length: Number of trailing tokens fed to the model per step.
        length: Number of words to generate.

    Returns:
        The prompt tokens and the generated words joined by single spaces.
    """
    start_words = start_words.replace('\n', ' ')
    words = nltk.word_tokenize(start_words.lower())
    # Run on whatever device the model lives on instead of assuming the CPU.
    device = next(model.parameters()).device
    input_tensor = words_to_tensor(words[-seq_length:], word_to_int).to(device)

    for _ in range(length):
        # Start each step from a zero state, as the training loop does for
        # every window. Carrying the state over while feeding the whole
        # context window again would make the model see the overlapping
        # tokens twice.
        hidden1, hidden2 = model.init_hidden(batch_size=1, device=device)
        with torch.no_grad():
            output, hidden1, hidden2 = model(input_tensor, hidden1, hidden2)
        predicted_index = torch.argmax(output, dim=1).item()
        predicted_word = int_to_word[predicted_index]
        words.append(predicted_word)
        input_tensor = words_to_tensor(words[-seq_length:], word_to_int).to(device)

    return ' '.join(words)

# Prompt for the GRU model; it is lower-cased before encoding, and every word
# must exist in word_to_int, otherwise encoding the prompt raises KeyError.
start_words = "There was nothing so VERY remarkable"
predicted_sequence = generate_sequence(
    start_words=start_words,
    model=model,
    word_to_int=word_to_int,
    int_to_word=int_to_word,
)
print(f"Generated sequence: {predicted_sequence}")


Generated sequence: there was nothing so very remarkable in that


## LSTM

In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

class LSTMModel(nn.Module):
    """Next-token scorer: embedding -> two chained ``nn.LSTM`` layers -> linear head.

    Both recurrent layers are batch-first. ``forward`` scores only the last
    time step and also returns the final state pair of each layer.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm1 = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.lstm2 = nn.LSTM(hidden_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x, hidden1, hidden2):
        """Return ``(scores, hidden1, hidden2)``; each hidden is a state pair."""
        lower_out, hidden1 = self.lstm1(self.embedding(x), hidden1)
        upper_out, hidden2 = self.lstm2(lower_out, hidden2)
        # Only the output at the final time step feeds the classifier.
        final_step = upper_out[:, -1, :]
        return self.fc(final_step), hidden1, hidden2

    def init_hidden(self, batch_size, device):
        """Return a pair of all-zero state tensors for each of the two layers."""

        def zero_state():
            return torch.zeros(1, batch_size, self.hidden_dim, device=device)

        first_layer = (zero_state(), zero_state())
        second_layer = (zero_state(), zero_state())
        return first_layer, second_layer


In [46]:

# Train the LSTM model on the (context window, next word) pairs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_size = len(word_to_int)
output_size = len(word_to_int)
embedding_dim = 256
hidden_dim = 512
batch_size = 32
epochs = 100

if len(sequences) == 0:
    # Without this check an empty dataset only surfaces later as a
    # ZeroDivisionError when the epoch loss is averaged over zero batches.
    raise ValueError("No training sequences: the corpus needs more than seq_length tokens.")

# Seed before building the model so weight initialisation and batch shuffling
# start from the same random state on every run.
torch.manual_seed(42)

model = LSTMModel(input_size, embedding_dim, hidden_dim, output_size).to(device)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.0001)

# Full training arrays: one row of context ids per example, one target id each.
inputs = np.array([item[0] for item in sequences])
targets = np.array([item[1] for item in sequences])

inputs_tensor = torch.tensor(inputs, dtype=torch.long)
targets_tensor = torch.tensor(targets, dtype=torch.long)

dataset = TensorDataset(inputs_tensor, targets_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Explicit training mode, matching the Transformer training cell.
model.train()
for epoch in range(epochs):
    epoch_loss = 0

    # Separate batch names keep the full `inputs` / `targets` arrays intact
    # instead of overwriting them with the last mini-batch.
    for batch_inputs, batch_targets in data_loader:
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
        # Every window starts from fresh zero states for both layers.
        hidden1, hidden2 = model.init_hidden(batch_inputs.size(0), device)

        optimizer.zero_grad()
        output, hidden1, hidden2 = model(batch_inputs, hidden1, hidden2)

        loss = loss_function(output, batch_targets)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch {epoch + 1}, Loss: {epoch_loss / len(data_loader)}')

# The whole module object is pickled (not just its state_dict), so loading it
# later needs the LSTMModel class to be defined under the same name.
torch.save(model, 'my_lstm_entire_model.pth')

Epoch 1, Loss: 4.942196428775787
Epoch 2, Loss: 4.920147776603699
Epoch 3, Loss: 4.9016329646110535
Epoch 4, Loss: 4.881509244441986
Epoch 5, Loss: 4.859233915805817
Epoch 6, Loss: 4.8325188755989075
Epoch 7, Loss: 4.796272099018097
Epoch 8, Loss: 4.742726266384125
Epoch 9, Loss: 4.658486843109131
Epoch 10, Loss: 4.501550137996674
Epoch 11, Loss: 4.3161925077438354
Epoch 12, Loss: 4.178343266248703
Epoch 13, Loss: 3.9984431862831116
Epoch 14, Loss: 3.7966540157794952
Epoch 15, Loss: 3.5781111419200897
Epoch 16, Loss: 3.3328101336956024
Epoch 17, Loss: 3.0670833587646484
Epoch 18, Loss: 2.809463530778885
Epoch 19, Loss: 2.5588470697402954
Epoch 20, Loss: 2.3401624858379364
Epoch 21, Loss: 2.1110410690307617
Epoch 22, Loss: 1.925069972872734
Epoch 23, Loss: 1.7396045923233032
Epoch 24, Loss: 1.5761377811431885
Epoch 25, Loss: 1.4336453676223755
Epoch 26, Loss: 1.295258805155754
Epoch 27, Loss: 1.182697519659996
Epoch 28, Loss: 1.0772036463022232
Epoch 29, Loss: 0.9848990812897682
Epoch 3

In [49]:
import torch
import numpy as np

# Restore the pickled LSTM module on the CPU and switch it to inference mode.
# weights_only=False is needed because the file holds a whole nn.Module, and
# torch.load defaults to weights-only unpickling from PyTorch 2.6 onwards.
# Full unpickling can execute arbitrary code: only load files you created.
model = torch.load('my_lstm_entire_model.pth', map_location=torch.device('cpu'), weights_only=False)
model.eval()

def words_to_tensor(words, word_to_int):
    """Turn a list of words into a ``(1, len(words))`` int64 tensor of ids.

    ``KeyError`` is raised for any word not present in ``word_to_int``.
    """
    ids = list(map(word_to_int.__getitem__, words))
    single_batch = np.array([ids])
    return torch.tensor(single_batch, dtype=torch.long)

def generate_sequence(start_words, model, word_to_int, int_to_word, seq_length=5, length=1):
    """Greedily extend ``start_words`` by ``length`` predicted words.

    The prompt is lower-cased and tokenized with NLTK; at every step the last
    ``seq_length`` tokens are encoded, scored by ``model`` and the highest
    scoring word is appended.

    Args:
        start_words: Prompt text; newlines are treated as spaces.
        model: Module exposing ``init_hidden(batch_size, device)`` and a
            ``forward(x, hidden1, hidden2)`` that returns the scores first.
        word_to_int: Word -> id mapping; unknown words raise ``KeyError``.
        int_to_word: Id -> word mapping used to decode predictions.
        seq_length: Number of trailing tokens fed to the model per step.
        length: Number of words to generate.

    Returns:
        The prompt tokens and the generated words joined by single spaces.
    """
    start_words = start_words.replace('\n', ' ')
    words = nltk.word_tokenize(start_words.lower())
    # Run on whatever device the model lives on instead of assuming the CPU.
    device = next(model.parameters()).device
    input_tensor = words_to_tensor(words[-seq_length:], word_to_int).to(device)

    for _ in range(length):
        # Start each step from zero states, as the training loop does for
        # every window. Carrying the states over while feeding the whole
        # context window again would make the model see the overlapping
        # tokens twice.
        hidden1, hidden2 = model.init_hidden(batch_size=1, device=device)
        with torch.no_grad():
            output, hidden1, hidden2 = model(input_tensor, hidden1, hidden2)
        predicted_index = torch.argmax(output, dim=1).item()
        predicted_word = int_to_word[predicted_index]
        words.append(predicted_word)
        input_tensor = words_to_tensor(words[-seq_length:], word_to_int).to(device)

    return ' '.join(words)

# Prompt for the LSTM model; every word must exist in word_to_int, otherwise
# encoding the prompt raises KeyError.
start_words = "Alice was beginning to get very tired of"
predicted_sequence = generate_sequence(
    start_words=start_words,
    model=model,
    word_to_int=word_to_int,
    int_to_word=int_to_word,
)
print(f"Generated sequence: {predicted_sequence}")


Generated sequence: alice was beginning to get very tired of sitting


## Transformer model

In [50]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class SimpleTransformerModel(nn.Module):
    """One multi-head self-attention layer over embeddings plus a position table.

    ``forward`` returns vocabulary scores computed from the last position only.
    ``hidden_dim`` is accepted by the constructor but not used by any layer.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_heads, seq_length):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # Trainable positional table, initialised to zeros: one row per position.
        position_table = torch.zeros(1, seq_length, embedding_dim)
        self.positional_encoding = nn.Parameter(position_table)
        self.attention = nn.MultiheadAttention(embedding_dim, num_heads, batch_first=True)
        self.fc = nn.Linear(embedding_dim, vocab_size)

    def forward(self, x):
        """Return one row of vocabulary scores per sequence in the batch."""
        steps = x.size(1)
        # The table is sliced, so inputs shorter than seq_length also work.
        positioned = self.embedding(x) + self.positional_encoding[:, :steps, :]
        attended, _ = self.attention(positioned, positioned, positioned)
        return self.fc(attended[:, -1, :])

# Train the attention model on the (context window, next word) pairs.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


vocab_size = len(word_to_int)
embedding_dim = 256
hidden_dim = 512
num_heads = 4
seq_length = 5
batch_size = 32
epochs = 100
learning_rate = 0.0001

if len(sequences) == 0:
    # Without this check an empty dataset only surfaces later as a
    # ZeroDivisionError when the epoch loss is averaged over zero batches.
    raise ValueError("No training sequences: the corpus needs more than seq_length tokens.")

# Seed before building the model so weight initialisation and batch shuffling
# start from the same random state on every run.
torch.manual_seed(42)

model = SimpleTransformerModel(vocab_size, embedding_dim, hidden_dim, num_heads, seq_length).to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Full training arrays: one row of context ids per example, one target id each.
inputs = np.array([item[0] for item in sequences])
targets = np.array([item[1] for item in sequences])

inputs_tensor = torch.tensor(inputs, dtype=torch.long)
targets_tensor = torch.tensor(targets, dtype=torch.long)

dataset = TensorDataset(inputs_tensor, targets_tensor)
data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
for epoch in range(epochs):
    model.train()
    epoch_loss = 0

    # Separate batch names keep the full `inputs` / `targets` arrays intact
    # instead of overwriting them with the last mini-batch.
    for batch_inputs, batch_targets in data_loader:
        batch_inputs, batch_targets = batch_inputs.to(device), batch_targets.to(device)
        optimizer.zero_grad()
        output = model(batch_inputs)
        loss = loss_function(output, batch_targets)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    print(f'Epoch {epoch + 1}, Loss: {epoch_loss / len(data_loader)}')

# Save the entire model: the whole module object is pickled (not just its
# state_dict), so loading it later needs the SimpleTransformerModel class to
# be defined under the same name.
torch.save(model, 'my_transformer_entire_model.pth')


Epoch 1, Loss: 4.955663621425629
Epoch 2, Loss: 4.899330914020538
Epoch 3, Loss: 4.850935995578766
Epoch 4, Loss: 4.805143237113953
Epoch 5, Loss: 4.757228434085846
Epoch 6, Loss: 4.7090007066726685
Epoch 7, Loss: 4.657526910305023
Epoch 8, Loss: 4.604125559329987
Epoch 9, Loss: 4.545733392238617
Epoch 10, Loss: 4.487375795841217
Epoch 11, Loss: 4.420965135097504
Epoch 12, Loss: 4.347955584526062
Epoch 13, Loss: 4.2679367661476135
Epoch 14, Loss: 4.1809640526771545
Epoch 15, Loss: 4.084452688694
Epoch 16, Loss: 3.9825932383537292
Epoch 17, Loss: 3.8717805445194244
Epoch 18, Loss: 3.7453378438949585
Epoch 19, Loss: 3.6159085035324097
Epoch 20, Loss: 3.482541650533676
Epoch 21, Loss: 3.3342038691043854
Epoch 22, Loss: 3.1909786760807037
Epoch 23, Loss: 3.043420672416687
Epoch 24, Loss: 2.8992905616760254
Epoch 25, Loss: 2.744039237499237
Epoch 26, Loss: 2.593690037727356
Epoch 27, Loss: 2.4527108669281006
Epoch 28, Loss: 2.303657203912735
Epoch 29, Loss: 2.162389785051346
Epoch 30, Loss:

In [56]:
import torch
import numpy as np

# Restore the pickled attention model on the CPU and switch it to inference mode.
# weights_only=False is needed because the file holds a whole nn.Module, and
# torch.load defaults to weights-only unpickling from PyTorch 2.6 onwards.
# Full unpickling can execute arbitrary code: only load files you created.
model = torch.load('my_transformer_entire_model.pth', map_location=torch.device('cpu'), weights_only=False)
model.eval()

def words_to_tensor(words, word_to_int):
    """Encode ``words`` as one row of vocabulary ids in an int64 tensor.

    The returned tensor has shape ``(1, len(words))``; a word missing from
    ``word_to_int`` raises ``KeyError``.
    """
    encoded = []
    for token in words:
        encoded.append(word_to_int[token])
    return torch.tensor(np.array([encoded]), dtype=torch.long)

def generate_sequence(start_words, model, word_to_int, int_to_word, seq_length=5, length=2):
    """Greedily append ``length`` predicted words to ``start_words``.

    The prompt is lower-cased (newlines become spaces) and tokenized with
    NLTK. At each step the last ``seq_length`` tokens are encoded, passed to
    ``model`` and the highest-scoring word is appended. Words missing from
    ``word_to_int`` raise ``KeyError``. Returns all tokens joined by spaces.
    """
    prompt = start_words.replace('\n', ' ').lower()
    words = nltk.word_tokenize(prompt)
    input_tensor = words_to_tensor(words[-seq_length:], word_to_int)

    for _ in range(length):
        with torch.no_grad():
            logits = model(input_tensor)
        best_id = logits.argmax(dim=1).item()
        words.append(int_to_word[best_id])
        # Re-encode the context so it includes the word just generated.
        input_tensor = words_to_tensor(words[-seq_length:], word_to_int)

    return ' '.join(words)

# Prompt for the attention model; every word must exist in word_to_int,
# otherwise encoding the prompt raises KeyError.
start_words = "Alice was beginning to get very tired of"
predicted_sequence = generate_sequence(
    start_words=start_words,
    model=model,
    word_to_int=word_to_int,
    int_to_word=int_to_word,
)
print(f"Generated sequence: {predicted_sequence}")


Generated sequence: alice was beginning to get very tired of sitting by
