In [2]:
!pip install -q gdown


import gdown
import os

# Google Drive download URL of the poems corpus and the local filename it is saved under.
url = "https://drive.google.com/uc?id=1ko4cu8nWhtoQ3OoTqkJdesoq3GtiToev"
file_name = "poems-100.csv"

# Fetch the file only when no local copy exists, so re-running the cell skips the download.
if not os.path.exists(file_name):
    gdown.download(url, file_name, quiet=False)

# Imports
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np

# Read the whole corpus file as a single lower-cased string.
with open(file_name, "r", encoding="utf-8") as f:
    txt = f.read().lower()

# Tokenise on whitespace only, so punctuation stays attached to its word.
words = txt.split()

# Sorted unique tokens; a token's position in this list is its integer id.
vocab = sorted(set(words))

# Lookup tables in both directions (index -> word, then word -> index).
itos = dict(enumerate(vocab))
stoi = {word: idx for idx, word in itos.items()}

# Vocabulary size.
V = len(vocab)


Downloading...
From: https://drive.google.com/uc?id=1ko4cu8nWhtoQ3OoTqkJdesoq3GtiToev
To: /content/poems-100.csv
100%|██████████| 140k/140k [00:00<00:00, 38.4MB/s]


In [3]:
# Prepare sliding window sequences
# Every run of `win` consecutive words is an input; the word right after it is the target.
win = 5

# One row of `win` word ids per window start position.
X_idx = torch.tensor(
    [[stoi[w] for w in words[start:start + win]] for start in range(len(words) - win)]
)

# The id of the word that follows each window.
y_idx = torch.tensor(
    [stoi[words[start + win]] for start in range(len(words) - win)]
)


In [4]:
# Convert indices to one-hot vectors
# N = number of windows, T = window length (T is reused later by the generation helpers).
N, T = X_idx.shape

# Dense (N, T, V) tensor of zeros; note its size grows linearly with the vocabulary size V.
X_hot = torch.zeros(N, T, V)

# Write 1.0 at [n, t, X_idx[n, t]] for every position in a single vectorised call.
# This yields exactly the tensor the former nested Python loop built, without performing
# N*T separate Python-level tensor writes.
X_hot.scatter_(2, X_idx.unsqueeze(-1), 1.0)

hot_ds = TensorDataset(X_hot, y_idx)    # Dataset of (one-hot window, next-word id) pairs
hot_dl = DataLoader(hot_ds, batch_size=64, shuffle=True)


In [5]:
# Simple RNN using one-hot vectors
class RNNHot(nn.Module):
    """Next-word scorer: one ``nn.RNN`` layer followed by a linear read-out.

    Args:
        v: size of each input vector and of the returned score vector.
        h: hidden size of the recurrent layer.
    """

    def __init__(self, v, h):
        super().__init__()
        # Recurrent layer; batch_first=True means inputs are laid out (batch, time, feature).
        self.r = nn.RNN(input_size=v, hidden_size=h, batch_first=True)
        # Projects a hidden state back to one score per input feature.
        self.o = nn.Linear(in_features=h, out_features=v)

    def forward(self, x):
        """Run the RNN over ``x`` and score the hidden state at the last index of dim 1."""
        hidden_seq = self.r(x)[0]
        final_step = hidden_seq[:, -1]
        return self.o(final_step)


In [6]:
# Train one-hot RNN
# Hidden size 64, cross-entropy on the next-word id, Adam with lr=0.001, 20 epochs.
net = RNNHot(V, 64)
loss_fn = nn.CrossEntropyLoss()
opt = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(20):
    running_loss = 0
    for inputs, targets in hot_dl:
        # Clear gradients left over from the previous step before computing new ones.
        opt.zero_grad()
        batch_loss = loss_fn(net(inputs), targets)
        batch_loss.backward()
        opt.step()
        running_loss += batch_loss.item()
    # Report the mean of the per-batch losses for this epoch.
    mean_loss = running_loss / len(hot_dl)
    print(f"Epoch {epoch+1}, Loss: {mean_loss:.4f}")


Epoch 1, Loss: 7.4008
Epoch 2, Loss: 6.7847
Epoch 3, Loss: 6.4992
Epoch 4, Loss: 6.2011
Epoch 5, Loss: 5.9532
Epoch 6, Loss: 5.7124
Epoch 7, Loss: 5.4609
Epoch 8, Loss: 5.1973
Epoch 9, Loss: 4.9165
Epoch 10, Loss: 4.6397
Epoch 11, Loss: 4.3700
Epoch 12, Loss: 4.1097
Epoch 13, Loss: 3.8600
Epoch 14, Loss: 3.6142
Epoch 15, Loss: 3.3816
Epoch 16, Loss: 3.1562
Epoch 17, Loss: 2.9410
Epoch 18, Loss: 2.7313
Epoch 19, Loss: 2.5361
Epoch 20, Loss: 2.3414


In [7]:
# Text generation for one-hot models
def gen_hot(m, seed, n):
    """Greedily extend ``seed`` by ``n`` words using a model that takes one-hot input.

    Args:
        m: model called as ``m(x)`` with ``x`` of shape (1, context_len, V); it is
            switched to eval mode and left in that mode.
        seed: iterable of starting words; each must be a key of the global ``stoi``.
        n: number of words to append.

    Returns:
        The seed words followed by the generated words, joined by single spaces.
    """
    m.eval()
    generated = list(seed)

    with torch.no_grad():
        for _ in range(n):
            # Only the most recent T words (the global window length) are fed to the model.
            context_ids = torch.tensor(
                [stoi[w] for w in generated[-T:]], dtype=torch.long
            )
            steps = len(context_ids)

            # One-hot encode the context: set [0, t, context_ids[t]] for every step t.
            x = torch.zeros(1, steps, V)
            x[0, torch.arange(steps), context_ids] = 1

            # Greedy choice: the index with the highest score becomes the next word.
            best = m(x).argmax().item()
            generated.append(itos[best])

    return " ".join(generated)

# Greedily generate 50 words after a five-word seed with the model currently bound to `net`.
print(gen_hot(net, ["the","sun","shines","bright","upon"], 50))


the sun shines bright upon warmer than the song of hate on a lovelier, or it is know like of dictionary, and often like you swings they dimmed, with the tune that fair lips with his lovers different earth and his of the centre, of his a new comer, by he at and strange, in


In [8]:
# LSTM version using one-hot vectors
class LSTMHot(nn.Module):
    """Next-word scorer: one ``nn.LSTM`` layer followed by a linear read-out.

    Args:
        v: size of each input vector and of the returned score vector.
        h: hidden size of the LSTM layer.
    """

    def __init__(self, v, h):
        super().__init__()
        # LSTM layer; batch_first=True means inputs are laid out (batch, time, feature).
        self.l = nn.LSTM(input_size=v, hidden_size=h, batch_first=True)
        # Projects a hidden state back to one score per input feature.
        self.o = nn.Linear(in_features=h, out_features=v)

    def forward(self, x):
        """Run the LSTM over ``x`` and score the output at the last index of dim 1."""
        hidden_seq = self.l(x)[0]
        final_step = hidden_seq[:, -1]
        return self.o(final_step)


In [9]:
# Train one-hot LSTM
# Same recipe as the one-hot RNN run: hidden size 64, Adam with lr=0.001, 20 epochs.
# `loss_fn` is the CrossEntropyLoss instance created in the one-hot RNN training cell.
net = LSTMHot(V, 64)
opt = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(20):
    running_loss = 0
    for inputs, targets in hot_dl:
        # Clear gradients left over from the previous step before computing new ones.
        opt.zero_grad()
        batch_loss = loss_fn(net(inputs), targets)
        batch_loss.backward()
        opt.step()
        running_loss += batch_loss.item()
    # Report the mean of the per-batch losses for this epoch.
    mean_loss = running_loss / len(hot_dl)
    print(f"Epoch {epoch+1}, Loss: {mean_loss:.4f}")

# Sample 50 words from the freshly trained LSTM using the same seed as the RNN run.
print(gen_hot(net, ["the","sun","shines","bright","upon"], 50))


Epoch 1, Loss: 7.4592
Epoch 2, Loss: 6.8660
Epoch 3, Loss: 6.7024
Epoch 4, Loss: 6.5127
Epoch 5, Loss: 6.2933
Epoch 6, Loss: 6.0509
Epoch 7, Loss: 5.7852
Epoch 8, Loss: 5.5068
Epoch 9, Loss: 5.2276
Epoch 10, Loss: 4.9481
Epoch 11, Loss: 4.6729
Epoch 12, Loss: 4.4015
Epoch 13, Loss: 4.1324
Epoch 14, Loss: 3.8695
Epoch 15, Loss: 3.6120
Epoch 16, Loss: 3.3611
Epoch 17, Loss: 3.1167
Epoch 18, Loss: 2.8738
Epoch 19, Loss: 2.6433
Epoch 20, Loss: 2.4156
the sun shines bright upon more than the young little i am i know i am the spirit of one and all all the best of my and looking and i so that of the wild is like in these the dead of all and ever the wild of long sweet these an an or


In [10]:
# Dataset of raw index windows (no one-hot expansion) paired with next-word ids,
# used by the embedding-based models.
emb_ds = TensorDataset(X_idx, y_idx)
# Same loader settings as hot_dl: batches of 64, shuffled.
emb_dl = DataLoader(emb_ds, batch_size=64, shuffle=True)


In [11]:
# RNN with word embeddings
class RNNEmb(nn.Module):
    """Next-word scorer: embedding lookup, one ``nn.RNN`` layer, linear read-out.

    Args:
        v: number of embedding rows and size of the returned score vector.
        e: embedding dimension (input size of the recurrent layer).
        h: hidden size of the recurrent layer.
    """

    def __init__(self, v, e, h):
        super().__init__()
        # Maps integer ids to dense vectors of size e.
        self.e = nn.Embedding(num_embeddings=v, embedding_dim=e)
        # Recurrent layer; batch_first=True means inputs are laid out (batch, time, feature).
        self.r = nn.RNN(input_size=e, hidden_size=h, batch_first=True)
        # Projects a hidden state to one score per embedding row.
        self.o = nn.Linear(in_features=h, out_features=v)

    def forward(self, x):
        """Embed the ids in ``x``, run the RNN, and score the state at the last index of dim 1."""
        embedded = self.e(x)
        hidden_seq = self.r(embedded)[0]
        final_step = hidden_seq[:, -1]
        return self.o(final_step)


In [12]:
# Train embedding-based RNN
# Embedding size 100, hidden size 64, Adam with lr=0.001, 20 epochs over the index loader.
# `loss_fn` is the CrossEntropyLoss instance created in the one-hot RNN training cell.
net = RNNEmb(V, 100, 64)
opt = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(20):
    running_loss = 0
    for inputs, targets in emb_dl:
        # Clear gradients left over from the previous step before computing new ones.
        opt.zero_grad()
        batch_loss = loss_fn(net(inputs), targets)
        batch_loss.backward()
        opt.step()
        running_loss += batch_loss.item()
    # Report the mean of the per-batch losses for this epoch.
    mean_loss = running_loss / len(emb_dl)
    print(f"Epoch {epoch+1}, Loss: {mean_loss:.4f}")


Epoch 1, Loss: 7.4817
Epoch 2, Loss: 6.5976
Epoch 3, Loss: 6.2512
Epoch 4, Loss: 5.9349
Epoch 5, Loss: 5.6291
Epoch 6, Loss: 5.3326
Epoch 7, Loss: 5.0444
Epoch 8, Loss: 4.7630
Epoch 9, Loss: 4.4880
Epoch 10, Loss: 4.2212
Epoch 11, Loss: 3.9675
Epoch 12, Loss: 3.7225
Epoch 13, Loss: 3.4913
Epoch 14, Loss: 3.2776
Epoch 15, Loss: 3.0768
Epoch 16, Loss: 2.8901
Epoch 17, Loss: 2.7171
Epoch 18, Loss: 2.5502
Epoch 19, Loss: 2.3956
Epoch 20, Loss: 2.2476


In [13]:
# Text generation for embedding models
def gen_emb(m, seed, n):
    """Greedily extend ``seed`` by ``n`` words using a model that takes word ids.

    Args:
        m: model called as ``m(x)`` with ``x`` of shape (1, context_len) holding word
            ids; it is switched to eval mode and left in that mode.
        seed: iterable of starting words; each must be a key of the global ``stoi``.
        n: number of words to append.

    Returns:
        The seed words followed by the generated words, joined by single spaces.
    """
    m.eval()
    generated = list(seed)

    with torch.no_grad():
        for _ in range(n):
            # Only the most recent T words (the global window length) are fed to the model.
            context = generated[-T:]
            # The nested list gives the ids a leading batch dimension of size 1.
            x = torch.tensor([[stoi[w] for w in context]])

            # Greedy choice: the index with the highest score becomes the next word.
            best = m(x).argmax().item()
            generated.append(itos[best])

    return " ".join(generated)

# Greedily generate 50 words after a five-word seed with the model currently bound to `net`.
print(gen_emb(net, ["the","sun","shines","bright","upon"], 50))


the sun shines bright upon his own dashings—yet—the dead are there: and she " i am not i see and i see the day with stealthy tread, leaving me baskets cover'd with their shimmering sound; and frogs in the sea! and i am the poet of the young men glisten'd with him who died and


In [14]:
# LSTM with embeddings
class LSTMEmb(nn.Module):
    """Next-word scorer: embedding lookup, one ``nn.LSTM`` layer, linear read-out.

    Args:
        v: number of embedding rows and size of the returned score vector.
        e: embedding dimension (input size of the LSTM layer).
        h: hidden size of the LSTM layer.
    """

    def __init__(self, v, e, h):
        super().__init__()
        # Maps integer ids to dense vectors of size e.
        self.e = nn.Embedding(num_embeddings=v, embedding_dim=e)
        # LSTM layer; batch_first=True means inputs are laid out (batch, time, feature).
        self.l = nn.LSTM(input_size=e, hidden_size=h, batch_first=True)
        # Projects a hidden state to one score per embedding row.
        self.o = nn.Linear(in_features=h, out_features=v)

    def forward(self, x):
        """Embed the ids in ``x``, run the LSTM, and score the output at the last index of dim 1."""
        embedded = self.e(x)
        hidden_seq = self.l(embedded)[0]
        final_step = hidden_seq[:, -1]
        return self.o(final_step)


In [15]:
# Train embedding-based LSTM
# Embedding size 100, hidden size 64, Adam with lr=0.001, 20 epochs over the index loader.
# `loss_fn` is the CrossEntropyLoss instance created in the one-hot RNN training cell.
net = LSTMEmb(V, 100, 64)
opt = optim.Adam(net.parameters(), lr=0.001)

for epoch in range(20):
    running_loss = 0
    for inputs, targets in emb_dl:
        # Clear gradients left over from the previous step before computing new ones.
        opt.zero_grad()
        batch_loss = loss_fn(net(inputs), targets)
        batch_loss.backward()
        opt.step()
        running_loss += batch_loss.item()
    # Report the mean of the per-batch losses for this epoch.
    mean_loss = running_loss / len(emb_dl)
    print(f"Epoch {epoch+1}, Loss: {mean_loss:.4f}")

# Sample 50 words from the freshly trained LSTM using the same seed as the earlier runs.
print(gen_emb(net, ["the","sun","shines","bright","upon"], 50))


Epoch 1, Loss: 7.4668
Epoch 2, Loss: 6.7015
Epoch 3, Loss: 6.4220
Epoch 4, Loss: 6.1223
Epoch 5, Loss: 5.8242
Epoch 6, Loss: 5.5295
Epoch 7, Loss: 5.2410
Epoch 8, Loss: 4.9570
Epoch 9, Loss: 4.6777
Epoch 10, Loss: 4.4010
Epoch 11, Loss: 4.1299
Epoch 12, Loss: 3.8663
Epoch 13, Loss: 3.6106
Epoch 14, Loss: 3.3639
Epoch 15, Loss: 3.1286
Epoch 16, Loss: 2.9043
Epoch 17, Loss: 2.6935
Epoch 18, Loss: 2.4949
Epoch 19, Loss: 2.3105
Epoch 20, Loss: 2.1373
the sun shines bright upon me, and all the argument of the same or the moon where the squatter strikes of the tale of the murder of the beautiful annabel lee; and the same waits in the field who would not the young husband sleeps by his wife; and the narrowest hinge in my hand
