In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import numpy as np
import pandas as pd
from tqdm import tqdm

import re
import time

In [3]:
torch.cuda.is_available()

False

In [120]:
#Dataloader
class MyDataset(Dataset):
    def __init__(self,input,seq_len):
        """
        input: full corpus
        seq_len: number of features
        """
        self.input = input 
        self.seq_len = seq_len
    
    def __getitem__(self, item):
        """
        return one item in the dataset
        """
        return torch.tensor(self.input[item:item+self.seq_len]), torch.tensor(self.input[item+1:item+1+self.seq_len])
    
    def __len__(self):
        """
        return the length of the dataset
        """
        return len(self.input) - self.seq_len

In [186]:
#Model
class MyRNN(nn.Module):
    def __init__(self, dv, dh, num_layers):
        super(MyRNN,self).__init__()
        self.d_in = dv #num vocab
        self.d_h = dh #hidden state
        self.layer = num_layers
        self.rnn = nn.RNN(input_size = dv,hidden_size = dh,num_layers = num_layers,batch_first = True, bias = True )
        self.fc =nn.Linear(dh,dv)
        if torch.cuda.is_available():
          self.device = torch.device('cuda')
        else:
          self.device = torch.device('cpu')
    
    def forward(self, batch_size, x):
        h0 = torch.zeros(1,batch_size,self.d_h).float().to(self.device)
        result, ht = self.rnn(x,h0)
        result = self.fc(result)
        return result

In [80]:
#get corpus
txt = open('shakespeare.txt','r')
corpus = txt.read()
corpus = re.sub(" +",' ',corpus).strip()
txt.close()

In [81]:
corpus_list = [i.strip()+"\n" for i in re.split('[0-9]',corpus) if len(i)>1]
corpus = re.sub('[0-9]','',corpus)
corpus_list[0]

"From fairest creatures we desire increase,\nThat thereby beauty's rose might never die,\nBut as the riper should by time decease,\nHis tender heir might bear his memory:\nBut thou contracted to thine own bright eyes,\nFeed'st thy light's flame with self-substantial fuel,\nMaking a famine where abundance lies,\nThy self thy foe, to thy sweet self too cruel:\nThou that art now the world's fresh ornament,\nAnd only herald to the gaudy spring,\nWithin thine own bud buriest thy content,\nAnd tender churl mak'st waste in niggarding:\n Pity the world, or else this glutton be,\n To eat the world's due, by the grave and thee.\n"

In [82]:
#build dictionary and word embedding(one hot)
letters = list(corpus.strip())
bow = set(letters)
num_vocab = len(bow)
dictionary={word:num for (num,word) in enumerate(bow)}
word_vectors = []
for 诗 in corpus_list:
    corpus_vectors = []
    for j in 诗:
        v = np.zeros(num_vocab)
        v[dictionary[j]] = 1
        corpus_vectors.append(v)
    word_vectors.append(corpus_vectors)

In [187]:
#training
epochs = 300
d_hidden = 1024
if torch.cuda.is_available():
  device = torch.device('cuda')
else:
  device = torch.device('cpu')
model = MyRNN(num_vocab,d_hidden,1)
model.to(device)
loss_Func = nn.CrossEntropyLoss()
opt = torch.optim.SGD(model.parameters(),lr = 0.005)
pre_loss = float('inf')
for epoch in range(epochs):
    print(f"====================== Epoch:{epoch+1}/{epochs} ======================")
    epoch_loss = 0.0
    # num_ = 0
    for 诗 in word_vectors:
        # num_+=1
        # print(f'诗：{num_}')
        ds = MyDataset(诗,10)
        dl = DataLoader(ds, batch_size= 128)
        total_train_loss = torch.tensor(0.0)
        total_train_loss = total_train_loss.to(device)
        for label, items in enumerate(tqdm(dl)):
            batch = items[0]
            input = batch.float()
            batch_size = batch.shape[0]
            output = items[1]
            input = input.to(device)
            output = output.to(device)
            pred = model(batch_size,input)
            loss = loss_Func(pred, output)
            total_train_loss += loss
        opt.zero_grad()
        total_train_loss.backward()
        opt.step()
        epoch_loss += total_train_loss
    print(f'Epoch: {epoch+1}, epoch loss: {epoch_loss}')
    if (epoch+1)%100 == 0:
        time.sleep(300)




100%|██████████| 5/5 [00:00<00:00, 10.59it/s]
100%|██████████| 5/5 [00:00<00:00, 10.59it/s]
100%|██████████| 5/5 [00:00<00:00, 10.51it/s]
100%|██████████| 5/5 [00:00<00:00, 11.23it/s]
100%|██████████| 5/5 [00:00<00:00, 11.11it/s]
100%|██████████| 5/5 [00:00<00:00, 10.62it/s]


KeyboardInterrupt: 

In [11]:
#optional, for extra training
epochs = 20
for epoch in range(epochs):
    print(f"====================== Epoch:{epoch+1}/{epochs} ======================")
    epoch_loss = 0.0
    # num_ = 0
    for 诗 in word_vectors:
        # num_+=1
        # print(f'诗：{num_}')
        ds = MyDataset(诗,10)
        dl = DataLoader(ds, batch_size= 258)
        total_train_loss = torch.tensor(0.0)
        total_train_loss = total_train_loss.to(device)
        for label, items in enumerate(tqdm(dl)):
            batch = items[0]
            input = batch.float()
            output = items[1]
            input = input.to(device)
            output = output.to(device)
            pred = model(input)
            loss = loss_Func(pred, output)
            total_train_loss += loss
        opt.zero_grad()
        total_train_loss.backward()
        opt.step()
        epoch_loss += total_train_loss
    print(f'Epoch: {epoch+1}, epoch loss: {epoch_loss}')
    time.sleep(10)



100%|██████████| 3/3 [00:00<00:00, 45.31it/s]
100%|██████████| 3/3 [00:00<00:00, 205.94it/s]
100%|██████████| 3/3 [00:00<00:00, 143.32it/s]
100%|██████████| 3/3 [00:00<00:00, 209.02it/s]
100%|██████████| 3/3 [00:00<00:00, 170.51it/s]
100%|██████████| 3/3 [00:00<00:00, 204.04it/s]
100%|██████████| 3/3 [00:00<00:00, 187.15it/s]
100%|██████████| 3/3 [00:00<00:00, 172.10it/s]
100%|██████████| 3/3 [00:00<00:00, 192.26it/s]
100%|██████████| 3/3 [00:00<00:00, 198.91it/s]
100%|██████████| 3/3 [00:00<00:00, 174.79it/s]
100%|██████████| 3/3 [00:00<00:00, 196.37it/s]
100%|██████████| 3/3 [00:00<00:00, 175.98it/s]
100%|██████████| 3/3 [00:00<00:00, 220.73it/s]
100%|██████████| 3/3 [00:00<00:00, 167.35it/s]
100%|██████████| 3/3 [00:00<00:00, 206.69it/s]
100%|██████████| 3/3 [00:00<00:00, 198.58it/s]
100%|██████████| 3/3 [00:00<00:00, 194.94it/s]
100%|██████████| 3/3 [00:00<00:00, 215.56it/s]
100%|██████████| 3/3 [00:00<00:00, 171.90it/s]
100%|██████████| 3/3 [00:00<00:00, 189.61it/s]
100%|█████████

KeyboardInterrupt: 

In [12]:
#get a dictionary for generation
result_dict = {v:k for k,v in dictionary.items()}

In [29]:
#text generating
start = word_vectors[0][0]#[np.random.choice(len(word_vectors))]
func = nn.Softmax(dim=1)
word = ''
count = 0
while count<14:
    out = func(model(torch.tensor([start]).float().to(device)))
    out = out.cpu().detach().numpy()
    num = np.random.choice(np.arange(len(out[0])),p = out[0])
    pred = result_dict[num]
    if pred == "\n":
      count+=1
    word = word+pred
    start = np.zeros(num_vocab)
    start[dictionary[pred]] = 1


In [30]:
print(word)

ongreme thinede,
Whe.
I cass berereresty withes atheesthive, me,
Tharingast meforofofe,
Tonowat whe by med,
Thadongomymely prowor as me prathas I t sthanowofas merelarofas beded os isssthed benesaly beres med.
Whe bed theere,
Astherinalourenousthis thin ast sthave I meathindded I me wins moulinorus mave The,
As fo hinofontofofs mes me fathered berelout becoucadesserinountovinorous thistersthed my t dinscorakemmeallat mmellofofeeresst me,
Mivanouthenerowigs,
That cedatorest onous gro s cathed ales astheat ined mes ad grestherenombes med I be,
Andowofisthake,
Mof festhathymy wigresthasthasthes bed thin med,
Thererthader mad athys's t st stronones ssthalofos medofat sse pre bes my t as as bed ds anofondin grasthathesthan thesthe'sedst,



In [31]:
#save model
torch.save(model.state_dict(), 'RNN_word_generation_model.pth')

In [33]:
#load model
d_hidden = 1024
params = torch.load("RNN_word_generation_model.pth",map_location=torch.device('cpu'))
model = MyRNN(num_vocab,d_hidden,1)
model.load_state_dict(params)

<All keys matched successfully>

In [34]:
model.eval()

MyRNN(
  (rnn): RNN(61, 1024, batch_first=True)
  (fc): Linear(in_features=1024, out_features=61, bias=True)
)

In [164]:
test_corpus = word_vectors[0]


In [182]:
ds = MyDataset(test_corpus,10)
dl =DataLoader(ds, batch_size= 11)

In [183]:
for label, items in enumerate(tqdm(dl)):
    a= items
    break

  0%|          | 0/55 [00:00<?, ?it/s]


In [184]:
test_input = a[0]
test_output = a[1]
h0 = torch.zeros(1,128,1024)

In [185]:
test_input.shape[0]

11

In [168]:
model = nn.RNN(61,1024,1,batch_first = True)
model.eval()

RNN(61, 1024, batch_first=True)

In [169]:
test_input

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]],

        ...,

        [[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0., 

In [177]:
test_pred,test_hidden = model(test_input.float(),h0)

In [178]:
h0

tensor([[[0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         ...,
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.],
         [0., 0., 0.,  ..., 0., 0., 0.]]])

In [151]:
test_pred.shape

torch.Size([128, 10, 1024])

In [152]:
first_layer = nn.Linear(1024,61)

In [153]:
test_pred = first_layer(test_pred)
test_pred.shape

torch.Size([128, 10, 61])

In [154]:
loss_f = nn.CrossEntropyLoss()

In [155]:
loss_f(test_pred, test_output)

tensor(0.3774, dtype=torch.float64, grad_fn=<DivBackward1>)

In [157]:
test_pred,test_output

(tensor([[[ 0.0700,  0.0105,  0.0082,  ..., -0.0171,  0.0064, -0.0104],
          [ 0.0188,  0.0123,  0.0245,  ..., -0.0013,  0.0298,  0.0029],
          [ 0.0475,  0.0171, -0.0154,  ..., -0.0210,  0.0408,  0.0239],
          ...,
          [ 0.0053,  0.0143,  0.0013,  ..., -0.0002,  0.0071,  0.0186],
          [ 0.0333,  0.0299,  0.0022,  ..., -0.0111,  0.0458,  0.0064],
          [ 0.0597,  0.0354, -0.0003,  ..., -0.0095,  0.0323,  0.0249]],
 
         [[ 0.0353,  0.0153,  0.0278,  ..., -0.0006,  0.0239, -0.0012],
          [ 0.0481,  0.0245, -0.0049,  ..., -0.0159,  0.0385,  0.0187],
          [ 0.0317,  0.0097,  0.0027,  ..., -0.0309,  0.0183,  0.0512],
          ...,
          [ 0.0333,  0.0298,  0.0020,  ..., -0.0109,  0.0458,  0.0060],
          [ 0.0599,  0.0355, -0.0002,  ..., -0.0096,  0.0323,  0.0248],
          [ 0.0293,  0.0155,  0.0078,  ..., -0.0052,  0.0513,  0.0357]],
 
         [[ 0.0426,  0.0123,  0.0100,  ..., -0.0207,  0.0225,  0.0073],
          [ 0.0341,  0.0031,