In [109]:
import torch
from torch import nn
import torch.nn.functional as F
import numpy as np

In [110]:
def detach_from_history(h):
    if type(h) == torch.Tensor:
        return h.detach()

    return tuple(detach_from_history(v) for v in h)


class CharRnn(nn.Module):
    def __init__(self, vocab_size, n_fac, n_hidden, batch_size):
        super().__init__()
        self.e = nn.Embedding(vocab_size, n_fac)
        self.rnn = nn.RNN(n_fac, n_hidden)
        self.l_out = nn.Linear(n_hidden, vocab_size)
        self.n_hidden = n_hidden
        self.init_hidden_state(batch_size)

    def init_hidden_state(self, batch_size):
        self.h = torch.zeros(1, batch_size, self.n_hidden)

    def forward(self, inp):
        inp = self.e(inp)
        b_size = inp[0].size(0)
        if self.h[0].size(1) != b_size:
            self.init_hidden_state(b_size)

        outp, h = self.rnn(inp, self.h)
        self.h = detach_from_history(h)

        return F.log_softmax(self.l_out(outp[-1]), dim=-1)

In [111]:
def generateNextChar(charNet, phraze):
    idxs = np.empty((1, seq_size))
    idxs[0] = np.array([char2int[c] for c in phraze])

    res = charNet(torch.LongTensor(idxs).transpose(0, 1))
    _, t_idxs = torch.max(res, dim=1)

    return int2char[t_idxs.detach().cpu().numpy()[0]]


def generateText(charNet, phraze, numChars):
    cText = phraze
    for i in range(0, numChars):
        cText += generateNextChar(charNet, cText[i:])

    return cText

In [112]:
text = ""
with open("dairy.txt", "r", encoding="utf-8") as file:
    text = file.read().replace("\n", " ")

chars = sorted(list(set(text)))
int2char = dict(enumerate(chars))
char2int = {char: ind for ind, char in int2char.items()}

idx = [char2int[c] for c in text]

epochs = 60
seq_size = 32
hidden_size = 256
batch_size = 300

net = CharRnn(len(char2int), seq_size, hidden_size, batch_size)
lr = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# PREPARE DATA
# overlapping sets of characters, predict 1 character
in_text = np.array(
    [[idx[j + i] for i in range(seq_size)] for j in range(len(idx) - seq_size - 1)]
)
out_text = np.array([idx[j + seq_size] for j in range(len(idx) - seq_size - 1)])

print(in_text.shape)
print(out_text.shape)

# TRAIN
for e in range(0, epochs):
    loss = 0
    for b in range(0, in_text.shape[0] // batch_size):
        input_idxs = (
            torch.LongTensor(in_text[b * batch_size : (b + 1) * batch_size, :seq_size])
            .transpose(0, 1)
        )
        target_idxs = (
            torch.LongTensor(out_text[b * batch_size : (b + 1) * batch_size])
            .squeeze()
        )

        res = net(input_idxs)
        loss = criterion(res, target_idxs)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print("Epoch {}, loss {}".format(e + 1, loss.item()))

# GENERATE
print(text[:seq_size], "=>", generateText(net, text[:seq_size], 120))


(100133, 32)
(100133,)
Epoch 1, loss 2.363274574279785
Epoch 2, loss 2.219494342803955
Epoch 3, loss 2.126389265060425
Epoch 4, loss 2.0525288581848145
Epoch 5, loss 1.9898604154586792
Epoch 6, loss 1.9320149421691895
Epoch 7, loss 1.8864898681640625
Epoch 8, loss 1.8453539609909058
Epoch 9, loss 1.8085178136825562
Epoch 10, loss 1.7726221084594727
Epoch 11, loss 1.7341296672821045
Epoch 12, loss 1.6955574750900269
Epoch 13, loss 1.657349705696106
Epoch 14, loss 1.6244044303894043
Epoch 15, loss 1.6008937358856201
Epoch 16, loss 1.5778106451034546
Epoch 17, loss 1.5522607564926147
Epoch 18, loss 1.5301302671432495
Epoch 19, loss 1.5035696029663086
Epoch 20, loss 1.480438470840454
Epoch 21, loss 1.4585514068603516
Epoch 22, loss 1.4656882286071777
Epoch 23, loss 1.4725834131240845
Epoch 24, loss 1.4323713779449463
Epoch 25, loss 1.4171489477157593
Epoch 26, loss 1.4153741598129272
Epoch 27, loss 1.450045108795166
Epoch 28, loss 1.4422481060028076
Epoch 29, loss 1.4230618476867676
Epoch 

In [113]:
print("Макар отправляется в Ногинск и т", "=>", generateText(net, "Макар отправляется в Ногинск и т", 155))

Макар отправляется в Ногинск и т => Макар отправляется в Ногинск и только словно князь, как бы то, что я старика, помнится, я даже не произнести меня отвечал я, не отвечал на меня во мне в то все-таки в отвечал я, не отвеча
