In [2]:
import torch.nn as nn
import torch
import os
from tqdm import tqdm
import random

device = "cuda" if torch.cuda.is_available() else "cpu"

In [3]:
class RNN(nn.Module):

    def __init__(self, input_size:int,
                        hidden_size:int,
                        dropout: float = 0.5,
                        hidden_layers: int = 1):
        """Initialize an RNN
        for training purposes
        """
        super(RNN,self).__init__()
        
        self.i2h = nn.Linear(input_size, hidden_size)
        layers = []
        self.hidden_layers = hidden_layers
        self.hidden_size = hidden_size

        for _ in range(hidden_layers):
            layers.append(nn.Sequential(
                nn.Linear(hidden_size * 2, hidden_size),
                nn.Tanh(),
                nn.Dropout(dropout)
            ))

        self.layers = nn.ModuleList(layers)

        self.h2o = nn.Linear(hidden_size, input_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, input, hidden):
        
        encoded = self.i2h(input)

        for idx, rnn_lay in enumerate(self.layers):
            combined = torch.cat((encoded, hidden[idx:idx+1]),1)
            encoded  = rnn_lay(combined)
            hidden[idx] = encoded

        output = self.softmax(self.h2o(encoded))

        return output, hidden

    def init_hidden(self):
        return torch.randn(self.hidden_layers, self.hidden_size).to(device)

In [4]:
all_letters = "".join(["a","b","c","ç","d","e","f","g","ğ","h","ı","i",
                "j","k","l","m","n","o","ö","p","q","r","s","ş","t",
                "u","ü","v","w","x","y","z",".",";",",","?","!", " ",
                "0","1","2","3","4","5","6","7","8","9","0","[","]",
                "{","}","(",")","'","\"","%",":","\n","\r\n","-","$","+",
                "*","/","#","@"])

def letter_to_index(letter: str):
    """converts letter to an index
    """
    return all_letters.find(letter.lower())

def letter_to_tensor(letter: str):
    tensor = torch.zeros(1, len(all_letters))
    tensor[0][letter_to_index(letter)] = 1
    return tensor

def line_to_tensor(line):
    tensor = torch.zeros(len(line),1,len(all_letters),dtype=torch.float32)
    for idx, letter in tqdm(enumerate(line)):
        tensor[idx][0][letter_to_index(letter)] = 1
    return tensor


In [5]:
#prep training data

def prep_training_data(path):
    """load data from a path with
    txt files
    """
    files = os.listdir(path)
    print(f"{len(files)} files found!")
    texts = "".join([open(os.path.join(path, file),"r").read()
                for file in files]).lower()

    print(f"{len(texts)} characters...")

    texts = "".join([ch for ch in texts if ch in all_letters])

    return line_to_tensor(texts)
    

input = prep_training_data("./paul_graham/")


217 files found!
2994149 characters...


2993152it [00:13, 214022.30it/s]


In [7]:
import seaborn as sns

def train(rnn, inputs, optimizer,lr=0.005):
    
    
    rnn.train()
    hidden = rnn.init_hidden()
    loss_fn = nn.CrossEntropyLoss()
    losses = 0
    clipping_value = 1 # arbitrary value of your choosing
    for idx in range(inputs.shape[0]-1):
        output, hidden = rnn(inputs[idx], hidden)
        loss = loss_fn(torch.flatten(output), torch.flatten(inputs[idx+1]))
        losses += loss
    
    loss.backward()
    #optimizer.step()
    torch.nn.utils.clip_grad_norm_(rnn.parameters(), clipping_value)
    for p in rnn.parameters():
        p.data.add_(p.grad.data, alpha=-lr)

    return losses / (inputs.shape[0])

print(f"Device is {device}")

rnn = RNN(
    input_size=len(all_letters),
    hidden_size=512,
    hidden_layers=2
).to(device)
rnn.load_state_dict(torch.load("rnn_512_2layer_softmax_final.bin"))

inputs = input.to(device)
optimizer = torch.optim.Adam(rnn.parameters(), lr=0.01)


Device is cuda


In [8]:
batch_size = 100
batch_count = inputs.shape[0]//batch_size

loss_epoch = []
while True:

    pbar = tqdm(range(10000))
    for epoch in pbar:

        batch = int(random.random() * batch_count)
        rnn.zero_grad()
        inps = inputs[batch*batch_size: batch*batch_size + batch_size]
        loss = train(rnn, inps, optimizer,lr=0.005)
        loss_epoch.append(loss)
        pbar.set_description(f"Epoch {epoch} | Loss: {loss:.2f}")

        if epoch % 1000 == 0:
                print(f"Epoch {epoch} Avg loss: {sum(loss_epoch)/len(loss_epoch):.2f}")
                loss_epoch = []

    print(f"Epoch {epoch} Avg loss: {sum(loss_epoch)/len(loss_epoch):.2f}")
   

  return self._call_impl(*args, **kwargs)
Epoch 2 | Loss: 2.57:   0%|          | 3/10000 [00:00<21:55,  7.60it/s]

Epoch 0 Avg loss: 2.23


Epoch 32 | Loss: 2.96:   0%|          | 33/10000 [00:02<14:35, 11.38it/s]


KeyboardInterrupt: 

In [None]:
#torch.save(rnn.state_dict(), "rnn.bin")
#rnn.load_state_dict(torch.load("rnn_2layer_512_softmax.bin"))

In [9]:

rnn.eval()

def tensor_to_char(tensor):
    return all_letters[int((tensor == 1).nonzero(as_tuple=True)[0])]

ch = ""
output = letter_to_tensor(ch).to(device)

hidden = rnn.init_hidden()
print(ch,end="")
with torch.no_grad():
    for idx in range(1000):

        output, hidden = rnn(output, hidden)
        output_dist = output.data.view(-1).div(0.6).exp()
        top_i = int(torch.multinomial(output_dist, 1)[0])
        #top_i = int(output.argmax())#
        print(all_letters[top_i],end="")
        output = letter_to_tensor(all_letters[top_i]).to(device)
        

ke the and the saing in a just the sime the more the make the read the companay companies and what the was the with companies the and vers was the in the was must of the mant of dang the fire what the nound in a startups whing be compans af you can a vertion of the pare the most in the are in and deally for the something in the startups are was whit startups in a startups are the compant the for and you dange of the same the sale was the sive in the companies when the sime the was in a startups are the sime and the company deally startups and startups was of the most of the ind really the make a kead in a companies of the startups prople the startups are a startups in prowere the sact as the sime the and deally startups in the difers same the compance and startups more some the was in the companies in the mighat is the companies the was a make the best companing the pars was the dide the prople and startups and was of the not was a for the same the mach companies the a for the bere the