In [1]:
! wget https://raw.githubusercontent.com/ZardashtKaya/pytorch-name-generator/master/kurdish_names.txt

--2020-09-11 13:20:35--  https://raw.githubusercontent.com/ZardashtKaya/pytorch-name-generator/master/kurdish_names.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 151.101.0.133, 151.101.64.133, 151.101.128.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|151.101.0.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 43117 (42K) [text/plain]
Saving to: ‘kurdish_names.txt’


2020-09-11 13:20:35 (1.67 MB/s) - ‘kurdish_names.txt’ saved [43117/43117]



In [2]:
! pip install Unidecode

Collecting Unidecode
[?25l  Downloading https://files.pythonhosted.org/packages/d0/42/d9edfed04228bacea2d824904cae367ee9efd05e6cce7ceaaedd0b0ad964/Unidecode-1.1.1-py2.py3-none-any.whl (238kB)
[K     |████████████████████████████████| 245kB 2.8MB/s 
[?25hInstalling collected packages: Unidecode
Successfully installed Unidecode-1.1.1


In [3]:
import torch
import torch.nn as nn
import string
import random
import sys
import unidecode
from torch.utils.tensorboard import SummaryWriter

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Vocabulary: every character of string.printable; a character's id is its
# position in this string.
all_chars = string.printable
n_chars = len(all_chars)

# Load the whole corpus as one string. The `with` block closes the file handle
# deterministically, and the explicit encoding keeps the read independent of
# the platform's default locale. unidecode transliterates the text to ASCII.
with open('kurdish_names.txt', encoding='utf-8') as names_file:
  File = unidecode.unidecode(names_file.read())

In [5]:
class RNN(nn.Module):
  """Single-step character model: embedding -> stacked LSTM -> linear layer.

  Args:
    input_size: Number of rows in the embedding table (distinct input ids).
    hidden_size: Width of the embedding vectors and of every LSTM layer.
    num_layers: Number of stacked LSTM layers.
    output_size: Number of scores produced per step.
  """

  def __init__(self, input_size, hidden_size, num_layers, output_size):
    super().__init__()
    self.hidden_size = hidden_size
    self.num_layers = num_layers

    self.embed = nn.Embedding(input_size, hidden_size)
    self.lstm = nn.LSTM(hidden_size, hidden_size, num_layers, batch_first=True)
    self.fc = nn.Linear(hidden_size, output_size)

  def forward(self, x, hidden, cell):
    """Advance the model by one time step.

    Args:
      x: 1-D tensor of ids, one per batch element.
      hidden: LSTM hidden state, shaped like the first value of `init_hidden`.
      cell: LSTM cell state, shaped like the second value of `init_hidden`.

    Returns:
      `(out, (hidden, cell))` where `out` has one row of `output_size` scores
      per batch element and `(hidden, cell)` is the updated LSTM state.
    """
    out = self.embed(x)
    # The LSTM is batch_first, so insert a sequence axis of length 1.
    out, (hidden, cell) = self.lstm(out.unsqueeze(1), (hidden, cell))
    # Drop the length-1 sequence axis again before the output projection.
    out = self.fc(out.reshape(out.shape[0], -1))
    return out, (hidden, cell)

  def init_hidden(self, batch_size):
    """Return zeroed `(hidden, cell)` states for a batch of `batch_size`.

    Both tensors have shape `(num_layers, batch_size, hidden_size)`. They are
    created directly on the device and in the dtype of the module's own
    weights, so they always match the model without relying on any
    notebook-level global.
    """
    reference = self.embed.weight
    shape = (self.num_layers, batch_size, self.hidden_size)
    hidden = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
    cell = torch.zeros(shape, device=reference.device, dtype=reference.dtype)
    return hidden, cell

In [6]:
class Generator():
  """Trains the character-level `RNN` on `File` and samples text from it.

  Relies on the notebook-level names `File` (training text), `all_chars` and
  `n_chars` (vocabulary), `device`, `RNN` and `SummaryWriter`.

  Args:
    chunk_len: Number of characters in each training sequence.
    num_epochs: Number of optimisation steps (one random batch per step).
    batch_size: Number of sequences per training batch.
    print_every: Print the loss and a generated sample every this many steps.
    hidden_size: Hidden width passed to `RNN`.
    num_layers: Number of LSTM layers passed to `RNN`.
    lr: Learning rate for Adam.
  """

  def __init__(self, chunk_len=250, num_epochs=5000, batch_size=1,
               print_every=50, hidden_size=256, num_layers=2, lr=0.003):
    self.chunk_len = chunk_len
    self.num_epochs = num_epochs
    self.batch_size = batch_size
    self.print_every = print_every
    self.hidden_size = hidden_size
    self.num_layers = num_layers
    self.lr = lr

  def char_tensor(self, string):
    """Encode `string` as a 1-D LongTensor of positions in `all_chars`.

    Raises:
      ValueError: If a character of `string` does not occur in `all_chars`.
    """
    return torch.tensor([all_chars.index(ch) for ch in string], dtype=torch.long)

  def get_random_batch(self):
    """Draw `batch_size` random chunks of `File` as (input, target) id tensors.

    Returns:
      Two LongTensors of shape `(batch_size, chunk_len)`. Each target row is
      its input row shifted one character to the right in the text.

    Raises:
      ValueError: If `File` has fewer than `chunk_len + 1` characters.
    """
    # A sample needs chunk_len + 1 characters: chunk_len inputs plus the final
    # target. random.randint is inclusive at both ends, so the largest valid
    # start index is len(File) - chunk_len - 1.
    last_start = len(File) - self.chunk_len - 1
    if last_start < 0:
      raise ValueError(
          f'Training text has {len(File)} characters but chunk_len='
          f'{self.chunk_len} needs at least {self.chunk_len + 1}')

    text_input = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)
    text_target = torch.zeros(self.batch_size, self.chunk_len, dtype=torch.long)

    for i in range(self.batch_size):
      # Draw a fresh chunk per row so a batch is not one chunk repeated.
      start_idx = random.randint(0, last_start)
      text_str = File[start_idx:start_idx + self.chunk_len + 1]
      text_input[i] = self.char_tensor(text_str[:-1])
      text_target[i] = self.char_tensor(text_str[1:])

    return text_input, text_target

  def generate(self, initial_str = 'A', predict_len = 100, temperature=0.85):
    """Sample `predict_len` characters from the trained model.

    Args:
      initial_str: Non-empty seed text; it is fed through the model first and
        forms the start of the returned string.
      predict_len: Number of characters to sample after the seed.
      temperature: Divides the scores before sampling; smaller values
        concentrate the distribution on the highest-scoring characters.

    Returns:
      `initial_str` followed by the sampled characters.
    """
    predicted = initial_str
    # Sampling needs no gradients; no_grad avoids building autograd graphs.
    with torch.no_grad():
      # One character is fed at a time, so the state is always batch size 1,
      # whatever batch size is used for training.
      hidden, cell = self.rnn.init_hidden(batch_size=1)
      initial_input = self.char_tensor(initial_str)

      # Warm up the state on every seed character except the last one.
      for p in range(len(initial_str) - 1):
        _, (hidden, cell) = self.rnn(
            initial_input[p].view(1).to(device), hidden, cell
            )
      last_char = initial_input[-1]

      for _ in range(predict_len):
        output, (hidden, cell) = self.rnn(last_char.view(1).to(device), hidden, cell)
        # softmax yields the same distribution as normalised exp(score / T)
        # but cannot overflow for large scores.
        output_dist = torch.softmax(output.view(-1) / temperature, dim=0)
        top_char = int(torch.multinomial(output_dist, 1)[0])
        predicted_char = all_chars[top_char]
        predicted += predicted_char
        last_char = self.char_tensor(predicted_char)
    return predicted

  def train(self):
    """Build a fresh `RNN`, store it on `self.rnn` and train it.

    Runs `num_epochs` steps of Adam on random batches, logging the mean
    per-character loss to TensorBoard under `runs/names0` and printing the
    loss plus a generated sample every `print_every` steps.
    """
    self.rnn = RNN(n_chars, self.hidden_size, self.num_layers, n_chars).to(device)

    optimizer = torch.optim.Adam(self.rnn.parameters(), lr=self.lr)
    criterion = nn.CrossEntropyLoss()
    writer = SummaryWriter('runs/names0')
    print('=> Starting training')
    try:
      for epoch in range(1, self.num_epochs + 1):
        inp, target = self.get_random_batch()
        hidden, cell = self.rnn.init_hidden(batch_size=self.batch_size)

        self.rnn.zero_grad()
        loss = 0
        inp = inp.to(device)
        target = target.to(device)

        # Feed the chunk one character at a time and sum the step losses.
        for c in range(self.chunk_len):
          output, (hidden, cell) = self.rnn(inp[:, c], hidden, cell)
          loss += criterion(output, target[:, c])

        loss.backward()
        optimizer.step()
        mean_loss = loss.item() / self.chunk_len

        if epoch % self.print_every == 0:
          print(f'Loss: {mean_loss}')
          print(self.generate())
        writer.add_scalar('Training loss:', mean_loss, global_step=epoch)
    finally:
      # Flush and close the event file even if training is interrupted.
      writer.close()


In [None]:
# Build a generator with its default hyperparameters and start training.
# train() prints the running loss and a generated sample at regular intervals.
gennames = Generator()
gennames.train()

=> Starting training
Loss: 2.39720654296875
AA
EON
E
AEO
ERKC
MA
TIYAN
MHMIRH
HAYINA
BERHA
A
DSYIRHARA
LIRG
LMON
ZIGIDA
SDSA
ANIDT
HHIAN
DIYIREN

