## Prepare data

In [3]:
import unidecode
import string
import random
import torch

In [5]:
# Character vocabulary: every character in Python's string.printable.
# A character's position in this string is its integer id (see char_tensor).
all_characters=string.printable

In [6]:
# Display the vocabulary string to inspect its contents and ordering.
all_characters

'0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ \t\n\r\x0b\x0c'

In [7]:
# Vocabulary size; passed to RNN below as both its input and output dimension.
n_characters=len(all_characters)
n_characters

100

In [114]:
# Load the training corpus and pass it through unidecode (ASCII transliteration).
# The context manager closes the file handle as soon as the text has been read,
# instead of leaving it open until garbage collection.
with open('cg.txt') as corpus_file:
    file=unidecode.unidecode(corpus_file.read())

In [115]:
# Number of characters in the loaded corpus; random_chunk() uses it to bound
# the random start index.
file_len=len(file)
print('file_len',file_len)

file_len 117135


Splitting the input text into random chunks

In [116]:
# Number of training steps per sampled chunk; each chunk holds chunk_len+1 characters.
chunk_len=200

def random_chunk():
    """Return a random run of ``chunk_len + 1`` consecutive characters from ``file``.

    The extra character lets callers build an input sequence (all but the
    last character) and a target sequence (all but the first character)
    that are each exactly ``chunk_len`` long.

    Returns:
        str: a slice of ``file`` with length ``chunk_len + 1``.

    Raises:
        ValueError: if the corpus has fewer than ``chunk_len + 1`` characters.
    """
    if file_len<=chunk_len:
        raise ValueError('corpus is too short to sample a chunk of chunk_len+1 characters')
    # random.randint includes BOTH endpoints, so the largest usable start is
    # file_len-(chunk_len+1). Using file_len-chunk_len here would occasionally
    # produce a chunk that is one character short.
    start_index=random.randint(0,file_len-chunk_len-1)
    end_index=start_index+chunk_len+1
    return file[start_index:end_index]
print(random_chunk())

untable Opportunity."

"It Is Like Drinking From A Firehose."

"Be Sure To Have YOUR Messages `Netiquette Approved.'"


These sentiments reflect a portion of the Internet who
have terrified thoughts an


## Build the Model

In [156]:
import torch
import torch.nn as nn

class RNN(nn.Module):
    """Single-step character model: embedding -> GRU -> linear decoder.

    Each call to ``forward`` consumes one character index and the current
    hidden state, and returns scores over the output vocabulary together
    with the updated hidden state.
    """

    def __init__(self,input_size,hidden_size,output_size,n_layers=1):
        """Create the embedding, GRU and decoder layers.

        Args:
            input_size: number of distinct input symbols (embedding rows).
            hidden_size: embedding width and GRU hidden width.
            output_size: number of output scores produced per step.
            n_layers: number of stacked GRU layers.
        """
        super().__init__()
        # Keep the configuration around; init_hidden reads n_layers/hidden_size.
        self.n_layers=n_layers
        self.output_size=output_size
        self.hidden_size=hidden_size
        self.input_size=input_size

        # Layers are created in this order: embedding, GRU, decoder.
        self.encoder=nn.Embedding(input_size,hidden_size)
        self.gru=nn.GRU(hidden_size,hidden_size,n_layers)
        self.decoder=nn.Linear(hidden_size,output_size)

    def forward(self,input,hidden):
        """Advance the model by one character.

        Args:
            input: tensor holding a single character index.
            hidden: hidden state tensor, as produced by ``init_hidden`` or a
                previous ``forward`` call.

        Returns:
            A pair ``(scores, hidden)`` where ``scores`` is reshaped to one row
            of decoder outputs and ``hidden`` is the GRU's new hidden state.
        """
        embedded=self.encoder(input.reshape(1,-1))
        # The GRU is fed a (1, 1, features) tensor: one step, one sequence.
        step_input=embedded.reshape(1,1,-1)
        gru_out,next_hidden=self.gru(step_input,hidden)
        scores=self.decoder(gru_out.reshape(1,-1))
        return scores,next_hidden

    def init_hidden(self):
        """Return an all-zero hidden state of shape (n_layers, 1, hidden_size)."""
        shape=(self.n_layers,1,self.hidden_size)
        return torch.zeros(*shape)

## Inputs and Targets

In [157]:
#Convert a string into a 1-D int64 tensor of vocabulary indices
def char_tensor(string):
    """Map each character of ``string`` to its position in ``all_characters``.

    Args:
        string: text to encode.

    Returns:
        A 1-D ``torch.int64`` tensor with one entry per character.

    Raises:
        ValueError: if a character does not occur in ``all_characters``
            (raised by ``str.index``).
    """
    indices=[all_characters.index(ch) for ch in string]
    return torch.tensor(indices,dtype=torch.int64)
print(char_tensor('abcDEF'))

tensor([10, 11, 12, 39, 40, 41])


In [158]:
def random_training_set():
    """Sample one random chunk and return its ``(input, target)`` index tensors.

    The target is the input shifted by one character: the input drops the
    chunk's last character and the target drops its first, so each target
    entry is the character that follows the matching input entry.
    """
    text=random_chunk()
    return char_tensor(text[:-1]),char_tensor(text[1:])

## Training

In [164]:
#Hyperparameters
# n_epochs controls how many times the training loop below iterates;
# hidden_size and n_layers are passed to RNN; lr is the Adam learning rate.
n_epochs=2000
hidden_size=100
n_layers=1
lr=0.005


#Build the model, optimizer and loss
# n_characters is used as both the input and the output size of the model.
model=RNN(n_characters,hidden_size,n_characters,n_layers)
optimizer=torch.optim.Adam(model.parameters(),lr=lr)
criterion=nn.CrossEntropyLoss()




#Training loop: every iteration samples one random chunk and takes one optimizer step.
# range(n_epochs) performs exactly n_epochs updates; range(n_epochs+1) ran one extra.
for epoch in range(n_epochs):
    inp,label=random_training_set()
    # Start each chunk from a fresh zero hidden state and cleared gradients.
    hidden=model.init_hidden()
    optimizer.zero_grad()
    loss=0
    # Walk input/target characters in lockstep. zip stops at the shorter tensor,
    # so a chunk that comes back shorter than chunk_len cannot cause an
    # out-of-range index.
    for inp_char,target_char in zip(inp,label):
        output,hidden=model(inp_char,hidden)
        # The target is reshaped to 1-D to match the single row of scores.
        loss+=criterion(output,target_char.reshape(-1))
    # Backpropagate the loss summed over the whole chunk, then update the weights.
    loss.backward()
    optimizer.step()
        


## Evaluating 

In [160]:
def evaluate(prime_str='A',predict_len=100,temperature=0.8):
    """Generate text from ``model``, starting from ``prime_str``.

    Args:
        prime_str: seed text. All but its last character are fed through the
            model to build up the hidden state; the last character is the
            first input of the sampling loop.
        predict_len: number of characters to sample.
        temperature: the model's scores are divided by this value before
            exponentiation, so smaller values concentrate the sampling
            weights on the highest-scoring characters.

    Returns:
        str: ``prime_str`` followed by ``predict_len`` sampled characters.
    """
    prime_input=char_tensor(prime_str)
    predicted=prime_str

    # Generation never backpropagates. Without no_grad the hidden state would
    # carry an autograd graph that grows with every generated character.
    with torch.no_grad():
        hidden=model.init_hidden()

        # Use the priming string to build up the hidden state.
        for p in range(len(prime_str)-1):
            _,hidden=model(prime_input[p],hidden)
        inp=prime_input[-1]

        for p in range(predict_len):
            output,hidden=model(inp,hidden)

            # exp(score/temperature) gives unnormalised sampling weights,
            # which torch.multinomial accepts directly.
            output_dist=output.reshape(-1).div(temperature).exp()
            top_i=torch.multinomial(output_dist,1)[0].item()

            # Append the sampled character and feed it back as the next input.
            predicted_char=all_characters[top_i]
            predicted+=predicted_char
            inp=char_tensor(predicted_char)

    return predicted

In [174]:
# Sample 100 characters primed with 'Gu', using temperature 0.4.
print(evaluate('Gu',100,temperature=0.4))

Gutenberg-tm tradement to person of the for the to light reted to the never the price the for and the 
