In [70]:
import json
import os
from collections import Counter
from random import randrange

import numpy as np
import torch
import torch.nn as nn

class CharacterLanguageModel(nn.Module):
    """Three-layer LSTM followed by a two-layer feed-forward head.

    The network maps a sequence of per-character feature vectors of width
    ``num_unique_chars`` to one score vector of the same width per position.
    """

    def __init__(self, num_unique_chars, hidden_state_dim=512):
        """Build the LSTM stack and the output head.

        Args:
            num_unique_chars: width of every input vector and of every
                output score vector.
            hidden_state_dim: hidden size of the LSTM and of the first
                linear layer.
        """
        super().__init__()

        # Attribute names double as state_dict keys, so they must stay stable
        # for previously stored checkpoints to keep loading.
        self.lstm = nn.LSTM(
            input_size=num_unique_chars,
            hidden_size=hidden_state_dim,
            num_layers=3,
        )
        self.linear_layer_1 = nn.Linear(hidden_state_dim, hidden_state_dim)
        self.linear_layer_2 = nn.Linear(hidden_state_dim, num_unique_chars)

        # Named ``relu`` but is in fact a LeakyReLU with the default slope.
        self.relu = nn.LeakyReLU()

    def forward(self, character_indices_tensor):
        """Return unnormalised next-character scores for every position.

        Args:
            character_indices_tensor: despite the name, the callers in this
                file pass a one-hot float tensor of shape
                ``(sequence_length, num_unique_chars)``.

        Returns:
            Tensor with one score vector of width ``num_unique_chars`` per
            input position. No softmax is applied.
        """
        # The LSTM's final (hidden, cell) state is not needed here.
        lstm_features, _ = self.lstm(character_indices_tensor)
        activated = self.relu(self.linear_layer_1(lstm_features))
        return self.linear_layer_2(activated)
    
class CharacterLanguageModelWrapper:
    """Training, sampling and persistence helpers around CharacterLanguageModel.

    Typical use is ``load_data`` -> ``load_unique_characters`` ->
    ``create_model`` -> ``train_model`` / ``predict``, or ``load_model`` ->
    ``predict`` when restoring a stored checkpoint.
    """

    def __init__(self, context_window=3000, hidden_state_dim=512):
        """Store the hyper-parameters; no data or model is loaded here.

        Args:
            context_window: number of input characters in one training window.
            hidden_state_dim: hidden size handed to CharacterLanguageModel.
        """
        self.context_window = context_window
        self.hidden_state_dim = hidden_state_dim

    def load_data(self, file_path, encoding='utf-8'):
        """Read the whole training corpus into ``self.training_data``.

        Args:
            file_path: path of the text file to read.
            encoding: text encoding of the file. Given explicitly so the
                result does not depend on the platform's default locale.
        """
        with open(file_path, 'r', encoding=encoding) as file:
            self.training_data = file.read()

    def load_unique_characters(self, min_char_count=500):
        """Build the character vocabulary from ``self.training_data``.

        Characters occurring strictly more than ``min_char_count`` times get
        consecutive indices in order of first appearance. One extra slot (the
        last index) is reserved for every other character.

        Args:
            min_char_count: a character must occur more often than this to
                receive its own index.
        """
        # Counter keeps first-seen order, so index assignment is deterministic.
        char_counts = Counter(self.training_data)
        frequent_chars = [c for c, count in char_counts.items() if count > min_char_count]

        self.unique_characters = {c: i for i, c in enumerate(frequent_chars)}
        self.num_unique_characters = len(self.unique_characters) + 1

    def create_model(self):
        """Pick the device (CUDA when available) and create a fresh model on it."""
        has_cuda = torch.cuda.is_available()
        print('Has CUDA', has_cuda)
        self.device = torch.device('cuda' if has_cuda else 'cpu')
        self.model = CharacterLanguageModel(self.num_unique_characters, self.hidden_state_dim).to(self.device)

    def train_model(self, epochs=1, lr=0.0001, skip_factor=100):
        """Train on a sparse sample of windows taken from the corpus.

        Each epoch starts at a random offset and then jumps ahead by
        ``context_window * skip_factor`` characters after every step, so only
        about one window in ``skip_factor`` is visited per epoch.

        Args:
            epochs: number of passes over the corpus.
            lr: learning rate of the Adam optimizer.
            skip_factor: stride between windows, in units of ``context_window``.
        """
        optimizer = torch.optim.Adam(self.model.parameters(), lr=lr)
        stride = self.context_window * skip_factor

        # load_model() leaves the network in eval mode; switch back explicitly.
        self.model.train()

        for epoch in range(epochs):
            offset = randrange(stride)
            total_loss = 0

            while offset + self.context_window + 1 < len(self.training_data):
                optimizer.zero_grad()

                # One extra character so inputs and shifted targets have equal length.
                window = self.training_data[offset:offset + self.context_window + 1]
                one_hot = torch.FloatTensor(self._text_to_tensor(window)).to(self.device)

                # Call the module rather than .forward() so hooks are honoured.
                prediction = self.model(one_hot[:-1])
                # Targets are the one-hot rows of the next character at each position.
                loss = nn.functional.cross_entropy(prediction, one_hot[1:])
                loss.backward()
                optimizer.step()

                total_loss += loss.item()

                offset += stride
            print(f'Epoch {epoch + 1}, Train Loss {total_loss}')

    def predict(self, text, length=500):
        """Extend ``text`` by sampling ``length`` characters from the model.

        The whole text generated so far is fed to the model at every step and
        the next character is drawn from the softmax over the last position.
        The reserved unknown-character slot is never sampled.

        Args:
            text: non-empty prompt to continue.
            length: number of characters to append.

        Returns:
            The prompt followed by the sampled characters.
        """
        # Index -> character lookup; dict order matches the assigned indices.
        index_to_char = list(self.unique_characters.keys())

        # Sampling needs no gradients; skipping the autograd graph saves
        # memory and time on every step.
        with torch.no_grad():
            for _ in range(length):
                one_hot = torch.FloatTensor(self._text_to_tensor(text)).to(self.device)
                prediction = self.model(one_hot)

                probs = nn.functional.softmax(prediction[-1], dim=0).cpu().numpy()
                # Drop the unknown slot and renormalise in float64 so the
                # probabilities sum to one as np.random.choice requires.
                known_probs = probs[:-1].astype(np.float64)
                known_probs /= known_probs.sum()
                res_index = np.random.choice(len(known_probs), p=known_probs)

                text += index_to_char[res_index]

        return text

    def store_model(self, name):
        """Write weights to ``models/<name>.pt`` and the vocabulary to ``models/<name>.json``.

        Args:
            name: file stem of the checkpoint.
        """
        # torch.save does not create missing directories.
        os.makedirs('models', exist_ok=True)

        torch.save(self.model.state_dict(), f'models/{name}.pt')

        with open(f'models/{name}.json', 'w') as f:
            json.dump({'unique_characters': self.unique_characters, 'num_unique_characters': self.num_unique_characters}, f)

    def load_model(self, name):
        """Restore vocabulary and weights written by ``store_model``.

        The wrapper must have been constructed with the same
        ``hidden_state_dim`` that was used when the checkpoint was stored,
        because the model is rebuilt from ``self.hidden_state_dim``.

        Args:
            name: file stem of the checkpoint inside ``models/``.
        """
        with open(f'models/{name}.json') as f:
            metadata = json.load(f)
        self.unique_characters = metadata['unique_characters']
        self.num_unique_characters = metadata['num_unique_characters']

        self.create_model()

        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
        # NOTE: torch.load unpickles the file; only load checkpoints you trust.
        state_dict = torch.load(f'models/{name}.pt', map_location=self.device)
        self.model.load_state_dict(state_dict)
        self.model.eval()

    def _text_to_tensor(self, text_in):
        """One-hot encode ``text_in`` as a list of rows, one per character.

        Returns:
            List of ``len(text_in)`` lists, each of width
            ``self.num_unique_characters`` with a single 1 at the character's index.
        """
        width = self.num_unique_characters
        rows = []
        for char in text_in:
            row = [0] * width
            row[self._get_char_index(char)] = 1
            rows.append(row)
        return rows

    def _get_char_index(self, char):
        """Return the vocabulary index of ``char``, or the last (unknown) index."""
        return self.unique_characters.get(char, self.num_unique_characters - 1)


In [72]:
# Model training
def train(data_path='wiki_large.train.raw', iterations=100000, checkpoint_every=10, hidden_state_dim=512):
    """Train a character model from scratch, printing a sample after every round.

    Each round runs ``train_model()`` once with its defaults, prints a
    continuation of the prompt ``'th'`` and, whenever the zero-based round
    index is a multiple of ``checkpoint_every``, stores a checkpoint named
    ``epoch_<index + 1>``.

    Args:
        data_path: text file used as the training corpus.
        iterations: number of training rounds.
        checkpoint_every: store a checkpoint every this many rounds.
        hidden_state_dim: hidden size of the model.
    """
    model = CharacterLanguageModelWrapper(hidden_state_dim=hidden_state_dim)
    model.load_data(data_path)
    model.load_unique_characters()
    model.create_model()

    for i in range(iterations):
        model.train_model()
        print(model.predict('th'))

        if i % checkpoint_every == 0:
            model.store_model(f'epoch_{i + 1}')


In [73]:
# Restore a stored checkpoint and print a sampled continuation of a prompt.
# hidden_state_dim has to match the value the checkpoint was trained with.
model = CharacterLanguageModelWrapper(hidden_state_dim=512)
model.load_model(name='epoch_271')
generated_text = model.predict(text='Patrik ')
print(generated_text)


Has CUDA True
Patrik from the John 's sub @-@ camp date based on cutblage , and garden East Ponto , where it faced him from an apple attack at once , and was towed to progress a colosity of her transport and mentioning Defence 's killing . This encompassed a year in late 1864 . The ship 's score to 335 years old , and he made peak on the Death Grand Post , the Vicarboat River in 1633 . This came to liast from Harvard on 6 August 1182 , but more than fleeing the Concorde until 1500 , he comes to hundre I minor each g
