In [1]:
import torch
import torch.nn as nn
import numpy as np
import torch.nn.functional as F
import torch.optim.lr_scheduler as lr_scheduler
import torch.optim as optim

In [2]:
import torchtext.vocab as vocab

In [3]:
glove = vocab.GloVe(name='6B', dim=100)
alphabet = 'abcdefghijklmnopqrstuvwxyz'
letter_vocab = {letter: idx for idx, letter in enumerate(alphabet)}

In [4]:
def letter_to_embedding(letter):
    if letter in letter_vocab:
        index = letter_vocab[letter]
    else:
        # Handle unknown letters with a special token or random embedding
        index = letter_vocab['<UNK>']
    return glove.vectors[index]

In [5]:
input_letter = 'a'
embedding = letter_to_embedding(input_letter)
embedding.shape

torch.Size([100])

In [6]:
filename = "./words_250000_train.txt"

word_dict = []
with open(filename, 'r') as f:
    for line in f:
        word = line.split()[0]
        if word.isalpha():
            word_dict.append(word)

words = list(np.random.permutation(list(word_dict)))


In [7]:
len(words)

227300

In [8]:
train_val_split_idx = int(len(words) * 0.8)

MAX_NUM_INPUTS = max([len(i) for i in words[:train_val_split_idx]])
EPOCH_SIZE = train_val_split_idx
NUM_EPOCHS = 100
BATCH_SIZE = np.array([len(i) for i in words[:train_val_split_idx]]).mean() #?

In [9]:
MAX_NUM_INPUTS, EPOCH_SIZE, NUM_EPOCHS, BATCH_SIZE

(29, 181840, 100, 9.35000549934008)

In [10]:
class HangmanPlayer:
    def __init__(self, word, model, lives=6):
        self.original_word = word
        self.full_word = [ord(i)-97 for i in word]
        self.embedded_word = [list(letter_to_embedding(letter)) for letter in word]
        self.embedded_word = torch.tensor(self.embedded_word)
        self.letters_guessed = set([])
        self.letters_remaining = set(self.full_word)
        self.lives_left = lives
        self.obscured_words_seen = []
        self.letters_previously_guessed = []
        self.guesses = []
        self.correct_responses = []
        self.z = model
        return
    
    def encode_obscured_word(self):
        word = [i if i in self.letters_guessed else 26 for i in self.full_word]
        obscured_word = np.zeros((len(word), 100), dtype=np.float32)
        
        for i, j in enumerate(word):
            if j != 26:
                obscured_word[i] = list(letter_to_embedding(chr(97+j)))
                
        return(torch.tensor(obscured_word))
    
    def encode_guess(self, guess):
        encoded_guess = np.zeros(26, dtype=np.float32)
        encoded_guess[guess] = 1
        return(encoded_guess)

    def encode_previous_guesses(self):
        # Create a 1 x 26 vector where 1s indicate that the letter was previously guessed
        guess = np.zeros(26, dtype=np.float32)
        for i in self.letters_guessed:
            guess[i] = 1
        return(guess)
    
    def encode_correct_responses(self):
        # To be used with cross_entropy_with_softmax, this vector must be normalized
        response = np.zeros(26, dtype=np.float32)
        for i in self.letters_remaining:
            response[i] = 1.0
        response /= response.sum()
        return(response)

     
    
    def store_guess_and_result(self, guess):
        # Record what the model saw as input: an obscured word and a list of previously-guessed letters
        self.obscured_words_seen.append(self.encode_obscured_word())
        self.letters_previously_guessed.append(self.encode_previous_guesses())
        
        # Record the letter that the model guessed, and add that guess to the list of previous guesses
        self.guesses.append(guess)
        self.letters_guessed.add(int(guess))
        # print(guess)
        # Store the "correct responses"
        correct_responses = self.encode_correct_responses()
        self.correct_responses.append(correct_responses)
        
        # Determine an appropriate reward, and reduce # of lives left if appropriate
        if int(guess) in self.letters_remaining:
            self.letters_remaining.remove(int(guess))
        
        if self.correct_responses[-1][guess] < 0.00001:
            self.lives_left -= 1
        return
                
    def run(self):
        # Play a game until we run out of lives or letters
        while (self.lives_left > 0) and (len(self.letters_remaining) > 0):
            self.encoded_obscured_word = torch.tensor(self.encode_obscured_word())
            self.encoded_obscured_word = torch.reshape(self.encoded_obscured_word, [1, self.encoded_obscured_word.shape[0], self.encoded_obscured_word.shape[1]] )
            
            self.encoded_previous_guesses = torch.tensor(self.encode_previous_guesses())
            self.encoded_previous_guesses = torch.reshape(self.encoded_previous_guesses, [1, self.encoded_previous_guesses.shape[0]])
            
            output = torch.squeeze(self.z(self.encoded_obscured_word, self.encoded_previous_guesses))
            guess = torch.argmax(output)
            attempt = 2
            while int(guess) in self.letters_guessed:
                guess = torch.topk(output, k=attempt).indices[attempt-1]
                attempt += 1
            self.store_guess_and_result(guess)

        b = torch.tensor(self.letters_previously_guessed)
        c = torch.tensor(self.correct_responses)
        # Return the observations for use in training (both inputs, predictions, and losses)
        return torch.stack(self.obscured_words_seen), torch.tensor(self.letters_previously_guessed), torch.tensor(self.correct_responses)

In [11]:
class LSTMNet(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(LSTMNet, self).__init__()
        self.lstm = nn.LSTM(input_size, hidden_size)
        self.fc = nn.Linear(hidden_size + output_size, output_size)
        

    def forward(self, input_obscured_word_seen, input_letters_guessed_previously):
        
        lstm_outputs, _ = self.lstm(input_obscured_word_seen)
        final_lstm_output = lstm_outputs[:, -1, : ]

        combined_input = torch.cat((final_lstm_output, input_letters_guessed_previously), dim=1)
        output = self.fc(combined_input)
        return output
    

In [24]:
lstm = LSTMNet(100 ,MAX_NUM_INPUTS, 26)

In [22]:
learning_rate = 0.001
momentum_time_constant = BATCH_SIZE / -np.log(0.9)

criterion = nn.CrossEntropyLoss()

optimizer = optim.SGD(lstm.parameters(), lr=learning_rate, momentum=momentum_time_constant)

In [27]:
total_samples = 0

for epoch in range(NUM_EPOCHS):
    i = 0
    while total_samples < (epoch+1) * EPOCH_SIZE:
        torch.autograd.set_detect_anomaly(True)
        
        optimizer.zero_grad()

        word = words[i]
        i += 1

        other_player = HangmanPlayer(word, lstm)
        words_seen, previous_letters, correct_responses = other_player.run()

        # output = lstm(words_seen,  previous_letters)
       
        
        optimizer.zero_grad()
        words_seen.shape
        z = lstm(words_seen, previous_letters)
        
        loss = F.cross_entropy(z, correct_responses)
        
        loss.backward()

        optimizer.step()
        if total_samples % 20000 == 0:
            print(loss)
       
    print("_________________________________________________________")
    print(epoch)
    print("_________________________________________________________")

        
        
    
        
        

  self.encoded_obscured_word = torch.tensor(self.encode_obscured_word())


tensor(3.2571, grad_fn=<DivBackward1>)
tensor(3.3926, grad_fn=<DivBackward1>)
tensor(3.2994, grad_fn=<DivBackward1>)
tensor(3.3402, grad_fn=<DivBackward1>)
tensor(3.3450, grad_fn=<DivBackward1>)
tensor(3.3749, grad_fn=<DivBackward1>)
tensor(3.3274, grad_fn=<DivBackward1>)
tensor(3.3137, grad_fn=<DivBackward1>)
tensor(3.3531, grad_fn=<DivBackward1>)
tensor(3.3300, grad_fn=<DivBackward1>)
tensor(3.3473, grad_fn=<DivBackward1>)
tensor(3.3741, grad_fn=<DivBackward1>)
tensor(3.3190, grad_fn=<DivBackward1>)
tensor(3.3184, grad_fn=<DivBackward1>)
tensor(3.3436, grad_fn=<DivBackward1>)
tensor(3.3650, grad_fn=<DivBackward1>)
tensor(3.3654, grad_fn=<DivBackward1>)
tensor(3.3476, grad_fn=<DivBackward1>)
tensor(3.3338, grad_fn=<DivBackward1>)
tensor(3.3291, grad_fn=<DivBackward1>)
tensor(3.3119, grad_fn=<DivBackward1>)
tensor(3.3288, grad_fn=<DivBackward1>)
tensor(3.3747, grad_fn=<DivBackward1>)
tensor(3.3360, grad_fn=<DivBackward1>)
tensor(3.4102, grad_fn=<DivBackward1>)
tensor(3.3286, grad_fn=<D

KeyboardInterrupt: 

In [None]:
torch.Size([6, 12, 27]) torch.Size([6, 26])

In [139]:
MAX_NUM_INPUTS


28

In [140]:
lstm = nn.LSTM(27, MAX_NUM_INPUTS)
fc = nn.Linear(MAX_NUM_INPUTS + 26, 26)
input_obscured_word_seen = words_seen
input_letters_guessed_previously = previous_letters 

In [141]:
lstm_outputs, _ = lstm(input_obscured_word_seen)
lstm_outputs.shape

torch.Size([6, 12, 28])

In [142]:
final_lstm_output = lstm_outputs[:, -1, : ]
final_lstm_output.shape

torch.Size([6, 28])

In [143]:
combined_input = torch.cat((final_lstm_output, input_letters_guessed_previously), dim=1)
combined_input.shape

torch.Size([6, 54])

In [144]:
output = fc(combined_input)

In [145]:
output.shape

torch.Size([6, 26])

In [None]:
# encoded word after guess not changing check the tensor to numpy thing