In [1]:
import sys
import os,shutil
import tensorflow as tf
import numpy as np
import random
import pandas as pd
import collections
# Report the interpreter and library versions this notebook was executed with.
# The empty first and last entries reproduce the blank line that surrounds the report.
version_lines = [
    '',
    'Python version: {}'.format(sys.version),
    'TensorFlow version: {}'.format(tf.__version__),
    'NumPy version: {}'.format(np.__version__),
    'Pandas version: {}'.format(pd.__version__),
    '',
]
print('\n'.join(version_lines))


Python version: 3.6.4 (default, Mar  9 2018, 23:15:03) 
[GCC 4.2.1 Compatible Apple LLVM 9.0.0 (clang-900.0.39.2)]
TensorFlow version: 1.6.0
NumPy version: 1.14.2
Pandas version: 0.22.0



# Create neural network model

In [3]:
# Builds the TensorFlow graph of the letter-guessing network:
#   words -> W4/relu -> W1/sigmoid -> W2/sigmoid -> concat(guessed letters) -> W3 -> logits
#
# MAX_WORD_LENGTH is the length of the longest word among the 250,000 dictionary words.
MAX_WORD_LENGTH=29
# Width of the flattened word encoding: 27 one-hot slots per character position
# (HangmanPlayer.encode_obscured_word uses index 26 for a letter that is still hidden).
N_WORDS = 27*MAX_WORD_LENGTH
# Width of the "letters already guessed" input vector.
N_GUESSED=26
# Number of output logits, one per candidate letter index.
N_CLASSES = 26
# Layer widths. The numeric suffix matches the weight matrix that produces the layer
# (W4 -> 300, W1 -> 150, W2 -> 80); it is NOT the order in which the layers are applied.
HIDDEN_SIZE4 = 300
HIDDEN_SIZE1 = 150
HIDDEN_SIZE2 = 80
# Input width of the output layer: hidden2 concatenated with the guessed-letter vector.
HIDDEN_SIZE3=HIDDEN_SIZE2+N_GUESSED
# inputs (the leading dimension is None, so the row count is not fixed in the graph)
x_words = tf.placeholder(tf.float32, [None,N_WORDS], name="words")
x_guessed=tf.placeholder(tf.float32, [None,N_GUESSED], name="guessed")
y_label = tf.placeholder(tf.float32, [None,N_CLASSES], name="labels")
# first hidden layer (W4/b4): encoded word -> 300 units, ReLU
# weights are drawn from a truncated normal with stddev = fan_in ** -0.5, biases start at zero
W4 = tf.Variable(tf.truncated_normal([N_WORDS, HIDDEN_SIZE4],stddev=N_WORDS**-0.5),name="W4")
b4 = tf.Variable(tf.zeros([HIDDEN_SIZE4]),name="b4")
hidden4 = tf.nn.relu(tf.matmul(x_words, W4) + b4)
# second hidden layer (W1/b1): 300 -> 150 units, sigmoid
W1 = tf.Variable(tf.truncated_normal([HIDDEN_SIZE4, HIDDEN_SIZE1],stddev=HIDDEN_SIZE4**-0.5),name="W1")
b1 = tf.Variable(tf.zeros([HIDDEN_SIZE1]),name="b1")
hidden1 = tf.nn.sigmoid(tf.matmul(hidden4, W1) + b1)
# third hidden layer (W2/b2): 150 -> 80 units, sigmoid
W2 = tf.Variable(tf.truncated_normal([HIDDEN_SIZE1, HIDDEN_SIZE2],stddev=HIDDEN_SIZE1**-0.5),name="W2")
b2 = tf.Variable(tf.zeros([HIDDEN_SIZE2]),name="b2")
hidden2 = tf.nn.sigmoid(tf.matmul(hidden1, W2) + b2)
# output layer (W3/b3): the guessed-letter vector is concatenated to hidden2 along axis 1,
# so the final layer sees the guess history directly; y holds the raw (un-normalised) logits
W3 = tf.Variable(tf.truncated_normal([HIDDEN_SIZE3, N_CLASSES],stddev=HIDDEN_SIZE3**-0.5),name="W3")
b3 = tf.Variable(tf.zeros([N_CLASSES]),name="b3")
y = tf.matmul(tf.concat([hidden2,x_guessed],1), W3) + b3
# loss function: softmax cross-entropy against y_label, averaged over the fed rows
# (HangmanPlayer.encode_correct_responses produces labels normalised to sum to 1)
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=y, labels=y_label))
# prediction function: argmax over axis 1 followed by a mean over all fed rows.
# With a single-row feed (as HangmanPlayer.run does) this is that row's argmax;
# NOTE(review): with more than one row the mean of the indices is not a per-row prediction.
pred=tf.reduce_mean(tf.argmax(y,1))
# optimizer: MomentumOptimizer's positional arguments are (learning_rate, momentum),
# i.e. learning rate 0.5 and momentum 0.1 here
sgd = tf.train.MomentumOptimizer(0.5,0.1).minimize(loss)
#--------------------------------------------
#
#    LOAD MODELS
#
#--------------------------------------------
# Create the session, initialise every variable, then overwrite the variables with
# previously trained values when a complete, shape-compatible set of .npy files exists.
folder = './hangman_model'
LOADED=False

# exist_ok makes this a no-op when the directory is already there
os.makedirs(folder, exist_ok=True)
sess=tf.Session()
# Initialization: gives every variable a value even when nothing is restored below
sess.run(tf.global_variables_initializer())

# Graph variable for each stored parameter; files are named '<name>.npy' inside `folder`
model_variables = {'W1': W1, 'W2': W2, 'W3': W3, 'W4': W4,
                   'b1': b1, 'b2': b2, 'b3': b3, 'b4': b4}
weight_files = {name: os.path.join(folder, name + '.npy') for name in model_variables}
missing_files = sorted(path for path in weight_files.values() if not os.path.exists(path))

# load trained weights only when every parameter file is present, so a partially
# written model directory cannot abort the restore half-way through
if not LOADED and not missing_files:
    print('---------------------------------------------------------------------')
    print('----------------Readind trained model---------------------------------')
    print('---------------------------------------------------------------------')
    # read trained weights and biases
    trained_values = {name: np.load(path) for name, path in weight_files.items()}
    # the raw arrays stay available under their original module-level names
    W1_np, W2_np, W3_np, W4_np = (trained_values[name] for name in ('W1', 'W2', 'W3', 'W4'))
    b1_np, b2_np, b3_np, b4_np = (trained_values[name] for name in ('b1', 'b2', 'b3', 'b4'))
    # restore trained weights and biases only if EVERY stored array fits its variable
    mismatched = sorted(name for name, variable in model_variables.items()
                        if not (variable.shape == trained_values[name].shape))
    if not mismatched:
        print('---------------------------------------------------------------------')
        print('----------------Loading trained model---------------------------------')
        print('---------------------------------------------------------------------')
        for name, variable in model_variables.items():
            sess.run(tf.assign(variable, trained_values[name]))
        LOADED = True
    else:
        print('Stored model does not match the graph, keeping the initial weights. '
              'Mismatched: {}'.format(', '.join(mismatched)))
elif not LOADED and len(missing_files) < len(weight_files):
    # some, but not all, parameter files were found
    print('Stored model is incomplete, keeping the initial weights. '
          'Missing: {}'.format(', '.join(missing_files)))

---------------------------------------------------------------------
----------------Readind trained model---------------------------------
---------------------------------------------------------------------
---------------------------------------------------------------------
----------------Loading trained model---------------------------------
---------------------------------------------------------------------


# Create the game player

In [17]:
class HangmanPlayer:
    """Plays one game of hangman with a model and records every turn.

    Letters are handled as integers 0-25 (``ord(letter) - 97``). Index 26 is
    used in the word encoding for a position whose letter is still hidden.
    """

    def __init__(self, word, model, lives=11):
        """Set up a new game.

        Args:
            word: the secret word; must consist of the lowercase letters a-z.
            model: object whose ``run(fetches, feed_dict=...)`` returns the
                guessed letter index (a ``tf.Session`` in this notebook).
            lives: number of wrong guesses allowed before the game is lost.

        Raises:
            ValueError: if ``word`` contains a character outside a-z. Such
                characters would otherwise be encoded through negative or
                out-of-range indices.
        """
        letter_codes = [ord(ch) - 97 for ch in word]
        if any(code < 0 or code > 25 for code in letter_codes):
            raise ValueError(
                'word must contain only lowercase letters a-z: {!r}'.format(word))
        self.original_word = word
        self.full_word = letter_codes
        self.letters_guessed = set()
        self.letters_remaining = set(self.full_word)
        self.lives_left = lives
        # Per-turn history, one entry per guess
        self.obscured_words_seen = []
        self.letters_previously_guessed = []
        self.guesses = []
        self.correct_responses = []
        self.z = model

    def encode_obscured_word(self):
        """Return the current view of the word as a flat one-hot vector.

        Each of the MAX_WORD_LENGTH positions gets 27 slots: the letter index
        when that letter was already guessed, slot 26 when it is still hidden.
        Positions past the end of the word stay all-zero.
        """
        word = [i if i in self.letters_guessed else 26 for i in self.full_word]
        obscured_word = np.zeros((MAX_WORD_LENGTH, 27), dtype=np.float32)
        for position, slot in enumerate(word):
            obscured_word[position, slot] = 1
        return obscured_word.flatten()

    def encode_guess(self, guess):
        """Return a length-26 one-hot vector for the letter index ``guess``."""
        encoded_guess = np.zeros(26, dtype=np.float32)
        encoded_guess[guess] = 1
        return encoded_guess

    def encode_previous_guesses(self):
        """Return a length-26 vector where 1s mark letters guessed so far."""
        guess = np.zeros(26, dtype=np.float32)
        for i in self.letters_guessed:
            guess[i] = 1
        return guess

    def encode_correct_responses(self):
        """Return the target distribution over the letters still to be found.

        The vector is normalised to sum to 1 so it can be used as labels for
        softmax cross-entropy. When no letters remain it is all zeros instead
        of the NaNs a 0/0 division would produce.
        """
        response = np.zeros(26, dtype=np.float32)
        for i in self.letters_remaining:
            response[i] = 1.0
        total = response.sum()
        if total > 0:
            response /= total
        return response

    def store_guess_and_result(self, guess):
        """Record one turn and update the game state for ``guess``.

        Args:
            guess: letter index 0-25 chosen by the model.
        """
        # Normalise NumPy scalars to a plain int for set membership and indexing
        guess = int(guess)

        # Record what the model saw as input: an obscured word and the previously-guessed letters
        self.obscured_words_seen.append(self.encode_obscured_word())
        self.letters_previously_guessed.append(self.encode_previous_guesses())

        # Record the letter that the model guessed, and add that guess to the set of previous guesses
        self.guesses.append(guess)
        self.letters_guessed.add(guess)

        # Store the "correct responses" as they were BEFORE this guess is applied
        self.correct_responses.append(self.encode_correct_responses())

        # A guess is correct only if the letter was still missing; a wrong or
        # repeated letter costs a life.
        if guess in self.letters_remaining:
            self.letters_remaining.remove(guess)
        else:
            self.lives_left -= 1

    def run(self):
        """Play until the word is solved or no lives are left.

        Uses the notebook-level graph tensors ``pred``, ``x_words`` and
        ``x_guessed`` to query the model, one single-row feed per turn.

        Returns:
            Tuple of (obscured words seen, previous-guess vectors,
            correct-response vectors, whether the game was won).
        """
        while (self.lives_left > 0) and (len(self.letters_remaining) > 0):
            x1 = np.array([self.encode_obscured_word()])
            x2 = np.array([self.encode_previous_guesses()])
            guess = self.z.run(pred, feed_dict={x_words: x1, x_guessed: x2})

            self.store_guess_and_result(guess)

        # Return the observations for use in training (inputs, targets and outcome)
        return (self.obscured_words_seen,
                self.letters_previously_guessed,
                self.correct_responses, self.lives_left > 0)

    def evaluate_performance(self):
        """Summarise a finished game; assumes ``run()`` was already called.

        Returns:
            Tuple of (ended_in_success, correct_guesses, incorrect_guesses,
            letters_in_word), where ``letters_in_word`` is the set of distinct
            characters of the original word.
        """
        ended_in_success = self.lives_left > 0
        letters_in_word = set(self.original_word)
        correct_guesses = len(letters_in_word) - len(self.letters_remaining)
        incorrect_guesses = len(self.guesses) - correct_guesses
        return (ended_in_success, correct_guesses, incorrect_guesses, letters_in_word)

# Read test data

In [18]:
full_dictionary_location = "words_250000_train.txt"
# Read inside a context manager so the handle is closed even if reading fails
with open(full_dictionary_location, "r") as text_file:
    full_dictionary = text_file.read().splitlines()
# NOTE(review): the shuffle is unseeded, so a different subset is sampled on every run
random.shuffle(full_dictionary)
# define test data: the first 0.5% of the shuffled dictionary
test_val_split_idx = int(len(full_dictionary) * 0.005)
test_dictionary = full_dictionary[:test_val_split_idx]
# NOTE(review): the message says "Training" although these words are used for evaluation
print('Training with {} words'.format(test_val_split_idx))

Training with 1136 words


# Evaluate test model

In [None]:
def evaluate_model(my_words, my_model):
    """Play one hangman game per word and tabulate the outcomes.

    Args:
        my_words: iterable of words to play.
        my_model: model handed to each HangmanPlayer.

    Returns:
        DataFrame with one row per game and the columns
        'won', 'num_correct', 'num_incorrect' and 'letters'.
    """
    def play_single_game(secret):
        # Run the game to completion, then collect its summary tuple
        player = HangmanPlayer(secret, my_model)
        player.run()
        return player.evaluate_performance()

    outcomes = list(map(play_single_game, my_words))
    column_names = ['won', 'num_correct', 'num_incorrect', 'letters']
    return pd.DataFrame(outcomes, columns=column_names)

# Play every test word with the restored session and summarise the outcomes.
result_df = evaluate_model(test_dictionary, sess)
print('Performance on the validation set:')
# Average number of correct / incorrect guesses per game
mean_correct = result_df['num_correct'].mean()
mean_incorrect = result_df['num_incorrect'].mean()
guess_summary = '- Averaged {:0.1f} correct and {:0.1f} incorrect guesses per game'
print(guess_summary.format(mean_correct, mean_incorrect))
# Share of games that ended with lives left, as a percentage
games_played = len(result_df.index)
win_percentage = 100 * result_df['won'].sum() / games_played
print('- Won {:0.1f}% of games played'.format(win_percentage))