#### 1. Load model

In [None]:
import torch
from nnModels import CustomNN

# Rebuild the network with explicit hyperparameters; load_state_dict below
# requires the architecture to match the checkpoint being restored
# (presumably these are the values used at training time — confirm).
model = CustomNN(chars_len=26, embed_dim=16, hidden_dim=128, num_layers=2, dropout=0.2)
# map_location remaps every tensor stored in the checkpoint onto the CPU.
model.load_state_dict(torch.load("model_rnn.pth", map_location=torch.device("cpu")))
# Evaluation mode: disables dropout so inference is deterministic.
model.eval()

#### 2. Load inference methodology

In [None]:
from Hangman import HangmanBasic, HangmanRNN

In [19]:
## Inference
def guess(model, masked_word, guessed_letters):
    """Pick the next Hangman letter for a partially revealed word.

    Args:
        model: Object exposing ``eval()`` and callable as ``model(x, lengths)``;
            its output is indexed as ``[batch, position, letter]``.
        masked_word: Current board, e.g. ``"a p p _ e"``. Spaces are ignored
            and ``'_'`` marks a hidden letter.
        guessed_letters: Letters that have already been tried. Symbols that
            are not in ``char_to_idx`` are ignored.

    Returns:
        ``(letter, positions, probs)``.
        * All-blank board: ``(letter, None, None)`` using a fixed letter order
          (the model is not consulted).
        * Otherwise ``positions`` is the row-index tensor returned by
          ``torch.where`` for the cell(s) holding the maximum probability and
          ``probs`` is the per-position normalised probability matrix with one
          row per character of the word.
    """
    stripped = masked_word.replace(" ", "")
    if all(c == '_' for c in stripped):
        # Nothing is revealed yet: walk a fixed order, vowels first and then
        # consonants (ETAOIN SHRDLU consonants first), skipping tried letters.
        opening_order = [
            'e', 'a', 'o', 'i', 'u',
            't', 'n', 's', 'h', 'r', 'd', 'l', 'b', 'c', 'f', 'g', 'j', 'k',
            'm', 'p', 'q', 'v', 'w', 'x', 'y', 'z',
        ]
        for letter in opening_order:
            if letter not in guessed_letters:
                return letter, None, None
        # Every letter was already tried: fall through to the model path.

    word_len = len(stripped)
    # NOTE(review): assumes the encode_word in scope returns a single
    # [positions, letters] tensor for this call — confirm against the helper.
    x_input = encode_word(stripped)

    model.eval()
    with torch.no_grad():
        lengths = torch.tensor([word_len])  # batch of 1
        logits = model(x_input.unsqueeze(0), lengths)
        probs = torch.softmax(logits[0], dim=-1)

        # Keep only the rows that belong to the word. Any padding rows would
        # look like "unknown" positions (all-zero input) and could otherwise
        # win the arg-max below. A no-op when there is no padding.
        x_input = x_input[:word_len]
        probs = probs[:word_len]

        # 1. Zero out positions already known (non-zero in input)
        known_positions_mask = x_input.sum(dim=1) > 0  # [T]
        probs[known_positions_mask] = 0.0

        # 2. Zero out previously guessed letters. Known rows are already zero,
        #    so clearing whole columns is equivalent to clearing them only on
        #    unknown rows, and it avoids building index tensors that are empty
        #    or contain None.
        guessed_char_idx = [char_to_idx[c] for c in guessed_letters if c in char_to_idx]
        if guessed_char_idx:
            probs[:, guessed_char_idx] = 0.0

        # 3. normalize probabilities within each position
        row_sums = probs.sum(dim=1, keepdim=True) + 1e-8  # avoid division by zero
        probs_normalized = probs / row_sums

        # 4. pick max probability among all positions and characters
        guessed_pos, char = torch.where(probs_normalized == probs_normalized.max())
        guessed_char = idx_to_char[char[0].item()]

    return guessed_char, guessed_pos, probs_normalized

#### 3. Test model

In [20]:
## Example 1
# Partially revealed word: one hidden letter, several letters already tried.
masked_word = 'app_e'
# guess() strips spaces, so the board is passed as space-separated characters.
hangman_input = ' '.join(masked_word)
guessed_letters = ['a', 'p', 'e', 'b', 'c', 'd', 'h']


char, pos, probs = guess(model, hangman_input, guessed_letters)

# A word with a single distinct character (e.g. an all-blank board such as
# '____') takes guess()'s fixed-order path, which returns None for both the
# position and the probability matrix, so only the letter is printed.
if len(set(masked_word)) == 1:
    print(f"Next guess: '{char}'")
else:
    print(f"Next guess: '{char}' at position {pos}")
    print("Probability matrix (masked positions only):")
    masked_positions = [idx for idx, char in enumerate(masked_word) if char == '_']
    for i in masked_positions:
        # Keep only letters with non-zero probability at this position.
        # idx_to_char and all_chars_len must already exist in the kernel;
        # they are not defined in this cell.
        probs_dict = {idx_to_char[j]: float(probs[i, j]) for j in range(all_chars_len) if probs[i,j]>0}
        print(f"Position {i}: ", probs_dict)
        # Letters ranked from most to least likely.
        print(sorted(probs_dict, key=probs_dict.get, reverse=True))

Next guess: 'l' at position tensor([3])
Probability matrix (masked positions only):
Position 3:  {'f': 0.00012836948735639453, 'g': 0.001108763855881989, 'i': 0.19128884375095367, 'j': 0.0002790417929645628, 'k': 0.00015477229317184538, 'l': 0.6618263721466064, 'm': 0.00042593933176249266, 'n': 0.001427668146789074, 'o': 0.023885030299425125, 'q': 2.3676307137066033e-06, 'r': 0.05030466243624687, 's': 0.029816830530762672, 't': 0.00936949159950018, 'u': 0.024286340922117233, 'v': 0.0003797242825385183, 'w': 0.00012386047455947846, 'x': 1.4550921150657814e-05, 'y': 0.005062872543931007, 'z': 0.00011447598080849275}
['l', 'i', 'r', 's', 'u', 'o', 't', 'y', 'n', 'g', 'm', 'v', 'j', 'k', 'f', 'w', 'z', 'x', 'q']


In [21]:
## Example 2
# All-blank board with no previous guesses: exercises guess()'s fixed-order
# opening path rather than the model.
masked_word = '____'
# guess() strips spaces, so the board is passed as space-separated characters.
hangman_input = ' '.join(masked_word)
guessed_letters = []


char, pos, probs = guess(model, hangman_input, guessed_letters)

# A word with a single distinct character (here: only '_') means guess()
# returned None for the position and the probability matrix, so only the
# letter is printed.
if len(set(masked_word)) == 1:
    print(f"Next guess: '{char}'")
else:
    print(f"Next guess: '{char}' at position {pos}")
    print("Probability matrix (masked positions only):")
    masked_positions = [idx for idx, char in enumerate(masked_word) if char == '_']
    for i in masked_positions:
        # Keep only letters with non-zero probability at this position.
        # idx_to_char and all_chars_len must already exist in the kernel;
        # they are not defined in this cell.
        probs_dict = {idx_to_char[j]: float(probs[i, j]) for j in range(all_chars_len) if probs[i,j]>0}
        print(f"Position {i}: ", probs_dict)
        # Letters ranked from most to least likely.
        print(sorted(probs_dict, key=probs_dict.get, reverse=True))

Next guess: 'e'


## Playing recorded games:
Please finalize your code before running the cell below. Once this code has executed successfully, your submission is final: our system will not allow you to run any additional games.

Please note that, after this block of code has run successfully, subsequent runs are expected to fail with the error message "Your account has been deactivated".

Once you've run this section of the code your submission is complete. Please send us your source code via email.

In [32]:
# Play up to 1000 games back to back. `api` and `time` must already exist in
# the kernel; neither is defined in this cell.
for i in range(1000):
    print('Playing ', i, ' th game')
    # practice=0 requests a non-practice game — presumably these are the
    # recorded runs that count toward the final result (confirm against the
    # API). Do not run this until you are satisfied with your submission.
    api.start_game(practice=0,verbose=False)
    
    # DO NOT REMOVE as otherwise the server may lock you out for too high frequency of requests
    time.sleep(0.5)

Playing  0  th game
Playing  1  th game
Playing  2  th game
Playing  3  th game
Playing  4  th game
Playing  5  th game
Playing  6  th game
Playing  7  th game
Playing  8  th game
Playing  9  th game
Playing  10  th game
Playing  11  th game
Playing  12  th game
Playing  13  th game
Playing  14  th game
Playing  15  th game
Playing  16  th game
Playing  17  th game
Playing  18  th game
Playing  19  th game
Playing  20  th game
Playing  21  th game
Playing  22  th game
Playing  23  th game
Playing  24  th game
Playing  25  th game
Playing  26  th game


HangmanAPIError: {'error': 'You have reached 1000 of games', 'status': 'denied'}

## To check your game statistics
1. Simply call the `my_status` method.
2. It returns your practice and recorded game totals together with the corresponding numbers of wins.

In [33]:
# api.my_status() is unpacked into four counters: practice runs, recorded
# runs, recorded successes and practice successes.
[total_practice_runs,total_recorded_runs,total_recorded_successes,total_practice_successes] = api.my_status() # Get my game stats: (# of tries, # of wins)
# Report 0.0 instead of raising ZeroDivisionError when no recorded game has
# been played yet.
success_rate = total_recorded_successes / total_recorded_runs if total_recorded_runs else 0.0
print('overall success rate = %.3f' % success_rate)

overall success rate = 0.517


## ANNEX

In [None]:
## Model 1 - basic

class HangmanRNN(nn.Module):
    """Unidirectional GRU that scores every letter at every word position.

    Pipeline: linear projection of the per-position input vectors -> stacked
    GRU -> layer normalisation -> linear scoring head.
    """

    def __init__(self, chars_len=26, embed_dim=16, hidden_dim=256, num_layers=2, dropout=0.2):
        super().__init__()
        # A Linear layer plays the role of the embedding: it projects each
        # chars_len-wide position vector down to embed_dim.
        self.embedding = nn.Linear(in_features=chars_len, out_features=embed_dim)
        self.rnn = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
        )
        self.norm = nn.LayerNorm(normalized_shape=hidden_dim)
        self.fc = nn.Linear(in_features=hidden_dim, out_features=chars_len)

    def forward(self, x):
        """Return letter logits of shape [batch, positions, chars_len]."""
        projected = self.embedding(x)                    # batch x positions x embed_dim
        recurrent_out, _last_hidden = self.rnn(projected)  # batch x positions x hidden_dim
        return self.fc(self.norm(recurrent_out))



## Model 2 - added bidirectional training

class HangmanRNN(nn.Module):
    """Bidirectional GRU that scores every letter at every word position.

    Pipeline: linear projection -> dropout -> stacked bidirectional GRU ->
    layer normalisation -> linear scoring head.
    """

    def __init__(self, chars_len=26, embed_dim=16, hidden_dim=256, num_layers=2, dropout=0.2):
        super().__init__()
        # Forward and backward states are concatenated, so everything after
        # the GRU works on twice the hidden width.
        rnn_out_dim = hidden_dim * 2

        self.embedding = nn.Linear(in_features=chars_len, out_features=embed_dim)
        self.rnn = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        self.norm = nn.LayerNorm(normalized_shape=rnn_out_dim)
        self.fc = nn.Linear(in_features=rnn_out_dim, out_features=chars_len)
        # Dropout applied to the projected inputs before they reach the GRU.
        self.embed_dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return letter logits of shape [batch, positions, chars_len]."""
        projected = self.embed_dropout(self.embedding(x))
        recurrent_out, _last_hidden = self.rnn(projected)
        return self.fc(self.norm(recurrent_out))



## Model 3 - added training-validation split

# Hold out 10% of the samples for validation; the remainder is used for training.
# NOTE(review): random_split is called without a generator, so the split
# presumably changes between runs unless a seed is set elsewhere — confirm.
dataset_size = len(dataset)
val_size = int(0.1 * dataset_size)
train_size = dataset_size - val_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Only the training loader shuffles; validation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)



## Model 4 - added learning rate scheduler

model4 = HangmanRNN(chars_len=26, embed_dim=16, hidden_dim=128, num_layers=2, dropout=0.2)
optimizer = torch.optim.Adam(model4.parameters(), lr=1e-3)
# Loss takes raw logits (the sigmoid is applied inside the criterion).
criterion = torch.nn.BCEWithLogitsLoss()
# mode='min': the monitored value is expected to decrease; the learning rate
# is multiplied by 0.5 after 2 epochs without improvement.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)



## Model 5 -- added dynamic padding so max_seq_len depends on max word length within each batch

def collate_fn_dynamic_padding(batch):
    """Collate (x, y, mask) samples, padding only to the longest one in the batch.

    Args:
        batch: Sequence of ``(x, y, mask)`` tensor triples whose first
            dimension is the sequence length.

    Returns:
        ``(xs, ys, masks, lengths)``: the three padded, batch-first tensors
        and the original sequence lengths, all ordered from the longest
        sample to the shortest (the order pack_padded_sequence expects).
    """
    inputs, targets, loss_masks = zip(*batch)
    seq_lengths = torch.tensor([sample.shape[0] for sample in inputs])

    # Longest-first ordering; the same permutation is applied to every field.
    sorted_lengths, order = seq_lengths.sort(0, descending=True)

    # pad_sequence stacks [seq_len, ...] tensors into [batch, max_len, ...].
    padded_inputs, padded_targets, padded_masks = (
        pad_sequence(field, batch_first=True)[order]
        for field in (inputs, targets, loss_masks)
    )
    return padded_inputs, padded_targets, padded_masks, sorted_lengths

In [None]:
## Data processing with max word length

def encode_word(word, max_word_len=30, all_chars_len=26):
    """One-hot encode a (possibly masked) word into a fixed-size matrix.

    Args:
        word: Letters of the word; ``'_'`` marks a masked position.
        max_word_len: Number of rows in the returned matrix.
        all_chars_len: Number of columns (alphabet size).

    Returns:
        ``(encoded, length)``: a ``[max_word_len, all_chars_len]`` float
        tensor and ``len(word)``. Rows for masked positions and for padding
        stay all-zero.
    """
    one_hot = torch.zeros((max_word_len, all_chars_len))
    for position, letter in enumerate(word):
        if letter != '_':
            # Column comes from the notebook-level char_to_idx mapping.
            one_hot[position, char_to_idx[letter]] = 1
    return one_hot, len(word)


def convert_word_to_training_data(word, max_word_len=30, all_chars_len=26):
    """Turn one word into a randomly masked training sample.

    Masking is decided per distinct letter, so every occurrence of a letter
    is either shown or hidden together (as on a Hangman board).

    Args:
        word: The full, unmasked word.
        max_word_len: Padded length passed on to ``encode_word``.
        all_chars_len: Alphabet size passed on to ``encode_word``.

    Returns:
        ``(x_input, y_target, mask_tensor)``:
        * ``x_input``: encoded word with the masked positions zeroed out.
        * ``y_target``: encoded full word.
        * ``mask_tensor``: float vector of length ``max_word_len`` holding 1
          at masked positions and 0 elsewhere (including padding).
    """
    encoded_word, word_len = encode_word(word, max_word_len, all_chars_len)

    # create random masking, but ensures consistent masking per unique character
    unique_chars = sorted(set(word))
    if len(unique_chars) < 2:
        # A word with one distinct letter can never be *partially* masked, so
        # the rejection loop below would spin forever. Hide every position
        # instead: that is the only variant with something left to predict.
        char_mask_map = {c: 1 for c in unique_chars}
    else:
        while True:
            char_mask_map = {c: np.random.randint(0, 2) for c in unique_chars}  # 0=shown, 1=masked
            if len(set(char_mask_map.values())) != 1:   # ensure not all masked/unmasked
                break
    mask = np.array([char_mask_map[c] for c in word])

    # Per-position mask padded with zeros up to max_word_len.
    mask_tensor = torch.zeros(max_word_len, dtype=torch.float32)
    mask_tensor[:word_len] = torch.tensor(mask, dtype=torch.float32)

    # zero-out masked positions (clone so the target keeps the full word)
    x_input = encoded_word.clone()
    x_input[mask_tensor.bool()] = 0.0

    y_target = encoded_word  # same

    return x_input, y_target, mask_tensor


def process_all_words(words, max_word_len=30, all_chars_len=26, cache_file="processed.pkl", force_process=False):
    """Convert every word into a training sample, with an on-disk pickle cache.

    Args:
        words: Sequence of words to preprocess.
        max_word_len: Forwarded to ``convert_word_to_training_data``.
        all_chars_len: Forwarded to ``convert_word_to_training_data``.
        cache_file: Pickle file read from (when present) and written to.
        force_process: When true, skip the cache lookup and recompute.

    Returns:
        The cached object when the cache is used, otherwise the freshly built
        list of samples in the same order as ``words``.
    """
    import pickle
    from multiprocessing.dummy import Pool  # thread-backed pool with the Pool API

    use_cache = not force_process
    if use_cache:
        try:
            # NOTE: pickle.load executes arbitrary code; only load cache files
            # that were produced by this function.
            with open(cache_file, "rb") as cached:
                print(f"Loading cached preprocessed data from {cache_file}...")
                return pickle.load(cached)
        except FileNotFoundError:
            print("No cached data found — preprocessing...")

    with Pool() as workers:
        # imap yields results in input order; tqdm only wraps it for progress.
        samples = workers.imap(
            lambda w: convert_word_to_training_data(w, max_word_len, all_chars_len),
            words,
        )
        processed_data = list(tqdm(samples, total=len(words), desc="Preprocessing words"))

    # cache results for future runs
    with open(cache_file, "wb") as sink:
        pickle.dump(processed_data, sink)
        print(f"Saved preprocessed data to {cache_file}")

    return processed_data

In [None]:
## Model

class HangmanRNN(nn.Module):
    """Bidirectional GRU that scores every letter at every word position.

    Pipeline: linear projection -> dropout -> stacked bidirectional GRU ->
    layer normalisation -> linear scoring head.
    """

    def __init__(self, chars_len=26, embed_dim=16, hidden_dim=128, num_layers=2, dropout=0.2):
        super().__init__()
        # Forward and backward states are concatenated, so everything after
        # the GRU works on twice the hidden width.
        rnn_out_dim = hidden_dim * 2

        self.embedding = nn.Linear(in_features=chars_len, out_features=embed_dim)
        self.rnn = nn.GRU(
            input_size=embed_dim,
            hidden_size=hidden_dim,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout,
            bidirectional=True,
        )
        self.norm = nn.LayerNorm(normalized_shape=rnn_out_dim)
        self.fc = nn.Linear(in_features=rnn_out_dim, out_features=chars_len)
        self.embed_dropout = nn.Dropout(p=dropout)

    def forward(self, x):
        """Return letter logits of shape [batch, positions, chars_len]."""
        projected = self.embed_dropout(self.embedding(x))
        recurrent_out, _last_hidden = self.rnn(projected)
        return self.fc(self.norm(recurrent_out))



## Split into training and val
# Hold out 10% of the samples for validation; the remainder is used for training.
# NOTE(review): random_split is called without a generator, so the split
# presumably changes between runs unless a seed is set elsewhere — confirm.
dataset_size = len(dataset)
val_size = int(0.1 * dataset_size)
train_size = dataset_size - val_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Only the training loader shuffles; validation order is kept fixed.
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


## Define model, optimizer, loss, learning rate
model4 = HangmanRNN(chars_len=26, embed_dim=16, hidden_dim=128, num_layers=2, dropout=0.2)
optimizer = torch.optim.Adam(model4.parameters(), lr=1e-3)
# Loss takes raw logits (the sigmoid is applied inside the criterion).
criterion = torch.nn.BCEWithLogitsLoss()
# reduces LR by factor of 0.5 if val loss stagnant for 2 epochs
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)


## Train
# One pass over train_loader followed by one pass over val_loader per epoch.
num_epochs = 6
for epoch in range(num_epochs):
    # Training mode enables dropout.
    model4.train()
    total_train_loss = 0

    for x, y, mask in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} [Train]"):
        logits = model4(x)
        # The loss is computed only on positions flagged in mask; other
        # positions do not contribute.
        loss = criterion(logits[mask == 1], y[mask == 1])

        # Clear old gradients, backpropagate, then update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_train_loss += loss.item()

    # Mean of the per-batch losses (not weighted by batch size).
    avg_train_loss = total_train_loss / len(train_loader)

    # compute validation loss
    model4.eval()
    total_val_loss = 0
    with torch.no_grad():
        for x, y, mask in val_loader:
            logits = model4(x)
            val_loss = criterion(logits[mask == 1], y[mask == 1])
            total_val_loss += val_loss.item()

    avg_val_loss = total_val_loss / len(val_loader)
    # The plateau scheduler monitors the validation loss.
    scheduler.step(avg_val_loss)

    print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}, Val Loss = {avg_val_loss:.4f}")


## Inference
def guess(model, masked_word, guessed_letters):
    """Pick the next Hangman letter for a partially revealed word.

    Args:
        model: Object exposing ``eval()`` and callable as ``model(x)``; its
            output is indexed as ``[batch, position, letter]``.
        masked_word: Current board, e.g. ``"a p p _ e"``. Spaces are ignored
            and ``'_'`` marks a hidden letter.
        guessed_letters: Letters that have already been tried. Symbols that
            are not in ``char_to_idx`` are ignored.

    Returns:
        ``(letter, position, probs)``.
        * All-blank board: ``(letter, None, None)`` using a fixed letter order
          (the model is not consulted).
        * Otherwise ``position`` is the integer index of the first position
          holding the maximum probability and ``probs`` is the per-position
          normalised probability matrix with one row per character of the word.
    """
    stripped = masked_word.replace(" ", "")
    if all(c == '_' for c in stripped):
        # Nothing is revealed yet: walk a fixed order, vowels first and then
        # consonants (ETAOIN SHRDLU consonants first), skipping tried letters.
        opening_order = [
            'e', 'a', 'o', 'i', 'u',
            't', 'n', 's', 'h', 'r', 'd', 'l', 'b', 'c', 'f', 'g', 'j', 'k',
            'm', 'p', 'q', 'v', 'w', 'x', 'y', 'z',
        ]
        for letter in opening_order:
            if letter not in guessed_letters:
                return letter, None, None
        # Every letter was already tried: fall through to the model path.

    x_input, word_len = encode_word(stripped)

    model.eval()
    with torch.no_grad():
        logits = model(x_input.unsqueeze(0))        # 1 x max_word_len x all_chars_len
        probs = torch.softmax(logits[0], dim=-1)    # max_word_len x all_chars_len

        # Drop padding rows up front so they can never win the arg-max.
        x_input = x_input[:word_len]
        probs = probs[:word_len]

        # 1. Zero out positions already known (non-zero in input)
        known_positions_mask = x_input.sum(dim=1) > 0  # [T]
        probs[known_positions_mask] = 0.0

        # 2. Zero out previously guessed letters. Known rows are already zero,
        #    so clearing whole columns is equivalent to clearing them only on
        #    unknown rows, and it avoids building index tensors that are empty
        #    or contain None.
        guessed_char_idx = [char_to_idx[c] for c in guessed_letters if c in char_to_idx]
        if guessed_char_idx:
            probs[:, guessed_char_idx] = 0.0

        # 3. normalize probabilities within each position
        row_sums = probs.sum(dim=1, keepdim=True) + 1e-8  # avoid division by zero
        probs_normalized = probs / row_sums

        # 4. pick max probability among all positions and characters
        pos, char = torch.where(probs_normalized == probs_normalized.max())
        guessed_char = idx_to_char[char[0].item()]
        guessed_pos = pos[0].item()

    return guessed_char, guessed_pos, probs_normalized



## Example
# Partially revealed word: one hidden letter, several letters already tried.
masked_word = 'app_e'
# guess() strips spaces, so the board is passed as space-separated characters.
hangman_input = ' '.join(masked_word)
guessed_letters = ['a', 'p', 'e', 'b', 'c', 'd', 'h']


char, pos, probs = guess(model4, hangman_input, guessed_letters)

# A word with a single distinct character (e.g. an all-blank board such as
# '____') takes guess()'s fixed-order path, which returns None for both the
# position and the probability matrix, so only the letter is printed.
if len(set(masked_word)) == 1:
    print(f"Next guess: '{char}'")
else:
    print(f"Next guess: '{char}' at position {pos}")
    print("Probability matrix (masked positions only):")
    masked_positions = [idx for idx, char in enumerate(masked_word) if char == '_']
    for i in masked_positions:
        # Keep only letters with non-zero probability at this position.
        # idx_to_char and all_chars_len must already exist in the kernel;
        # they are not defined in this cell.
        probs_dict = {idx_to_char[j]: float(probs[i, j]) for j in range(all_chars_len) if probs[i,j]>0}
        print(f"Position {i}: ", probs_dict)
        # Letters ranked from most to least likely.
        print(sorted(probs_dict, key=probs_dict.get, reverse=True))