In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import random
import string
from tqdm import tqdm

# Fixed input length: encoded words are truncated or zero-padded to this size.
MAX_LEN = 20

# Token ids: 'a'..'z' -> 1..26 and '_' (a hidden letter) -> 27; id 0 is padding.
CHAR2IDX = dict(zip(string.ascii_lowercase, range(1, 27)))
CHAR2IDX['_'] = 27

# Reverse lookup from token id back to its character.
IDX2CHAR = {idx: ch for ch, idx in CHAR2IDX.items()}

VOCAB_SIZE = 28  # 26 letters + '_' + padding (0)

# Run on the GPU whenever one is visible to torch, otherwise on the CPU.
DEVICE = 'cpu' if not torch.cuda.is_available() else 'cuda'

import torch
import torch.nn as nn

class HangmanCNN(nn.Module):
    """Multi-kernel CNN + BiLSTM + self-attention classifier for masked words.

    The network embeds a batch of token-id sequences, runs three parallel
    convolutions (kernel sizes 2, 3 and 4), a second convolution, a 3-layer
    bidirectional LSTM and multi-head self-attention, then pools over the
    sequence axis and emits 26 logits (one per lowercase letter).

    Args:
        vocab_size: number of rows in the embedding table. Defaults to the
            module-level ``VOCAB_SIZE`` when omitted.
    """

    def __init__(self, vocab_size=None):
        super().__init__()
        if vocab_size is None:
            vocab_size = VOCAB_SIZE
        self.embedding = nn.Embedding(vocab_size, 256, padding_idx=0)

        # Multiple kernel sizes to capture different n-gram patterns.
        # padding='same' keeps every branch at the input length. With the
        # previous explicit paddings (0, 1, 2) the branches produced lengths
        # L-1, L and L+1, so the channel-wise torch.cat in forward() raised.
        # Parameter shapes are unaffected by the padding mode.
        self.conv1_2 = nn.Conv1d(256, 128, kernel_size=2, padding='same')
        self.conv1_3 = nn.Conv1d(256, 128, kernel_size=3, padding='same')
        self.conv1_4 = nn.Conv1d(256, 128, kernel_size=4, padding='same')

        self.bn1 = nn.BatchNorm1d(384)  # 128*3 channels
        self.dropout1 = nn.Dropout(0.3)

        self.conv2 = nn.Conv1d(384, 512, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(512)
        self.dropout2 = nn.Dropout(0.3)

        self.lstm = nn.LSTM(input_size=512, hidden_size=512, num_layers=3,
                            bidirectional=True, batch_first=True, dropout=0.3)

        # 1024 = 2 directions * 512 hidden units from the BiLSTM
        self.attention = nn.MultiheadAttention(embed_dim=1024, num_heads=8)

        self.fc1 = nn.Linear(1024, 512)
        self.fc2 = nn.Linear(512, 26)

    def forward(self, x):
        """Return letter logits of shape (batch, 26).

        Args:
            x: integer tensor of token ids, shape (batch, seq_len).
        """
        x = self.embedding(x).transpose(1, 2)  # (batch, 256, seq_len)

        # Parallel convolutions, all preserving seq_len
        x2 = torch.relu(self.conv1_2(x))
        x3 = torch.relu(self.conv1_3(x))
        x4 = torch.relu(self.conv1_4(x))

        # Concatenate along channel dimension -> (batch, 384, seq_len)
        x = torch.cat([x2, x3, x4], dim=1)
        x = self.dropout1(torch.relu(self.bn1(x)))

        x = self.dropout2(torch.relu(self.bn2(self.conv2(x))))

        x = x.transpose(1, 2)  # (batch, seq_len, channels)

        # BiLSTM -> (batch, seq_len, 1024)
        lstm_out, _ = self.lstm(x)

        # Self-attention; this MultiheadAttention is not batch_first, so it
        # expects (seq_len, batch, embed_dim).
        seq_first = lstm_out.transpose(0, 1)
        attn_out, _ = self.attention(seq_first, seq_first, seq_first)
        attn_out = attn_out.transpose(0, 1)

        # Global max pooling and average pooling over the sequence axis
        max_pool = torch.max(attn_out, dim=1)[0]
        avg_pool = torch.mean(attn_out, dim=1)

        # Combine pooling results
        x = max_pool + avg_pool

        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x

# Utility functions
def encode_word(word):
    """Translate each character of *word* into its CHAR2IDX id.

    Characters missing from CHAR2IDX are encoded as 0.
    """
    lookup = CHAR2IDX.get
    return [lookup(ch, 0) for ch in word]

def pad_word(encoded):
    """Return *encoded* cut or right-padded with zeros to exactly MAX_LEN ids."""
    shortfall = MAX_LEN - len(encoded)
    if shortfall < 0:
        # Too long: keep only the leading MAX_LEN ids.
        return encoded[:MAX_LEN]
    return encoded + [0] * shortfall

def mask_word(word, mask_ratio=0.4):
    """Hide a random subset of positions in *word* behind '_'.

    Args:
        word: the string to mask.
        mask_ratio: fraction of positions to hide. For a non-empty word at
            least one and at most ``len(word)`` positions are hidden.

    Returns:
        A ``(masked_word, hidden_letters)`` tuple: the word with the sampled
        positions replaced by '_', and the original characters at those
        positions in sampling order. An empty word yields ``("", [])``.
    """
    if not word:
        # random.sample cannot draw from an empty population.
        return "", []

    # Clamp so a ratio above 1 masks the whole word instead of raising.
    num_to_mask = min(len(word), max(1, int(len(word) * mask_ratio)))
    indices = random.sample(range(len(word)), num_to_mask)

    # NOTE(review): positions are sampled independently, so one occurrence of
    # a repeated letter can be hidden while another stays visible.
    masked = list(word)
    for idx in indices:
        masked[idx] = '_'
    return "".join(masked), [word[i] for i in indices]

# Load dictionary: one word per line, lower-cased, blank lines dropped
with open("/content/drive/MyDrive/words_250000_train.txt", encoding="utf-8") as f:
    dictionary = [line.strip().lower() for line in f if line.strip()]

# Prepare Dataset: each sample is (padded masked-word ids, id of one hidden letter)
train_data = []
for word in dictionary:
    # isalpha() alone also accepts non-ASCII letters, which have no entry in
    # CHAR2IDX and would raise KeyError when used as a label below.
    if not (word.isascii() and word.isalpha()) or len(word) > MAX_LEN:
        continue
    for _ in range(2):  # augment: 2 masked versions per word
        masked_word, hidden_letters = mask_word(word)
        # The encoded input is the same for every hidden letter of this mask,
        # so build it once instead of once per letter.
        x = pad_word(encode_word(masked_word))
        for letter in hidden_letters:
            y = CHAR2IDX[letter]
            train_data.append((x, y))

print(f"Total training samples: {len(train_data)}")

# Dataloader
class HangmanDataset(torch.utils.data.Dataset):
    """Dataset over ``(encoded_ids, letter_id)`` pairs.

    Each item is returned as two ``torch.long`` tensors: the encoded ids and
    the letter id shifted down by one, so the label is 0-based.
    """

    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        encoded, letter_id = self.data[idx]
        features = torch.tensor(encoded, dtype=torch.long)
        target = torch.tensor(letter_id - 1, dtype=torch.long)  # 0-based
        return features, target

# Shuffled mini-batches of 512 samples over the prepared training pairs
train_loader = torch.utils.data.DataLoader(
    HangmanDataset(train_data),
    batch_size=512,
    shuffle=True,
)

# Training setup: Adam on all model parameters, cross-entropy over 26 letters
model = HangmanCNN().to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

for epoch in range(10):
    model.train()
    running_loss = 0
    progress_bar = tqdm(train_loader, desc=f'Epoch {epoch+1}')
    for x_batch, y_batch in progress_bar:
        x_batch = x_batch.to(DEVICE)
        y_batch = y_batch.to(DEVICE)

        optimizer.zero_grad()
        logits = model(x_batch)
        loss = criterion(logits, y_batch)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for the total and the progress bar.
        batch_loss = loss.item()
        running_loss += batch_loss
        progress_bar.set_postfix({'loss': f'{batch_loss:.4f}'})

    # Mean per-batch loss for the epoch
    print(f"Epoch {epoch+1}: Loss {running_loss/len(train_loader):.4f}")

# Persist only the weights (state dict), not the whole module object
torch.save(model.state_dict(), "hangman_cnn_updated.pth")
print("Model saved as hangman_cnn_updated.pth")

In [2]:
import torch
import torch.nn as nn
import string
import re
import collections

# Constants for model
MAX_LEN = 20  # encoded patterns are truncated or zero-padded to this length

# 'a'..'z' -> 1..26, '_' (unknown letter) -> 27; id 0 is left for padding
CHAR2IDX = {letter: position for position, letter in enumerate(string.ascii_lowercase, start=1)}
CHAR2IDX['_'] = 27

# Inverse mapping: token id -> character
IDX2CHAR = dict((idx, ch) for ch, idx in CHAR2IDX.items())

VOCAB_SIZE = 28
DEVICE = 'cpu' if not torch.cuda.is_available() else 'cuda'

# Model class
class HangmanCNN(nn.Module):
    """Embedding -> two conv/batch-norm stages -> 2-layer BiLSTM -> 26 logits.

    NOTE(review): the layer names and sizes here differ from the HangmanCNN
    defined in the training cell of this file, so a state dict saved from
    that class will not load into this one. Confirm which architecture
    produced the checkpoint being loaded.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(VOCAB_SIZE, 128, padding_idx=0)

        # Both convolutions use kernel 3 / padding 1, so seq_len is preserved.
        self.conv1 = nn.Conv1d(128, 256, kernel_size=3, padding=1)
        self.bn1 = nn.BatchNorm1d(256)

        self.conv2 = nn.Conv1d(256, 256, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm1d(256)

        self.lstm = nn.LSTM(
            input_size=256,
            hidden_size=256,
            num_layers=2,
            bidirectional=True,
            batch_first=True,
        )

        self.fc = nn.Linear(512, 26)  # 2*256 from BiLSTM

    def forward(self, x):
        """Map token ids of shape (batch, seq_len) to logits of shape (batch, 26)."""
        features = self.embedding(x).transpose(1, 2)  # (batch, embed_dim, seq_len)

        # conv -> batch norm -> ReLU, applied twice
        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
            features = torch.relu(norm(conv(features)))

        sequence = features.transpose(1, 2)  # (batch, seq_len, channels) for LSTM
        output, _ = self.lstm(sequence)  # BiLSTM

        # Classify from the BiLSTM output at the final timestep only.
        last_step = output[:, -1, :]
        return self.fc(last_step)

# Lazy load model: built on first use, then cached in _model
_model = None
def load_model():
    """Return the shared HangmanCNN in eval mode, loading its weights once.

    The cache is populated only after the weights have loaded successfully.
    Previously the fresh module was cached before ``load_state_dict`` ran, so
    a failed load left a randomly initialised, non-eval model that later
    calls returned silently.

    Raises:
        Whatever ``torch.load`` / ``load_state_dict`` raise, e.g. when the
        checkpoint file is missing or its keys do not match the model.
    """
    global _model
    if _model is not None:
        return _model

    model = HangmanCNN().to(DEVICE)
    # NOTE: torch.load unpickles the file; only load checkpoints you trust.
    state_dict = torch.load("/Users/dhairya/cs projects/trexquant assignment/hangman_cnn_updated.pth", map_location=DEVICE)
    model.load_state_dict(state_dict)
    model.eval()

    _model = model
    return _model

# Helper to encode input
def encode_input(word):
    """Encode a hangman pattern as a (1, MAX_LEN) tensor of token ids on DEVICE.

    Args:
        word: the current pattern, e.g. ``"_ p p _ e "``. Spaces are removed
            wherever they occur, so both the space-separated form and a
            compact form such as ``"_pp_e"`` are accepted. The previous
            ``word[::2]`` slice dropped every other letter of a compact
            pattern.

    Returns:
        ``torch.long`` tensor of shape (1, MAX_LEN). Characters missing from
        CHAR2IDX map to 0, and the sequence is truncated or zero-padded to
        MAX_LEN.
    """
    cleaned = word.replace(' ', '').lower()
    encoded = [CHAR2IDX.get(c, 0) for c in cleaned][:MAX_LEN]
    encoded += [0] * (MAX_LEN - len(encoded))
    return torch.tensor(encoded, dtype=torch.long).unsqueeze(0).to(DEVICE)

# 🚀 THE REPLACED FUNCTION
def guess(self, word):  # word example: "_ p p _ e "
    """Return the model's most probable letter that has not been guessed yet.

    Args:
        word: the current hangman pattern, e.g. ``"_ p p _ e "``.

    Returns:
        A single lowercase letter not in ``self.guessed_letters``, or ``'e'``
        when every letter has already been guessed.
    """
    model = load_model()

    input_tensor = encode_input(word)  # (1, MAX_LEN)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(input_tensor)   # (1, 26)
        probs = torch.softmax(logits, dim=1).cpu().numpy()[0]

    # Sort predictions by probability descending
    letter_indices = probs.argsort()[::-1]

    # Find the highest probability letter that hasn't been guessed yet.
    # This loop visits all 26 letters, so no second alphabetical scan is needed.
    for idx in letter_indices:
        letter = string.ascii_lowercase[idx]
        if letter not in self.guessed_letters:
            return letter

    return 'e'  # Emergency fallback: every letter was already guessed