In [None]:
# Gather the per-puzzle MCTS result files into a single shuffled table.
#
# pandas is imported here because this cell uses `pd` and no import of it is
# visible earlier in the notebook; without it a fresh kernel fails on the
# first `pd.read_csv` call.
import pandas as pd

# Fixed seed so the row order written to disk is the same on every run.
SHUFFLE_SEED = 42

# One results file per puzzle, numbered 1..23.
file_names = ["/content/mcts_puzzle_{}_results.csv".format(i) for i in range(1, 24)]

dataframes = [pd.read_csv(file) for file in file_names]

combined_df = pd.concat(dataframes, ignore_index=True)

# frac=1 samples every row exactly once, i.e. a full shuffle.
shuffled_df = combined_df.sample(frac=1, random_state=SHUFFLE_SEED).reset_index(drop=True)

print(shuffled_df.head())

# Persist the shuffled table, then read it back: `df_original` therefore
# holds exactly what is stored in the CSV file.
shuffled_df.to_csv("/content/shuffled_results.csv", index=False)
df_original = pd.read_csv("/content/shuffled_results.csv")

In [None]:
import random

def generate_invalid_sudoku():
    """Return the string form of a 3x3 grid that is guaranteed to break the rules.

    Every cell is drawn uniformly from '1'..'3'. A purely random grid is
    occasionally a valid Latin square (no repeated digit in any row or
    column), which would then be mislabelled as invalid by the caller, so the
    grid is redrawn until at least one row or column contains a duplicate.

    Returns:
        str: ``str()`` of a list of three lists of three one-character strings.
    """
    while True:
        grid = [[str(random.randint(1, 3)) for _ in range(3)] for _ in range(3)]
        rows_ok = all(len(set(row)) == 3 for row in grid)
        cols_ok = all(len(set(col)) == 3 for col in zip(*grid))
        # Only hand back grids that really violate a row/column constraint.
        if not (rows_ok and cols_ok):
            return str(grid)

# 500 rule-breaking boards, each labelled with the lowest score (-1.0).
invalid_states = [
    dict(state=generate_invalid_sudoku(), q_value=-1.0)
    for _ in range(500)
]

def generate_close_sudoku():
    """Return the string form of a cyclic 3x3 grid, sometimes with one blank.

    The base grid places ``(row + col) % 3 + 1`` in every cell. When a
    uniform draw exceeds 0.5, a single randomly chosen cell is replaced by
    ``'*'``; otherwise the grid is returned complete.

    Returns:
        str: ``str()`` of a list of three lists of three one-character strings.
    """
    grid = []
    for i in range(3):
        grid.append([str((i + j) % 3 + 1) for j in range(3)])

    blank_one_cell = random.random() > 0.5
    if blank_one_cell:
        # Row index is drawn before the column index.
        row = random.randint(0, 2)
        col = random.randint(0, 2)
        grid[row][col] = '*'

    return str(grid)

# 500 near-complete boards, each scored uniformly at random in [0.5, 0.9].
close_states = [
    dict(state=generate_close_sudoku(), q_value=random.uniform(0.5, 0.9))
    for _ in range(500)
]


def generate_neutral_sudoku():
    """Build a sparsely filled 3x3 grid with no row or column duplicates.

    Each cell starts as ``'*'`` and is filled with probability 0.3 (a uniform
    draw above 0.7) using a digit from '1'..'3' not yet used in that row.
    Afterwards, any column that contains a repeated digit is wiped back to
    ``'*'`` in all three rows.

    Returns:
        list[list[str]]: the grid itself (not its string form).
    """
    size = 3
    grid = [['*'] * size for _ in range(size)]

    # Pass 1: fill row by row, redrawing a digit until it is new to the row.
    for row in grid:
        taken = set()
        for col in range(size):
            if random.random() <= 0.7:
                continue
            digit = str(random.randint(1, 3))
            while digit in taken:
                digit = str(random.randint(1, 3))
            row[col] = digit
            taken.add(digit)

    # Pass 2: clear every column whose filled cells are not all distinct.
    for col in range(size):
        filled = [line[col] for line in grid if line[col] != '*']
        if len(set(filled)) == len(filled):
            continue
        for line in grid:
            line[col] = '*'

    return grid


# 500 sparse boards, each scored uniformly in [0.0, 0.2] and rounded to
# three decimals. The grid is stored in its string form.
neutral_states = []
for _ in range(500):
    state = generate_neutral_sudoku()
    q_value = random.uniform(0.0, 0.2)
    neutral_states.append(dict(state=str(state), q_value=round(q_value, 3)))


# Preview the first five generated examples.
for example in neutral_states[:5]:
    print(f"State: {example['state']}", f"Q-Value: {example['q_value']}\n", sep="\n")




In [None]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from transformers import AutoTokenizer


class TextRegressionDataset(Dataset):
    """Dataset pairing each text with a float regression target.

    Tokenisation happens lazily in ``__getitem__``: the tokenizer is called
    with ``padding="max_length"``, ``truncation=True`` and
    ``max_length=max_len``, and the leading batch dimension of its output is
    squeezed away.

    Args:
        texts: indexable collection of input strings.
        targets: indexable collection of numeric targets, aligned with ``texts``.
        tokenizer: callable returning a mapping with ``"input_ids"`` and
            ``"attention_mask"`` tensors.
        max_len: length every sequence is padded/truncated to.
    """

    def __init__(self, texts, targets, tokenizer, max_len):
        self.texts, self.targets = texts, targets
        self.tokenizer, self.max_len = tokenizer, max_len

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        """Return ``input_ids``, ``attention_mask`` and ``target`` for item ``idx``."""
        text, target = self.texts[idx], self.targets[idx]

        encoding = self.tokenizer(
            text,
            padding="max_length",
            truncation=True,
            max_length=self.max_len,
            return_tensors="pt",
        )

        # Drop the batch dimension the tokenizer adds for a single text.
        sample = {key: encoding[key].squeeze(0) for key in ("input_ids", "attention_mask")}
        sample["target"] = torch.tensor(target, dtype=torch.float)
        return sample


class LSTMRegressor(nn.Module):
    """Embedding -> single-layer LSTM -> linear -> tanh regressor.

    Args:
        vocab_size: number of rows in the embedding table.
        embedding_dim: size of each token embedding.
        hidden_dim: LSTM hidden state size.
        output_dim: number of regression outputs per sequence.
        pad_idx: token id whose embedding is kept at zero.
    """

    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.activation = nn.Tanh()

    def forward(self, input_ids, attention_mask=None):
        """Score a batch of token sequences.

        Args:
            input_ids: integer tensor of token ids, batch first.
            attention_mask: optional tensor of the same shape, non-zero on
                real tokens and zero on padding. Padding is assumed to sit at
                the end of each sequence. When omitted, the whole sequence
                (padding included) is fed to the LSTM.

        Returns:
            Tensor of shape ``(batch, output_dim)`` with values in (-1, 1).
        """
        embedded = self.embedding(input_ids)

        if attention_mask is not None:
            # Without packing, the final hidden state is taken after the LSTM
            # has stepped through every pad position, so the prediction would
            # depend on how much padding follows the text. Packing makes the
            # LSTM stop at each sequence's last real token.
            # clamp(min=1): pack_padded_sequence rejects zero-length rows.
            lengths = attention_mask.sum(dim=1).clamp(min=1).long().cpu()
            embedded = nn.utils.rnn.pack_padded_sequence(
                embedded, lengths, batch_first=True, enforce_sorted=False
            )

        _, (hidden, _) = self.lstm(embedded)
        # hidden[-1] is the last layer's final state, one row per sequence.
        return self.activation(self.fc(hidden[-1]))


# ---------------------------------------------------------------------------
# Build the training table.
# This cell previously read `df_combined`, but nothing in the notebook ever
# assigned it, so a fresh "Restart & Run All" stopped here with a NameError
# and the synthetic states generated above were never used. The table is
# assembled from the MCTS results (`df_original`) plus the three synthetic
# sets (`invalid_states`, `close_states`, `neutral_states`).
# NOTE(review): assumes `df_original` carries `state` and `q_value` columns,
# as the lookups just below imply -- confirm against the MCTS CSV schema.
# ---------------------------------------------------------------------------
synthetic_df = pd.DataFrame(invalid_states + close_states + neutral_states)
df_combined = pd.concat(
    [df_original[["state", "q_value"]], synthetic_df],
    ignore_index=True,
)

texts = list(df_combined['state'])
targets = list(df_combined['q_value'])


# Only the tokenizer (vocabulary + splitting rules) of DistilBERT is used;
# the LSTM below learns its own embeddings from scratch.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
max_len = 50

# 80/20 random train/validation split.
dataset = TextRegressionDataset(texts, targets, tokenizer, max_len)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=2, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=2)


# Model hyper-parameters.
vocab_size = tokenizer.vocab_size
embedding_dim = 128
hidden_dim = 64
output_dim = 1
pad_idx = tokenizer.pad_token_id

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = LSTMRegressor(vocab_size, embedding_dim, hidden_dim, output_dim, pad_idx).to(device)
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=1e-3)

epochs = 7
for epoch in range(epochs):
    # Training phase
    model.train()
    epoch_train_loss = 0
    for batch in train_loader:
        optimizer.zero_grad()
        input_ids = batch["input_ids"].to(device)
        attention_mask = batch["attention_mask"].to(device)
        # Named `batch_targets` so the full `targets` list above is not
        # silently replaced by the last mini-batch tensor.
        batch_targets = batch["target"].to(device)

        predictions = model(input_ids, attention_mask)
        # squeeze(-1) removes only the output dimension. A bare squeeze()
        # would also drop the batch dimension when the last batch holds a
        # single sample, giving a 0-d prediction against a 1-d target.
        loss = criterion(predictions.squeeze(-1), batch_targets)
        loss.backward()
        optimizer.step()
        epoch_train_loss += loss.item()

    # Validation phase
    model.eval()
    epoch_val_loss = 0
    with torch.no_grad():
        for batch in val_loader:
            input_ids = batch["input_ids"].to(device)
            attention_mask = batch["attention_mask"].to(device)
            batch_targets = batch["target"].to(device)

            predictions = model(input_ids, attention_mask)
            loss = criterion(predictions.squeeze(-1), batch_targets)
            epoch_val_loss += loss.item()

    # Losses are averaged over the number of batches in each loader.
    print(f"Epoch {epoch + 1}/{epochs}, Training Loss: {epoch_train_loss / len(train_loader):.4f}, "
          f"Validation Loss: {epoch_val_loss / len(val_loader):.4f}")

In [None]:
# Hand-written probe boards, in the same string format as the training states.
input_texts = [
    # Test 1: fully filled, every row and column holds 1-3 exactly once
    "[['1', '3', '2'], ['2', '1', '3'], ['3', '2', '1']]",
    # Test 2: fully filled with a single digit (duplicates everywhere)
    "[['1', '1', '1'], ['1', '1', '1'], ['1', '1', '1']]",
    # Test 3: partially filled, stars ('*') mark unknown cells
    "[['*', '3', '2'], ['2', '*', '*'], ['3', '*', '1']]",
    # Test 4: two complete rows, last row mostly unknown
    "[['1', '3', '2'], ['2', '1', '3'], ['3', '*', '*']]",
    # Test 5: three identical rows (each column repeats one digit)
    "[['3', '2', '1'], ['3', '2', '1'], ['3', '2', '1']]",
    # Test 6: only the main diagonal is filled
    "[['1', '*', '*'], ['*', '2', '*'], ['*', '*', '3']]",
    # Test 7: another fully filled board with no row/column duplicates
    "[['3', '1', '2'], ['2', '3', '1'], ['1', '2', '3']]",
    # Test 8: edge case, all zeros (a digit outside 1-3)
    "[['0', '0', '0'], ['0', '0', '0'], ['0', '0', '0']]",
    # Test 9: mixed known and unknown values
    "[['1', '*', '2'], ['*', '1', '*'], ['2', '*', '3']]",
    # Test 10: only the anti-diagonal is filled
    "[['*', '*', '1'], ['*', '2', '*'], ['3', '*', '*']]"
]

# Tokenise with the same padding/truncation settings used for training.
input_data = tokenizer(
    input_texts, padding="max_length", truncation=True, max_length=max_len, return_tensors="pt"
)
input_ids = input_data["input_ids"].to(device)
attention_mask = input_data["attention_mask"].to(device)

model.eval()
with torch.no_grad():
    predicted_scores = model(input_ids, attention_mask)

# squeeze(-1) drops only the output dimension, so `.tolist()` is always a
# list. A bare squeeze() returns a 0-d tensor when `input_texts` holds a
# single puzzle, and `.tolist()` then yields a float that `zip` cannot iterate.
for text, score in zip(input_texts, predicted_scores.squeeze(-1).tolist()):
    print(f"Puzzle: {text}")
    print(f"Predicted Score: {score:.4f}")

In [None]:
# Persist only the learned weights (state dict), not the whole module object.
model_save_path = "lstm_regressor.pth"
torch.save(obj=model.state_dict(), f=model_save_path)
print(f"Model saved to {model_save_path}")