In [1]:
import pandas as pd
import os       

In [2]:
data_path = os.path.join('data', 'puzzles')

data_pandas = pd.concat([
    pd.read_parquet(os.path.join(data_path, 'puzzles_3m.parquet')),
    pd.read_parquet(os.path.join(data_path, 'solutions_3m.parquet'))
    ],
    axis=1,
).astype('int32')

# 81: is labels

# NN Stuff

## Dataloading

In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torch.distributions.categorical import Categorical

from sklearn.model_selection import train_test_split

In [4]:
class SudokuDataset(Dataset):
    def __init__(self, data) -> None:

        self.data = data
        super().__init__()

    def __len__(self):
        return len(self.data)

    def __getitem__(self, index):
        label = self.data[index, 81:].astype('int64')
        data = self.data[index, :81].astype('float32')

        return data, label


In [68]:
device = ('cuda' if torch.cuda.is_available() else 'cpu')
# split data
batch_size = 500
train_size = int(0.2 * len(data_pandas))

train_data, test_data = train_test_split(
    data_pandas, 
    test_size=len(data_pandas) - train_size, 
    train_size=train_size
)

# init torch dataloaders
train_data = DataLoader(
    SudokuDataset(train_data.to_numpy()),
    batch_size=batch_size 
)
test_data = DataLoader(
    SudokuDataset(test_data.to_numpy()),
    batch_size=batch_size
)

In [87]:
batch_size = 20000

## model definition

In [88]:
# Might need to come back with a convNN

class NeuralNetwork(nn.Module):
    def __init__(self, input_size=81) -> None:
        super().__init__(),
        self.input_size = input_size
        self.flatten = nn.Flatten()
        # need to convert data to float
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, 512),
            nn.ReLU(),
            nn.Linear(512,1024),
            nn.ReLU(),
            nn.Linear(1024,2048),
            nn.ReLU(),
            nn.Linear(2048,4096),
            nn.ReLU(),
            nn.Linear(4096,2048),
            nn.ReLU(),
            nn.Linear(2048, 1024),
            nn.ReLU(),
            nn.Linear(1024, input_size * 9),
            nn.ReLU()
        )

    def forward(self, x):
        # might not have to flatten as its already linear
        x = self.flatten(x)
        x = x.to(torch.float32)
        logits = self.linear_relu_stack(x)
        
        # reshape for categorical
        # len(logits) = batch_size 
        logits = logits.reshape(len(logits), self.input_size, 9)

        logits = torch.softmax(logits, dim=2)

        return logits

model = NeuralNetwork(
    input_size=data_pandas.shape[1] // 2
).to(device)

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=100)

## trainer definition

In [89]:
def train(dataloader, model, loss_fn, optimizer, batch_print=100):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        optimizer.zero_grad()
        X, y = X.to(device), y.to(device)
        y = y - 1 # accounting for index 0

        pred = model(X)
        pred = pred.view(-1, 81, 9) # reshape to [batch, 81, 9]

        loss = loss_fn(pred.permute(0, 2, 1), y) # Permute to [batch, 9, 81]
        loss.backward()
        optimizer.step()

        if batch % batch_print == 0:
            loss, current = loss.item(), (batch+1) * len(X)
            print(f'loss: {loss:>7f} [{current:<5d}/{size:>5d}]')

## Train

In [90]:
epochs = 15
for t in range(epochs):
    print(f'Epoch {t+1}\n-----------')
    train(train_data, model, loss_fn, optimizer, batch_print=100)

print('Done!')

Epoch 1
-----------
loss: 2.197226 [500  /600000]
loss: 2.260464 [50500/600000]
loss: 2.260613 [100500/600000]
loss: 2.258637 [150500/600000]
loss: 2.261279 [200500/600000]
loss: 2.262588 [250500/600000]
loss: 2.259798 [300500/600000]
loss: 2.261600 [350500/600000]
loss: 2.263798 [400500/600000]
loss: 2.259823 [450500/600000]
loss: 2.259428 [500500/600000]
loss: 2.261551 [550500/600000]
Epoch 2
-----------
loss: 2.245921 [500  /600000]
loss: 2.260464 [50500/600000]
loss: 2.260613 [100500/600000]
loss: 2.258637 [150500/600000]
loss: 2.261279 [200500/600000]
loss: 2.262588 [250500/600000]
loss: 2.259798 [300500/600000]
loss: 2.261600 [350500/600000]
loss: 2.263798 [400500/600000]
loss: 2.259823 [450500/600000]
loss: 2.259428 [500500/600000]
loss: 2.261551 [550500/600000]
Epoch 3
-----------
loss: 2.245921 [500  /600000]
loss: 2.260464 [50500/600000]
loss: 2.260613 [100500/600000]
loss: 2.258637 [150500/600000]
loss: 2.261279 [200500/600000]
loss: 2.262588 [250500/600000]
loss: 2.259798 [

KeyboardInterrupt: 

## Example Prediction

In [None]:
print(test_data.dataset.data.shape)

x = torch.tensor(test_data.dataset.data[0:2, :81]).to(device)
y = torch.tensor(test_data.dataset.data[0:2, 81:]).to(device)

pred = model(x)
print(x[0])
print('\npred')
print(torch.argmax(pred[0], dim=1))
print('\n')

print(y[0] - 1)


(600000, 162)
tensor([0, 0, 8, 0, 0, 6, 2, 0, 0, 3, 0, 0, 0, 1, 0, 5, 0, 0, 6, 0, 0, 0, 5, 0,
        0, 7, 4, 0, 7, 0, 6, 0, 0, 0, 2, 0, 4, 0, 0, 0, 0, 0, 0, 0, 1, 9, 0, 3,
        2, 0, 0, 0, 4, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 3, 2, 0, 0, 6, 0,
        0, 0, 0, 0, 0, 8, 0, 0, 0], device='cuda:0', dtype=torch.int32)

pred
tensor([6, 4, 5, 5, 0, 8, 8, 4, 5, 8, 7, 3, 1, 2, 2, 3, 1, 3, 0, 3, 2, 7, 1, 7,
        5, 4, 0, 5, 3, 7, 3, 4, 6, 7, 8, 1, 4, 2, 1, 3, 7, 5, 0, 0, 1, 3, 8, 1,
        0, 5, 7, 6, 3, 0, 2, 2, 1, 3, 6, 4, 3, 5, 3, 1, 5, 6, 1, 7, 7, 1, 0, 8,
        5, 0, 1, 5, 6, 8, 3, 7, 7], device='cuda:0')


tensor([6, 4, 7, 3, 2, 5, 1, 0, 8, 2, 3, 1, 8, 0, 6, 4, 7, 5, 5, 0, 8, 7, 4, 1,
        2, 6, 3, 4, 6, 0, 5, 3, 2, 8, 1, 7, 3, 1, 5, 4, 7, 8, 6, 2, 0, 8, 7, 2,
        1, 6, 0, 5, 3, 4, 0, 2, 4, 6, 5, 3, 7, 8, 1, 7, 8, 3, 2, 1, 4, 0, 5, 6,
        1, 5, 6, 0, 8, 7, 3, 4, 2], device='cuda:0', dtype=torch.int32)
