In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset
import numpy as np
from PIL import Image
import os

AttributeError: partially initialized module 'torchvision' has no attribute 'extension' (most likely due to a circular import)

In [None]:
class SudokuDataset(Dataset):
    """Digits 1-9 from a wrapped dataset plus blank-cell images labelled 0.

    Samples of ``mnist_dataset`` whose label is 0 are filtered out once, at
    construction time, so label 0 is used exclusively for the blank images
    found in ``empty_dir``. Indexing is deterministic and ``len()`` reports
    the number of samples that can actually be returned.

    Args:
        mnist_dataset: Indexable dataset yielding ``(image, label)`` pairs.
            If it exposes a ``targets`` attribute, the labels are read from
            it; otherwise every sample is visited once to read its label.
        empty_dir: Directory whose regular files are loaded as blank-cell
            images. Sub-directories are ignored.
        transform: Optional callable applied to every returned image (both
            digits and blanks). Because it is applied here, ``mnist_dataset``
            should yield images this callable accepts (i.e. not already
            transformed by the same pipeline).
    """

    def __init__(self, mnist_dataset, empty_dir, transform=None):
        self.mnist_dataset = mnist_dataset
        self.transform = transform
        # Positions in mnist_dataset whose label is 1-9.
        self.digit_indices = self._nonzero_label_indices(mnist_dataset)

        self.empty_images = []
        # sorted(): os.listdir order is arbitrary; keep indexing reproducible.
        for name in sorted(os.listdir(empty_dir)):
            path = os.path.join(empty_dir, name)
            if not os.path.isfile(path):
                continue  # e.g. a .ipynb_checkpoints directory
            # Image.open is lazy and keeps the file open; copy() forces the
            # pixel data to load so the handle can be closed right away.
            with Image.open(path) as img:
                self.empty_images.append(img.copy())
        self.empty_labels = [0] * len(self.empty_images)  # Label 0 for empty block

    @staticmethod
    def _nonzero_label_indices(mnist_dataset):
        """Return the indices of ``mnist_dataset`` whose label is not 0."""
        targets = getattr(mnist_dataset, 'targets', None)
        if targets is not None:
            return np.flatnonzero(np.asarray(targets) != 0).tolist()
        # No label array available: read each sample's label individually.
        return [i for i in range(len(mnist_dataset)) if int(mnist_dataset[i][1]) != 0]

    def __len__(self):
        return len(self.digit_indices) + len(self.empty_images)

    def __getitem__(self, idx):
        total = len(self)
        if idx < 0:
            idx += total
        if not 0 <= idx < total:
            raise IndexError(f'index {idx} out of range for dataset of size {total}')

        digit_count = len(self.digit_indices)
        if idx < digit_count:
            image, label = self.mnist_dataset[self.digit_indices[idx]]
        else:
            image = self.empty_images[idx - digit_count]
            label = 0  # Label for empty block

        if self.transform:
            image = self.transform(image)

        return image, label


In [None]:
# Define data transformation. It is applied exactly once, inside SudokuDataset,
# to both the MNIST digits and the empty-block images.
transform = transforms.Compose([
    transforms.Grayscale(num_output_channels=1),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load MNIST dataset (train and test) WITHOUT a transform so it yields raw PIL
# images. Passing `transform` here as well would make SudokuDataset run the
# pipeline a second time on tensors, and ToTensor rejects tensor input.
train_mnist = datasets.MNIST(root='./data', train=True, download=True)
test_mnist = datasets.MNIST(root='./data', train=False, download=True)

# Directory with empty block images (created on first run)
empty_dir = './empty_blocks/'
os.makedirs(empty_dir, exist_ok=True)

# Create a 28x28 empty (white) image for an empty Sudoku block
empty_img = Image.fromarray(np.full((28, 28), 255, dtype=np.uint8))
empty_img.save(os.path.join(empty_dir, 'empty1.png'))
empty_img.save(os.path.join(empty_dir, 'empty2.png'))
# Create more variations if needed


# Combine MNIST digits and empty blocks into SudokuDataset
train_dataset = SudokuDataset(train_mnist, empty_dir, transform=transform)
test_dataset = SudokuDataset(test_mnist, empty_dir, transform=transform)

# Loaders for batching
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [None]:
class SudokuNet(nn.Module):
    """Small CNN producing 10 logits: digits 1-9 plus the empty class (0).

    Two 3x3 convolutions (padding 1), each followed by ReLU and a 2x2 max
    pool, feed a 128-unit hidden layer and a 10-way output layer. The
    flatten size of 64 * 7 * 7 corresponds to 1-channel 28x28 inputs.
    """

    def __init__(self):
        super().__init__()
        # Layers are created in this order on purpose: it fixes both the
        # state_dict key order and the order in which weights are initialised.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)  # 10 output classes: digits 1-9 + empty (0)

    def forward(self, x):
        """Return raw class logits of shape (batch, 10) for input batch ``x``."""
        features = self.pool(nn.functional.relu(self.conv1(x)))
        features = self.pool(nn.functional.relu(self.conv2(features)))
        flattened = features.view(-1, 64 * 7 * 7)
        hidden = nn.functional.relu(self.fc1(flattened))
        return self.fc2(hidden)


In [None]:
def train(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and report the average loss and accuracy.

    Args:
        model: Network to optimise; switched to training mode.
        train_loader: Iterable of ``(images, labels)`` batches. It does not
            need to support ``len()``; batches are counted while iterating.
        criterion: Loss callable taking ``(outputs, labels)``.
        optimizer: Optimizer stepping the parameters of ``model``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the percentage of correctly classified samples. Both are ``0.0``
        when the loader yields no batches (instead of dividing by zero).
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Statistics
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        running_loss += loss.item()
        num_batches += 1

    avg_loss = running_loss / num_batches if num_batches else 0.0
    accuracy = 100. * correct / total if total else 0.0
    print(f'Train Loss: {avg_loss:.3f}, Accuracy: {accuracy:.2f}%')
    return avg_loss, accuracy


def evaluate(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` without updating any weights.

    Args:
        model: Network to evaluate; switched to evaluation mode.
        test_loader: Iterable of ``(images, labels)`` batches. It does not
            need to support ``len()``; batches are counted while iterating.
        criterion: Loss callable taking ``(outputs, labels)``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(avg_loss, accuracy)``: the mean of the per-batch losses and
        the percentage of correctly classified samples. Both are ``0.0``
        when the loader yields no batches (instead of dividing by zero).
    """
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)

            outputs = model(images)
            loss = criterion(outputs, labels)

            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
            running_loss += loss.item()
            num_batches += 1

    avg_loss = running_loss / num_batches if num_batches else 0.0
    accuracy = 100. * correct / total if total else 0.0
    print(f'Test Loss: {avg_loss:.3f}, Accuracy: {accuracy:.2f}%')
    return avg_loss, accuracy


In [None]:
# Pick the compute device: CUDA when a GPU is visible, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Build the network on that device, together with its loss and optimizer.
model = SudokuNet()
model = model.to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# One training pass followed by one evaluation pass per epoch.
epochs = 10
for epoch in range(epochs):
    print(f'Epoch {epoch+1}/{epochs}')
    train(model, train_loader, criterion, optimizer, device)
    evaluate(model, test_loader, criterion, device)
