In [1]:
# Simple Sudoku Digit Classification
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
import keras
from pathlib import Path
import sys

# Add src to path for dataset loading
sys.path.append(str(Path.cwd().parent))
from training.dataset import retrieve_digit_dataset

# Set random seeds
np.random.seed(42)
keras.utils.set_random_seed(42)

print("Environment ready!")


Environment ready!


In [2]:
# Load and explore dataset
print("Loading dataset...")
X, y = retrieve_digit_dataset('../../data/digits', return_categorical=False)
print(f"Dataset loaded: {X.shape[0]} samples")
print(f"Image shape: {X.shape[1:]}")

# Check label distribution
unique, counts = np.unique(y, return_counts=True)
print(f"\nLabel distribution:")
for label, count in zip(unique, counts):
    print(f"  Digit {label}: {count} samples")

# Convert to categorical
y_categorical = keras.utils.to_categorical(y, num_classes=10)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y
)

print(f"\nTrain set: {X_train.shape[0]} samples")
print(f"Test set: {X_test.shape[0]} samples")


Loading dataset...
Dataset loaded: 0 samples
Image shape: ()

Label distribution:


ValueError: With n_samples=0, test_size=0.2 and train_size=None, the resulting train set will be empty. Adjust any of the aforementioned parameters.

In [None]:
# Display sample images
fig, axes = plt.subplots(2, 5, figsize=(12, 6))
for i in range(10):
    # Find first occurrence of each digit
    digit_idx = np.where(y == i)[0][0]
    row, col = i // 5, i % 5
    axes[row, col].imshow(X[digit_idx].squeeze(), cmap='gray')
    axes[row, col].set_title(f'Digit {i}')
    axes[row, col].axis('off')
plt.tight_layout()
plt.show()


In [None]:
# Create simple Sequential model
def create_simple_model(input_shape=(50, 50, 1), num_classes=10):
    model = keras.Sequential([
        keras.layers.Conv2D(64, (3, 3), activation="swish", input_shape=input_shape, padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Dropout(0.4),

        keras.layers.Conv2D(64, (3, 3), activation="swish", padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.MaxPooling2D((2, 2)),
        keras.layers.Dropout(0.4),

        keras.layers.Conv2D(32, (2, 2), activation="swish", padding="same"),
        keras.layers.BatchNormalization(),
        keras.layers.Dropout(0.3),

        keras.layers.GlobalAveragePooling2D(),
        keras.layers.Dense(128, activation="swish"),
        keras.layers.Dropout(0.5),
        keras.layers.Dense(num_classes, activation="softmax")
    ])
    return model

        # Define layers


# Create and compile model
model = create_simple_model()
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy']
)

print("Model created!")
print(f"Parameters: {model.count_params():,}")
model.summary()


In [None]:
# Train the model
print("Training model...")
history = model.fit(
    X_train, y_train,
    batch_size=32,
    epochs=150,
    validation_data=(X_test, y_test),
    verbose=1
)

print("Training completed!")


In [None]:
# Plot training history
plt.figure(figsize=(12, 4))

plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(history.history['accuracy'], label='Training Accuracy')
plt.plot(history.history['val_accuracy'], label='Validation Accuracy')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()

plt.tight_layout()
plt.show()


In [None]:
# Evaluate model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"Test Loss: {test_loss:.4f}")

# Get predictions
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)
y_true_classes = np.argmax(y_test, axis=1)

# Confusion matrix
cm = confusion_matrix(y_true_classes, y_pred_classes)
plt.figure(figsize=(8, 6))
plt.imshow(cm, interpolation='nearest', cmap='Blues')
plt.title('Confusion Matrix')
plt.colorbar()
tick_marks = np.arange(10)
plt.xticks(tick_marks, range(10))
plt.yticks(tick_marks, range(10))
plt.xlabel('Predicted')
plt.ylabel('Actual')

# Add text annotations
thresh = cm.max() / 2.
for i, j in np.ndindex(cm.shape):
    plt.text(j, i, format(cm[i, j], 'd'),
             ha="center", va="center",
             color="white" if cm[i, j] > thresh else "black")

plt.tight_layout()
plt.show()


In [None]:
# Classification report
print("Classification Report:")
print(classification_report(y_true_classes, y_pred_classes, 
                          target_names=[str(i) for i in range(10)]))

# Per-digit accuracy
print("\nPer-digit accuracy:")
for digit in range(10):
    digit_mask = y_true_classes == digit
    if np.any(digit_mask):
        digit_accuracy = np.mean(y_pred_classes[digit_mask] == digit)
        print(f"  Digit {digit}: {digit_accuracy:.3f}")

# Show some misclassified examples
misclassified = np.where(y_pred_classes != y_true_classes)[0]
if len(misclassified) > 0:
    print(f"\nShowing {min(10, len(misclassified))} misclassified examples:")
    fig, axes = plt.subplots(2, 5, figsize=(12, 6))
    for i in range(min(10, len(misclassified))):
        idx = misclassified[i]
        row, col = i // 5, i % 5
        axes[row, col].imshow(X_test[idx].squeeze(), cmap='gray')
        axes[row, col].set_title(f'True: {y_true_classes[idx]}, Pred: {y_pred_classes[idx]}')
        axes[row, col].axis('off')
    plt.tight_layout()
    plt.show()
