In [None]:
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# EMNIST images are 28x28 but stored with rows and columns swapped, so they
# look rotated. Swapping only the two spatial axes restores the orientation
# and keeps the channel axis first, i.e. each sample stays [1, 28, 28].
def fix_emnist_orientation(img):
    """Swap the height and width axes of a [C, H, W] image tensor.

    Args:
        img: Tensor produced by ``transforms.ToTensor()`` (channel axis first).

    Returns:
        The same image with axes 1 and 2 exchanged; the channel axis is
        left in position 0.
    """
    # Only axes 1 and 2 are exchanged. An additional transpose(0, 1) would
    # move the channel axis into the middle and yield [28, 1, 28] samples.
    return img.transpose(1, 2)


transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Lambda(fix_emnist_orientation),  # rotate to upright
    ]
)

# Build both EMNIST "balanced" splits (fetched into ./data when missing).
# The generator is consumed in order: training split first, then test split.
train_dataset, test_dataset = (
    datasets.EMNIST(
        root="./data",
        split="balanced",
        train=is_train,
        download=True,
        transform=transform,
    )
    for is_train in (True, False)
)

# Mini-batches of 64 samples; only the training loader shuffles.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

print(f"Train samples: {len(train_dataset)}")

In [None]:
# Pull a single mini-batch to sanity-check the tensor layout and the labels.
first_batch = next(iter(train_loader))
images, labels = first_batch
print(images.shape)  # intended layout: [64, 1, 28, 28]
print(labels[:10])

In [None]:
# Show the per-batch flattening a fully connected network needs.
# One batch is enough for the demonstration, hence the immediate break.
for images, labels in train_loader:
    # Keep the batch axis and collapse every remaining axis into one.
    images = images.view(images.shape[0], -1)
    break

print(images.shape)  # e.g., [64, 784]

In [None]:
import numpy as np


def dataset_to_numpy(dataset):
    """Convert an image dataset into flat NumPy feature and label arrays.

    Args:
        dataset: Iterable of ``(img, label)`` pairs. Every ``img`` must expose
            ``.numpy()`` (e.g. a torch tensor) and all images must share the
            same shape.

    Returns:
        X: float32 array of shape ``(N, num_pixels)`` with values in [0, 1].
        y: array of shape ``(N,)`` holding the labels in iteration order.

    Raises:
        ValueError: If ``dataset`` yields no samples (raised by ``np.stack``).
    """
    X, y = [], []
    for img, label in dataset:
        X.append(img.numpy())
        y.append(label)
    X = np.stack(X).astype("float32")  # e.g. [N, 1, 28, 28]
    # Flatten per sample without hard-coding the image size.
    X = X.reshape(X.shape[0], -1)
    # Images that went through ToTensor() are already in [0, 1]; dividing them
    # by 255 again would squash every feature into [0, 1/255]. Rescale only
    # when the data is still in the raw 0-255 range.
    if X.size and X.max() > 1.0:
        X /= 255.0
    y = np.array(y)
    return X, y


# Materialise both splits as flat NumPy arrays (features, labels).
X_train, y_train = dataset_to_numpy(dataset=train_dataset)
X_test, y_test = dataset_to_numpy(dataset=test_dataset)

# One line per split: feature-matrix shape followed by label-vector shape.
print(f"{X_train.shape} {y_train.shape}")
print(f"{X_test.shape} {y_test.shape}")

from sklearn.model_selection import train_test_split

# Keep e.g. 30% of the training set, stratified on the labels.
# train_test_split returns [X_kept, X_rest, y_kept, y_rest]; the even positions
# are the kept portions, so the remainder is discarded right away.
X_train_small, y_train_small = train_test_split(
    X_train,
    y_train,
    train_size=0.30,
    stratify=y_train,
    random_state=42,
)[::2]

print(f"{X_train_small.shape} {y_train_small.shape}")

# **Note**: In class, you only need to run the cells above this one once!

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

num_classes = 47  # EMNIST balanced

# Feed-forward classifier: 784 flattened pixels -> 128 ReLU units -> softmax
# over the classes.
model = keras.Sequential(
    layers=[
        layers.Input(shape=(784,)),
        layers.Dense(units=128, activation="relu"),
        # layers.Dropout(0.2),  # optional regularisation to experiment with
        layers.Dense(units=num_classes, activation="softmax"),
    ]
)

model.summary()

In [None]:
# Labels are passed as integer class ids rather than one-hot vectors, hence
# the sparse variant of categorical cross-entropy.
adam = keras.optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=adam,
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Train on the 30% subset for 10 epochs with batches of 256, holding out 10%
# of it for validation. `history` keeps the per-epoch metrics for plotting.
history = model.fit(
    x=X_train_small,
    y=y_train_small,
    batch_size=256,
    epochs=10,
    validation_split=0.1,
    verbose=1,
)

In [None]:
# Score the trained model on the held-out test split; evaluate() returns the
# loss followed by the compiled metrics (here: accuracy).
test_metrics = model.evaluate(x=X_test, y=y_test, verbose=0)
test_loss, test_acc = test_metrics
print(f"Test accuracy: {test_acc:.4f}")

In [None]:
from matplotlib import pyplot as plt

# Training vs. validation accuracy per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
ax.plot(history.history["accuracy"], label="train")
ax.plot(history.history["val_accuracy"], label="val")
ax.set(title="model accuracy", xlabel="epoch", ylabel="accuracy")
ax.legend(loc="upper left")
plt.show()