In [1]:
import tensorflow as tf

# Load and prepare the MNIST dataset
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Rescale the pixel intensities by 1/255 before feeding them to the network
x_train, x_test = x_train / 255.0, x_test / 255.0

# Build the model
# The input shape is declared with an explicit Input object as the first entry
# instead of passing `input_shape=` to Flatten; recent Keras versions emit a
# UserWarning for the layer-argument form in Sequential models.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(28, 28)),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dropout(0.2),
    # No activation on the last layer: the model outputs logits
    tf.keras.layers.Dense(10)
])

# Compile and train
# from_logits=True matches the activation-free output layer above
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer='adam', loss=loss_fn, metrics=['accuracy'])
model.fit(x_train, y_train, epochs=5)

# Evaluate
model.evaluate(x_test, y_test, verbose=2)

# Probability model
# Wrap the trained model with a Softmax layer so that it returns class
# probabilities rather than logits.
probability_model = tf.keras.Sequential([
    model,
    tf.keras.layers.Softmax()
])
# Last expression of the cell: displays the probabilities for the first five test images
probability_model(x_test[:5])


  super().__init__(**kwargs)


Epoch 1/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - accuracy: 0.8586 - loss: 0.4853
Epoch 2/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9544 - loss: 0.1515
Epoch 3/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9671 - loss: 0.1078
Epoch 4/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9740 - loss: 0.0880
Epoch 5/5
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 4ms/step - accuracy: 0.9769 - loss: 0.0733
313/313 - 1s - 4ms/step - accuracy: 0.9774 - loss: 0.0755


<tf.Tensor: shape=(5, 10), dtype=float32, numpy=
array([[6.87426791e-08, 3.79114629e-10, 5.61414367e-07, 3.94980307e-05,
        2.44268104e-12, 6.43464944e-08, 1.09934365e-14, 9.99959469e-01,
        2.03434123e-08, 4.34497764e-07],
       [2.86235968e-09, 3.02296190e-04, 9.99666095e-01, 2.98409122e-05,
        3.47161468e-14, 1.10542238e-07, 2.37503617e-08, 3.13636005e-14,
        1.64034100e-06, 2.15755724e-14],
       [4.71124252e-07, 9.99687076e-01, 7.54776047e-05, 2.09085351e-06,
        2.53736162e-05, 5.64270692e-07, 2.26362090e-06, 1.90234277e-04,
        1.44359037e-05, 1.97122131e-06],
       [9.99887705e-01, 1.28722286e-08, 8.88814830e-05, 9.16536521e-07,
        2.46999088e-09, 7.05126558e-06, 5.56670193e-06, 6.89807894e-06,
        1.17508527e-08, 3.01832506e-06],
       [2.95175892e-06, 1.76698336e-08, 1.33558451e-05, 6.03140677e-07,
        9.96140420e-01, 1.09101884e-05, 2.33089509e-06, 2.70344986e-04,
        3.53984137e-06, 3.55555187e-03]], dtype=float32)>

In [2]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten, Conv2D
from tensorflow.keras import Model

# Fetch MNIST, rescale the images by 1/255 and append a trailing axis
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = (x_train / 255.0)[..., tf.newaxis].astype("float32")
x_test = (x_test / 255.0)[..., tf.newaxis].astype("float32")

# Input pipelines in batches of 32; only the training stream is shuffled
train_ds = (
    tf.data.Dataset.from_tensor_slices((x_train, y_train))
    .shuffle(10000)
    .batch(32)
)
test_ds = (
    tf.data.Dataset.from_tensor_slices((x_test, y_test))
    .batch(32)
)

# Define model
class MyModel(Model):
    """Subclassed Keras model: Conv2D(32, 3) -> Flatten -> Dense(128) -> Dense(10).

    The final Dense layer has no activation, so ``call`` returns logits.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = Conv2D(32, 3, activation='relu')
        self.flatten = Flatten()
        self.d1 = Dense(128, activation='relu')
        self.d2 = Dense(10)

    def call(self, x):
        """Apply the four layers to ``x`` in order and return the result."""
        for layer in (self.conv1, self.flatten, self.d1, self.d2):
            x = layer(x)
        return x

# Single model instance shared by train_step and test_step below
model = MyModel()

# Loss and optimizer
# from_logits=True because MyModel's last Dense layer has no activation
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam()

# Metrics
# Running accumulators for loss and accuracy, kept separately for the training
# and test passes; the epoch loop resets them at the start of every epoch.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')

# Training functions
@tf.function
def train_step(images, labels):
    """Run one optimisation step on a batch and update the training metrics.

    Uses the notebook-level ``model``, ``loss_object``, ``optimizer``,
    ``train_loss`` and ``train_accuracy`` objects.
    """
    # Record the forward pass so the loss can be differentiated
    with tf.GradientTape() as tape:
        logits = model(images, training=True)
        batch_loss = loss_object(labels, logits)

    # Looked up after the forward pass (layers presumably create their
    # weights on first call -- keep this ordering).
    variables = model.trainable_variables
    grads = tape.gradient(batch_loss, variables)
    optimizer.apply_gradients(zip(grads, variables))

    train_loss(batch_loss)
    train_accuracy(labels, logits)

@tf.function
def test_step(images, labels):
    """Evaluate one batch in inference mode and update the test metrics.

    Uses the notebook-level ``model``, ``loss_object``, ``test_loss`` and
    ``test_accuracy`` objects; no gradients are computed.
    """
    logits = model(images, training=False)
    test_loss(loss_object(labels, logits))
    test_accuracy(labels, logits)

# Train
EPOCHS = 5
for epoch in range(EPOCHS):
    # Start every epoch with empty accumulators so the printed numbers are per-epoch
    for metric in (train_loss, train_accuracy, test_loss, test_accuracy):
        metric.reset_state()

    # One full pass over the training batches ...
    for images, labels in train_ds:
        train_step(images, labels)

    # ... followed by one full pass over the test batches
    for test_images, test_labels in test_ds:
        test_step(test_images, test_labels)

    print(
        f'Epoch {epoch+1}, '
        f'Loss: {train_loss.result()}, '
        f'Accuracy: {train_accuracy.result()*100}, '
        f'Test Loss: {test_loss.result()}, '
        f'Test Accuracy: {test_accuracy.result()*100}'
    )


Epoch 1, Loss: 0.12532560527324677, Accuracy: 96.21333312988281, Test Loss: 0.06504768878221512, Test Accuracy: 97.7699966430664
Epoch 2, Loss: 0.03958753123879433, Accuracy: 98.73333740234375, Test Loss: 0.05408887192606926, Test Accuracy: 98.3699951171875
Epoch 3, Loss: 0.02017257921397686, Accuracy: 99.32666778564453, Test Loss: 0.057186059653759, Test Accuracy: 98.07999420166016
Epoch 4, Loss: 0.012950601987540722, Accuracy: 99.55500030517578, Test Loss: 0.05140584707260132, Test Accuracy: 98.54999542236328
Epoch 5, Loss: 0.008785175159573555, Accuracy: 99.70500183105469, Test Loss: 0.06520866602659225, Test Accuracy: 98.36000061035156


In [3]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

# Data
# Both FashionMNIST splits are downloaded into ./data (training split first)
# and each sample is converted with ToTensor.
training_data, test_data = (
    datasets.FashionMNIST(
        root="data",
        train=is_train,
        download=True,
        transform=ToTensor()
    )
    for is_train in (True, False)
)

batch_size = 64
# NOTE(review): no `shuffle=` is passed, so both loaders use DataLoader's
# default ordering; consider shuffle=True for the training loader.
train_dataloader = DataLoader(
    training_data,
    batch_size=batch_size
)
test_dataloader = DataLoader(
    test_data,
    batch_size=batch_size
)

# Device
# Use the GPU when CUDA is available, otherwise stay on the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# Model
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten, then 784 -> 512 -> 512 -> 10.

    A ReLU follows each of the first two linear layers; the last linear layer
    is left without activation, so ``forward`` returns logits.
    """

    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()

        widths = (28*28, 512, 512, 10)
        layers = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(nn.ReLU())
        # Drop the trailing ReLU so the stack ends on the 10-unit linear layer
        self.linear_relu_stack = nn.Sequential(*layers[:-1])

    def forward(self, x):
        """Flatten ``x`` and pass it through the linear/ReLU stack."""
        return self.linear_relu_stack(self.flatten(x))

# Build the network and move its parameters to the device selected above
model = NeuralNetwork().to(device)

# Loss and optimizer
# Cross-entropy is applied to the logits returned by NeuralNetwork.forward;
# plain SGD with a learning rate of 1e-3 updates all model parameters.
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)

# Train function
def train(dataloader, model, loss_fn, optimizer):
    """Train ``model`` for one pass over ``dataloader``.

    Args:
        dataloader: Iterable of ``(X, y)`` batches that exposes a sized
            ``.dataset`` attribute (used only for the progress printout).
        model: Module to optimise. Batches are moved to the device of its
            parameters before the forward pass.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates ``model``'s parameters.

    Returns:
        None. The batch loss and a running sample count are printed every
        100 batches.
    """
    size = len(dataloader.dataset)
    # Take the target device from the model itself rather than from a
    # notebook-level global, so the function works for whatever model is
    # passed in. A module without parameters falls back to the CPU.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagate, apply the update, then clear the gradients for the next batch
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss_value, current = loss.item(), (batch+1) * len(X)
            print(f"loss: {loss_value:>7f} [{current:>5d}/{size:>5d}]")

# Test function
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f}\n")

# Training loop
# Each epoch is one training pass followed by one evaluation pass
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_fn, optimizer)
    test(test_dataloader, model, loss_fn)
print("Done!")

# Save and load
# Round-trip the learned weights through a state-dict checkpoint on disk
torch.save(model.state_dict(), "model.pth")
model.load_state_dict(
    torch.load("model.pth", weights_only=True)
)

# Inference
# Label names, indexed by class id
classes = [
    "T-shirt/top",
    "Trouser",
    "Pullover",
    "Dress",
    "Coat",
    "Sandal",
    "Shirt",
    "Sneaker",
    "Bag",
    "Ankle boot",
]
model.eval()
# Classify the first test sample and compare against its label
x, y = test_data[0]
with torch.no_grad():
    x = x.to(device)
    pred = model(x)
    predicted = classes[pred[0].argmax(0)]
    actual = classes[y]
    print(f'Predicted: "{predicted}", Actual: "{actual}"')


100%|█████████████████████████████████████████████████████████████████████████████| 26.4M/26.4M [00:02<00:00, 12.3MB/s]
100%|██████████████████████████████████████████████████████████████████████████████| 29.5k/29.5k [00:00<00:00, 486kB/s]
100%|█████████████████████████████████████████████████████████████████████████████| 4.42M/4.42M [00:01<00:00, 3.81MB/s]
100%|█████████████████████████████████████████████████████████████████████████████████████| 5.15k/5.15k [00:00<?, ?B/s]


Epoch 1
-------------------------------
loss: 2.310038 [   64/60000]
loss: 2.295533 [ 6464/60000]
loss: 2.279214 [12864/60000]
loss: 2.262993 [19264/60000]
loss: 2.255733 [25664/60000]
loss: 2.223017 [32064/60000]
loss: 2.223302 [38464/60000]
loss: 2.193006 [44864/60000]
loss: 2.187628 [51264/60000]
loss: 2.151226 [57664/60000]
Test Error: 
 Accuracy: 43.1%, Avg loss: 2.149732

Epoch 2
-------------------------------
loss: 2.166018 [   64/60000]
loss: 2.152042 [ 6464/60000]
loss: 2.101968 [12864/60000]
loss: 2.101223 [19264/60000]
loss: 2.060765 [25664/60000]
loss: 2.007085 [32064/60000]
loss: 2.023669 [38464/60000]
loss: 1.953706 [44864/60000]
loss: 1.949428 [51264/60000]
loss: 1.873785 [57664/60000]
Test Error: 
 Accuracy: 56.8%, Avg loss: 1.876494

Epoch 3
-------------------------------
loss: 1.916075 [   64/60000]
loss: 1.878450 [ 6464/60000]
loss: 1.776777 [12864/60000]
loss: 1.794409 [19264/60000]
loss: 1.696603 [25664/60000]
loss: 1.662526 [32064/60000]
loss: 1.666947 [38464/60