Math 5750/6880: Mathematics of Data Science \
Project 3

# 1. Fashion-MNIST image classification using sklearn

In [2]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Load Fashion-MNIST
# Classes (0-9): T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train = X_train.reshape(len(X_train), -1)
X_test  = X_test.reshape(len(X_test), -1)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#For this command I asked chatgpt to give me an explination of what each parameter of the function MLPClassifier does to better understand how to modify them

#Baseline model with default parameters
mlp_baseline = MLPClassifier(hidden_layer_sizes=(128,),
                             activation='relu',
                             solver='adam',
                             learning_rate_init=0.001,
                             early_stopping=False,
                             alpha=0.0001,
                             max_iter=20,
                             random_state=42,
                             verbose=False)

start_time = time.time()
mlp_baseline.fit(X_train, y_train)
train_time = time.time() - start_time

y_pred = mlp_baseline.predict(X_test)

print("Baseline Accuracy:", accuracy_score(y_test, y_pred))
print(f"Baseline Training Time: {train_time:.2f} s")
print(confusion_matrix(y_test, y_pred))


Baseline Accuracy: 0.886
Baseline Training Time: 33.52 s
[[823   2  22  21   4   2 120   0   6   0]
 [  2 980   2   9   4   0   2   0   1   0]
 [ 19   0 847   9  63   1  59   0   2   0]
 [ 18   8  22 879  48   0  23   0   2   0]
 [  0   1 119  21 807   0  50   0   2   0]
 [  0   0   0   1   0 961   0  16   3  19]
 [102   2 106  20  70   0 691   0   9   0]
 [  0   0   0   0   0  19   0 954   0  27]
 [  7   0   7   4   5   6   8   3 959   1]
 [  1   0   0   0   0   9   1  30   0 959]]




In [11]:
#Funcation to iterate over different conditions with the baseline as default
def train_mlp(X_train, y_train, X_test, y_test,
              layers=(128,),
              activation='relu',
              solver='adam',
              learning=0.001,
              estop=False,
              alpha=0.0001,
              max_iter=20, #Do not change
              random_state=42, #Do not change
              verbose=False): #Do not change

    #Define Model
    model = MLPClassifier(hidden_layer_sizes=layers,
                          activation=activation,
                          solver=solver,
                          learning_rate_init=learning,
                          early_stopping=estop,
                          alpha=alpha,
                          max_iter=max_iter,
                          random_state=random_state,
                          verbose=verbose)

    #Train
    start_time = time.time()
    model.fit(X_train, y_train)
    train_time = time.time() - start_time

    #Evaluation Metrics
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    conf_mat = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {acc:.4f} | Training Time: {train_time:.2f} s")
    print(conf_mat)

    return acc, conf_mat

In [12]:
#Different Conditions
params = [
    ((128,), 'relu', 'adam', 0.001, False), #1 hidden layer neuron count test
    ((256,), 'relu', 'adam', 0.001, False), #1 hidden layer neuron count test
    ((256,128), 'relu', 'adam', 0.001, False), #2 hidden layers test
    ((256,128,64), 'relu', 'adam', 0.001, False), #3 hidden layers test
    ((128,), 'tanh', 'adam', 0.001, False), #Activation test
    ((128,), 'relu', 'sgd', 0.001, False), #Solver test
    ((128,), 'relu', 'adam', 0.01, False), #Large learning rate test
    ((128,), 'relu', 'adam', 0.0001, False), #Small learning rate test
    ((128,), 'relu', 'adam', 0.001, True) #Early stopping test
]

#Iterate over parameters
results = []
for i, (a,b,c,d,e) in enumerate(params, 1):
    print(f"\n--- Training Model {i} ---")
    acc, conf_mat = train_mlp(X_train, y_train, X_test, y_test, a, b, c, d, e)
    results.append((i, acc, conf_mat))



--- Training Model 1 ---




Accuracy: 0.8860 | Training Time: 68.15 s
[[823   2  22  21   4   2 120   0   6   0]
 [  2 980   2   9   4   0   2   0   1   0]
 [ 19   0 847   9  63   1  59   0   2   0]
 [ 18   8  22 879  48   0  23   0   2   0]
 [  0   1 119  21 807   0  50   0   2   0]
 [  0   0   0   1   0 961   0  16   3  19]
 [102   2 106  20  70   0 691   0   9   0]
 [  0   0   0   0   0  19   0 954   0  27]
 [  7   0   7   4   5   6   8   3 959   1]
 [  1   0   0   0   0   9   1  30   0 959]]

--- Training Model 2 ---




Accuracy: 0.8898 | Training Time: 101.83 s
[[849   3  15  22   6   1  95   0   9   0]
 [  5 977   2  10   5   0   1   0   0   0]
 [ 22   1 825   8  87   0  55   0   2   0]
 [ 18   8  21 871  63   0  14   0   5   0]
 [  0   1  95  16 844   0  41   0   3   0]
 [  0   0   0   1   0 960   0  24   1  14]
 [104   2  91  26  74   0 695   0   8   0]
 [  0   0   0   0   0   8   0 955   0  37]
 [ 10   1  10   5   4   6   5   4 955   0]
 [  1   0   0   1   0   7   1  23   0 967]]

--- Training Model 3 ---




Accuracy: 0.8901 | Training Time: 128.93 s
[[876   1  15  13   2   1  85   0   7   0]
 [  6 979   3   6   1   0   4   0   1   0]
 [ 23   0 846   7  59   0  64   0   1   0]
 [ 40   9  12 893  21   1  20   0   4   0]
 [  3   0 104  36 796   0  59   0   2   0]
 [  1   0   0   0   0 959   0  27   0  13]
 [157   1  84  25  60   0 667   0   6   0]
 [  0   0   0   0   0   9   0 977   0  14]
 [  3   0   6   3   5   3   9   6 965   0]
 [  0   0   0   0   0  11   1  45   0 943]]

--- Training Model 4 ---




Accuracy: 0.8863 | Training Time: 143.37 s
[[824   0  16  22   4   1 127   0   6   0]
 [  4 979   1  11   3   0   2   0   0   0]
 [ 19   0 847   7  50   1  72   0   4   0]
 [ 22   4  17 876  40   2  31   0   8   0]
 [  0   1 114  24 803   0  54   0   4   0]
 [  1   0   1   1   0 949   0  23   0  25]
 [134   0  88  20  54   0 696   0   8   0]
 [  0   0   0   0   0  12   0 956   1  31]
 [  4   0   3   4   3   4   9   4 969   0]
 [  0   0   0   0   0   3   1  32   0 964]]

--- Training Model 5 ---




Accuracy: 0.8837 | Training Time: 74.87 s
[[798   3  15  30   7   0 138   0   9   0]
 [  4 968   0  17   5   0   3   0   3   0]
 [ 14   1 817  14  85   2  62   1   4   0]
 [ 14   8  15 893  34   1  31   0   4   0]
 [  2   1  95  22 833   1  42   0   4   0]
 [  0   0   0   1   0 951   0  24   4  20]
 [ 90   2  83  28  76   0 714   0   6   1]
 [  0   0   0   0   0  23   0 949   0  28]
 [  4   1   7   8   6   4  13   5 952   0]
 [  0   0   0   0   0  12   1  25   0 962]]

--- Training Model 6 ---




Accuracy: 0.8646 | Training Time: 51.53 s
[[812   1  11  52   2   0 110   0  11   1]
 [  1 958   4  27   4   0   5   0   1   0]
 [ 16   1 783  16 105   1  73   0   5   0]
 [ 21  10  11 886  34   1  32   0   5   0]
 [  0   1  95  34 797   0  69   0   4   0]
 [  0   0   0   0   0 928   0  46   2  24]
 [118   2  95  42  85   1 647   0  10   0]
 [  0   0   0   0   0  30   0 939   0  31]
 [  1   1   6  10   4   6  19   4 949   0]
 [  0   0   0   1   0  15   1  36   0 947]]

--- Training Model 7 ---




Accuracy: 0.8671 | Training Time: 58.92 s
[[827   3   9  28   2   0 120   0  11   0]
 [  5 975   0  11   4   0   4   0   1   0]
 [ 49   1 776  19  81   1  72   0   1   0]
 [ 34  14  18 862  35   0  31   0   6   0]
 [  9   0 108  30 791   1  57   0   4   0]
 [  0   0   0   1   0 905   0  54  24  16]
 [121   1  75  33  96   1 661   0  12   0]
 [  0   0   0   0   0   5   0 962   1  32]
 [  5   1   0   6   2   2   8   3 973   0]
 [  0   0   0   0   0  10   1  50   0 939]]

--- Training Model 8 ---




Accuracy: 0.8781 | Training Time: 60.04 s
[[830   0  11  33   2   0 114   0   9   1]
 [  2 963   2  23   3   0   5   0   2   0]
 [ 13   0 795  15 101   1  72   0   3   0]
 [ 16   6  12 897  33   1  31   0   4   0]
 [  0   0  89  31 810   0  68   0   2   0]
 [  0   0   0   1   0 939   0  39   3  18]
 [114   1  83  34  77   0 681   0  10   0]
 [  0   0   0   0   0  22   0 955   0  23]
 [  3   1   3   8   4   5   9   4 962   1]
 [  0   0   0   1   0   8   1  41   0 949]]

--- Training Model 9 ---
Accuracy: 0.8860 | Training Time: 55.41 s
[[853   2   8  14   4   0 109   0  10   0]
 [  4 978   2   8   4   0   3   0   1   0]
 [ 27   1 818  11  67   1  72   1   2   0]
 [ 29   8   8 875  44   1  31   0   4   0]
 [  1   1 107  18 803   0  69   0   1   0]
 [  0   0   0   1   0 954   0  27   2  16]
 [128   0  74  21  68   2 700   0   7   0]
 [  0   0   0   0   0  16   0 961   0  23]
 [  8   0   5   3   3   3   6   5 966   1]
 [  0   0   0   1   0  11   2  34   0 952]]




In [13]:
#Attempt to run with optimal conditions
params = [
    ((256,128), 'relu', 'adam', 0.001, True)
]

results = []
for i, (a,b,c,d,e) in enumerate(params, 1):
    print(f"\n--- Training Model {i} ---")
    acc, conf_mat = train_mlp(X_train, y_train, X_test, y_test, a, b, c, d, e)
    results.append((i, acc, conf_mat))


--- Training Model 1 ---




Accuracy: 0.8894 | Training Time: 121.08 s
[[837   2  19  21   4   0 110   0   6   1]
 [  3 978   2  13   1   0   2   0   1   0]
 [ 15   1 837  14  76   1  56   0   0   0]
 [ 16   6   8 911  29   0  29   0   1   0]
 [  1   0 101  30 813   0  54   0   1   0]
 [  0   0   0   0   0 954   0  27   2  17]
 [125   1  91  28  73   0 676   0   6   0]
 [  0   0   0   0   0   8   0 958   1  33]
 [  6   1   6   9   2   6   7   3 960   0]
 [  0   0   0   0   0   9   1  20   0 970]]


# 3. Fashion-MNIST image classification  using pytorch

In [1]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
import torch
from torch.utils.data import TensorDataset, DataLoader

# Load Fashion-MNIST
# Classes (0-9): T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# scale to [0,1], add channel dimension -> (N, 1, 28, 28)
X_train = (X_train.astype("float32") / 255.0)[:, None, :, :]
X_test  = (X_test.astype("float32")  / 255.0)[:,  None, :, :]

y_train = y_train.astype(np.int64)
y_test  = y_test.astype(np.int64)

# train/val split: last 10k of train as validation
X_tr, X_val = X_train[:50000], X_train[50000:]
y_tr, y_val = y_train[:50000], y_train[50000:]

# wrap in PyTorch TensorDatasets and DataLoaders
train_ds = TensorDataset(torch.from_numpy(X_tr),  torch.from_numpy(y_tr))
val_ds   = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
test_ds  = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=256, shuffle=False)
test_loader  = DataLoader(test_ds,  batch_size=256, shuffle=False)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [41]:
import torch.nn as nn
import torch.optim as optim
from torchsummary import summary

# In colab, you should ``change runtime type'' to GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

#Baseline model
class BaseNN(nn.Module):
    def __init__(self):
        super(BaseNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.fc3(x)
        return x

model_fc = BaseNN().to(device)
summary(model_fc, (1, 28, 28))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_fc.parameters(), lr=0.001)

def train(model, loader, criterion, optimizer, epochs=5):
    start_time = time.time()
    model.train()
    for epoch in range(epochs):
        running_loss = 0
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(loader):.4f}")
    elapsed_time = time.time() - start_time
    print(f"Training completed in {elapsed_time:.2f} seconds\n")

def test(model, loader):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    print(f"Test Accuracy: {correct / total:.2f}")

train(model_fc, train_loader, criterion, optimizer, epochs=5)
test(model_fc, test_loader)

Using device: cuda
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
           Flatten-1                  [-1, 784]               0
            Linear-2                  [-1, 256]         200,960
              ReLU-3                  [-1, 256]               0
            Linear-4                  [-1, 128]          32,896
              ReLU-5                  [-1, 128]               0
            Linear-6                   [-1, 10]           1,290
Total params: 235,146
Trainable params: 235,146
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.01
Params size (MB): 0.90
Estimated Total Size (MB): 0.91
----------------------------------------------------------------
Epoch 1/5, Loss: 0.5937
Epoch 2/5, Loss: 0.4023
Epoch 3/5, Loss: 0.3577
Epoch 4/5, Loss: 0.3286
Epoch 5/5, Loss: 0.3072
Training completed in 7.73 seco

In [42]:
#Model Variations

#Model with more neurons
print("--- More Neurons ---")
class BiggerNN(nn.Module):
    def __init__(self):
        super(BiggerNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 512)
        self.fc2 = nn.Linear(512, 256)
        self.fc3 = nn.Linear(256, 128)
        self.fc4 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x

model_big = BiggerNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_big.parameters(), lr=0.001)

train(model_big, train_loader, criterion, optimizer, epochs=5)
test(model_big, test_loader)

#Model with tanh activation
print("--- Activation Method ---")
class TanhNN(nn.Module):
    def __init__(self):
        super(TanhNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 10)
        self.tanh = nn.Tanh()

    def forward(self, x):
        x = self.flatten(x)
        x = self.tanh(self.fc1(x))
        x = self.tanh(self.fc2(x))
        x = self.fc3(x)
        return x

model_tanh = TanhNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_tanh.parameters(), lr=0.001)

train(model_tanh, train_loader, criterion, optimizer, epochs=5)
test(model_tanh, test_loader)

#Optimizer and Learning Rate
print("--- Optimizer and Learning Rate ---")
model_sgd = BaseNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model_sgd.parameters(), lr=0.05, momentum=0.9)

train(model_sgd, train_loader, criterion, optimizer, epochs=5)
test(model_sgd, test_loader)

#Early Stopping and Regulaization - This variation was written using the help of generative AI beacsue I could not figure out an equivlant to this intially
print("--- Early Stopping and Regularization ---")
class DropoutNN(nn.Module):
    def __init__(self):
        super(DropoutNN, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28*28, 256)
        self.drop1 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(256, 128)
        self.drop2 = nn.Dropout(0.3)
        self.fc3 = nn.Linear(128, 10)
        self.relu = nn.ReLU()

    def forward(self, x):
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        x = self.drop1(x)
        x = self.relu(self.fc2(x))
        x = self.drop2(x)
        x = self.fc3(x)
        return x

model_drop = DropoutNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model_drop.parameters(), lr=0.001)

best_acc = 0
patience, counter = 2, 0

for epoch in range(10):
    train(model_drop, train_loader, criterion, optimizer, epochs=1)
    model_drop.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model_drop(images)
            _, preds = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (preds == labels).sum().item()
    acc = 100 * correct / total
    print(f"Validation Accuracy: {acc:.2f}%")
    if acc > best_acc:
        best_acc = acc
        counter = 0
    else:
        counter += 1
        if counter >= patience:
            print("Early stopping!")
            break

--- More Neurons ---
Epoch 1/5, Loss: 0.5850
Epoch 2/5, Loss: 0.3855
Epoch 3/5, Loss: 0.3375
Epoch 4/5, Loss: 0.3144
Epoch 5/5, Loss: 0.2926
Training completed in 5.69 seconds

Test Accuracy: 0.88
--- Activation Method ---
Epoch 1/5, Loss: 0.5437
Epoch 2/5, Loss: 0.3869
Epoch 3/5, Loss: 0.3481
Epoch 4/5, Loss: 0.3246
Epoch 5/5, Loss: 0.3082
Training completed in 5.53 seconds

Test Accuracy: 0.87
--- Optimizer and Learning Rate ---
Epoch 1/5, Loss: 0.6374
Epoch 2/5, Loss: 0.4050
Epoch 3/5, Loss: 0.3627
Epoch 4/5, Loss: 0.3401
Epoch 5/5, Loss: 0.3194
Training completed in 4.56 seconds

Test Accuracy: 0.86
--- Early Stopping and Regularization ---
Epoch 1/1, Loss: 0.6691
Training completed in 1.07 seconds

Validation Accuracy: 83.05%
Epoch 1/1, Loss: 0.4411
Training completed in 1.15 seconds

Validation Accuracy: 85.05%
Epoch 1/1, Loss: 0.4004
Training completed in 1.37 seconds

Validation Accuracy: 85.35%
Epoch 1/1, Loss: 0.3765
Training completed in 1.17 seconds

Validation Accuracy: 86

In [43]:
#CNN Model
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, 32, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2),
            nn.Conv2d(32, 64, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2, 2)
        )
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 7 * 7, 128),
            nn.ReLU(),
            nn.Linear(128, 10)
        )

    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

model_cnn = CNNModel().to(device)
summary(model_cnn, (1, 28, 28))

optimizer = optim.Adam(model_cnn.parameters(), lr=0.001)
train(model_cnn, train_loader, criterion, optimizer, epochs=5)
test(model_cnn, test_loader)


----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1           [-1, 32, 28, 28]             320
              ReLU-2           [-1, 32, 28, 28]               0
         MaxPool2d-3           [-1, 32, 14, 14]               0
            Conv2d-4           [-1, 64, 14, 14]          18,496
              ReLU-5           [-1, 64, 14, 14]               0
         MaxPool2d-6             [-1, 64, 7, 7]               0
           Flatten-7                 [-1, 3136]               0
            Linear-8                  [-1, 128]         401,536
              ReLU-9                  [-1, 128]               0
           Linear-10                   [-1, 10]           1,290
Total params: 421,642
Trainable params: 421,642
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.67
Params size (MB): 1.61
Estimated T

In [44]:
#Transfer Learning
resnet = models.resnet18(weights='IMAGENET1K_V1')
resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
resnet.fc = nn.Linear(resnet.fc.in_features, 10)
resnet = resnet.to(device)

optimizer = optim.Adam(resnet.parameters(), lr=1e-4)

train(resnet, train_loader, criterion, optimizer, epochs=5)
test(resnet, test_loader)


Epoch 1/5, Loss: 0.5530
Epoch 2/5, Loss: 0.3412
Epoch 3/5, Loss: 0.2843
Epoch 4/5, Loss: 0.2420
Epoch 5/5, Loss: 0.2120
Training completed in 100.02 seconds

Test Accuracy: 0.88
