Math 5750/6880: Mathematics of Data Science \
Project 3

# 1. Fashion-MNIST image classification using sklearn

In [2]:
import pandas as pd
import numpy as np
import time
import matplotlib.pyplot as plt
import seaborn as sns

In [8]:
from tensorflow.keras.datasets import fashion_mnist
from sklearn.preprocessing import StandardScaler

# Load Fashion-MNIST
# Classes (0-9): T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
X_train = X_train.reshape(len(X_train), -1)
X_test  = X_test.reshape(len(X_test), -1)

# Scale features
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [9]:
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

#For this command I asked chatgpt to give me an explination of what each parameter of the function MLPClassifier does to better understand how to modify them

#Baseline model with default parameters
mlp_baseline = MLPClassifier(hidden_layer_sizes=(128,),
                             activation='relu',
                             solver='adam',
                             learning_rate_init=0.001,
                             early_stopping=False,
                             alpha=0.0001,
                             max_iter=20,
                             random_state=42,
                             verbose=False)

start_time = time.time()
mlp_baseline.fit(X_train, y_train)
train_time = time.time() - start_time

y_pred = mlp_baseline.predict(X_test)

print("Baseline Accuracy:", accuracy_score(y_test, y_pred))
print(f"Baseline Training Time: {train_time:.2f} s")
print(confusion_matrix(y_test, y_pred))


Baseline Accuracy: 0.886
Baseline Training Time: 33.52 s
[[823   2  22  21   4   2 120   0   6   0]
 [  2 980   2   9   4   0   2   0   1   0]
 [ 19   0 847   9  63   1  59   0   2   0]
 [ 18   8  22 879  48   0  23   0   2   0]
 [  0   1 119  21 807   0  50   0   2   0]
 [  0   0   0   1   0 961   0  16   3  19]
 [102   2 106  20  70   0 691   0   9   0]
 [  0   0   0   0   0  19   0 954   0  27]
 [  7   0   7   4   5   6   8   3 959   1]
 [  1   0   0   0   0   9   1  30   0 959]]




In [11]:
#Funcation to iterate over different conditions with the baseline as default
def train_mlp(X_train, y_train, X_test, y_test,
              layers=(128,),
              activation='relu',
              solver='adam',
              learning=0.001,
              estop=False,
              alpha=0.0001,
              max_iter=20, #Do not change
              random_state=42, #Do not change
              verbose=False): #Do not change

    #Define Model
    model = MLPClassifier(hidden_layer_sizes=layers,
                          activation=activation,
                          solver=solver,
                          learning_rate_init=learning,
                          early_stopping=estop,
                          alpha=alpha,
                          max_iter=max_iter,
                          random_state=random_state,
                          verbose=verbose)

    #Train
    start_time = time.time()
    model.fit(X_train, y_train)
    train_time = time.time() - start_time

    #Evaluation Metrics
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    conf_mat = confusion_matrix(y_test, y_pred)

    print(f"Accuracy: {acc:.4f} | Training Time: {train_time:.2f} s")
    print(conf_mat)

    return acc, conf_mat

In [12]:
#Different Conditions
params = [
    ((128,), 'relu', 'adam', 0.001, False), #1 hidden layer neuron count test
    ((256,), 'relu', 'adam', 0.001, False), #1 hidden layer neuron count test
    ((256,128), 'relu', 'adam', 0.001, False), #2 hidden layers test
    ((256,128,64), 'relu', 'adam', 0.001, False), #3 hidden layers test
    ((128,), 'tanh', 'adam', 0.001, False), #Activation test
    ((128,), 'relu', 'sgd', 0.001, False), #Solver test
    ((128,), 'relu', 'adam', 0.01, False), #Large learning rate test
    ((128,), 'relu', 'adam', 0.0001, False), #Small learning rate test
    ((128,), 'relu', 'adam', 0.001, True) #Early stopping test
]

#Iterate over parameters
results = []
for i, (a,b,c,d,e) in enumerate(params, 1):
    print(f"\n--- Training Model {i} ---")
    acc, conf_mat = train_mlp(X_train, y_train, X_test, y_test, a, b, c, d, e)
    results.append((i, acc, conf_mat))



--- Training Model 1 ---




Accuracy: 0.8860 | Training Time: 68.15 s
[[823   2  22  21   4   2 120   0   6   0]
 [  2 980   2   9   4   0   2   0   1   0]
 [ 19   0 847   9  63   1  59   0   2   0]
 [ 18   8  22 879  48   0  23   0   2   0]
 [  0   1 119  21 807   0  50   0   2   0]
 [  0   0   0   1   0 961   0  16   3  19]
 [102   2 106  20  70   0 691   0   9   0]
 [  0   0   0   0   0  19   0 954   0  27]
 [  7   0   7   4   5   6   8   3 959   1]
 [  1   0   0   0   0   9   1  30   0 959]]

--- Training Model 2 ---




Accuracy: 0.8898 | Training Time: 101.83 s
[[849   3  15  22   6   1  95   0   9   0]
 [  5 977   2  10   5   0   1   0   0   0]
 [ 22   1 825   8  87   0  55   0   2   0]
 [ 18   8  21 871  63   0  14   0   5   0]
 [  0   1  95  16 844   0  41   0   3   0]
 [  0   0   0   1   0 960   0  24   1  14]
 [104   2  91  26  74   0 695   0   8   0]
 [  0   0   0   0   0   8   0 955   0  37]
 [ 10   1  10   5   4   6   5   4 955   0]
 [  1   0   0   1   0   7   1  23   0 967]]

--- Training Model 3 ---




Accuracy: 0.8901 | Training Time: 128.93 s
[[876   1  15  13   2   1  85   0   7   0]
 [  6 979   3   6   1   0   4   0   1   0]
 [ 23   0 846   7  59   0  64   0   1   0]
 [ 40   9  12 893  21   1  20   0   4   0]
 [  3   0 104  36 796   0  59   0   2   0]
 [  1   0   0   0   0 959   0  27   0  13]
 [157   1  84  25  60   0 667   0   6   0]
 [  0   0   0   0   0   9   0 977   0  14]
 [  3   0   6   3   5   3   9   6 965   0]
 [  0   0   0   0   0  11   1  45   0 943]]

--- Training Model 4 ---




Accuracy: 0.8863 | Training Time: 143.37 s
[[824   0  16  22   4   1 127   0   6   0]
 [  4 979   1  11   3   0   2   0   0   0]
 [ 19   0 847   7  50   1  72   0   4   0]
 [ 22   4  17 876  40   2  31   0   8   0]
 [  0   1 114  24 803   0  54   0   4   0]
 [  1   0   1   1   0 949   0  23   0  25]
 [134   0  88  20  54   0 696   0   8   0]
 [  0   0   0   0   0  12   0 956   1  31]
 [  4   0   3   4   3   4   9   4 969   0]
 [  0   0   0   0   0   3   1  32   0 964]]

--- Training Model 5 ---




Accuracy: 0.8837 | Training Time: 74.87 s
[[798   3  15  30   7   0 138   0   9   0]
 [  4 968   0  17   5   0   3   0   3   0]
 [ 14   1 817  14  85   2  62   1   4   0]
 [ 14   8  15 893  34   1  31   0   4   0]
 [  2   1  95  22 833   1  42   0   4   0]
 [  0   0   0   1   0 951   0  24   4  20]
 [ 90   2  83  28  76   0 714   0   6   1]
 [  0   0   0   0   0  23   0 949   0  28]
 [  4   1   7   8   6   4  13   5 952   0]
 [  0   0   0   0   0  12   1  25   0 962]]

--- Training Model 6 ---




Accuracy: 0.8646 | Training Time: 51.53 s
[[812   1  11  52   2   0 110   0  11   1]
 [  1 958   4  27   4   0   5   0   1   0]
 [ 16   1 783  16 105   1  73   0   5   0]
 [ 21  10  11 886  34   1  32   0   5   0]
 [  0   1  95  34 797   0  69   0   4   0]
 [  0   0   0   0   0 928   0  46   2  24]
 [118   2  95  42  85   1 647   0  10   0]
 [  0   0   0   0   0  30   0 939   0  31]
 [  1   1   6  10   4   6  19   4 949   0]
 [  0   0   0   1   0  15   1  36   0 947]]

--- Training Model 7 ---




Accuracy: 0.8671 | Training Time: 58.92 s
[[827   3   9  28   2   0 120   0  11   0]
 [  5 975   0  11   4   0   4   0   1   0]
 [ 49   1 776  19  81   1  72   0   1   0]
 [ 34  14  18 862  35   0  31   0   6   0]
 [  9   0 108  30 791   1  57   0   4   0]
 [  0   0   0   1   0 905   0  54  24  16]
 [121   1  75  33  96   1 661   0  12   0]
 [  0   0   0   0   0   5   0 962   1  32]
 [  5   1   0   6   2   2   8   3 973   0]
 [  0   0   0   0   0  10   1  50   0 939]]

--- Training Model 8 ---




Accuracy: 0.8781 | Training Time: 60.04 s
[[830   0  11  33   2   0 114   0   9   1]
 [  2 963   2  23   3   0   5   0   2   0]
 [ 13   0 795  15 101   1  72   0   3   0]
 [ 16   6  12 897  33   1  31   0   4   0]
 [  0   0  89  31 810   0  68   0   2   0]
 [  0   0   0   1   0 939   0  39   3  18]
 [114   1  83  34  77   0 681   0  10   0]
 [  0   0   0   0   0  22   0 955   0  23]
 [  3   1   3   8   4   5   9   4 962   1]
 [  0   0   0   1   0   8   1  41   0 949]]

--- Training Model 9 ---
Accuracy: 0.8860 | Training Time: 55.41 s
[[853   2   8  14   4   0 109   0  10   0]
 [  4 978   2   8   4   0   3   0   1   0]
 [ 27   1 818  11  67   1  72   1   2   0]
 [ 29   8   8 875  44   1  31   0   4   0]
 [  1   1 107  18 803   0  69   0   1   0]
 [  0   0   0   1   0 954   0  27   2  16]
 [128   0  74  21  68   2 700   0   7   0]
 [  0   0   0   0   0  16   0 961   0  23]
 [  8   0   5   3   3   3   6   5 966   1]
 [  0   0   0   1   0  11   2  34   0 952]]




In [13]:
#Attempt to run with optimal conditions
params = [
    ((256,128), 'relu', 'adam', 0.001, True)
]

results = []
for i, (a,b,c,d,e) in enumerate(params, 1):
    print(f"\n--- Training Model {i} ---")
    acc, conf_mat = train_mlp(X_train, y_train, X_test, y_test, a, b, c, d, e)
    results.append((i, acc, conf_mat))


--- Training Model 1 ---




Accuracy: 0.8894 | Training Time: 121.08 s
[[837   2  19  21   4   0 110   0   6   1]
 [  3 978   2  13   1   0   2   0   1   0]
 [ 15   1 837  14  76   1  56   0   0   0]
 [ 16   6   8 911  29   0  29   0   1   0]
 [  1   0 101  30 813   0  54   0   1   0]
 [  0   0   0   0   0 954   0  27   2  17]
 [125   1  91  28  73   0 676   0   6   0]
 [  0   0   0   0   0   8   0 958   1  33]
 [  6   1   6   9   2   6   7   3 960   0]
 [  0   0   0   0   0   9   1  20   0 970]]


# 3. Fashion-MNIST image classification  using pytorch

In [1]:
import numpy as np
from tensorflow.keras.datasets import fashion_mnist
import torch
from torch.utils.data import TensorDataset, DataLoader

# Load Fashion-MNIST
# Classes (0-9): T-shirt/top, Trouser, Pullover, Dress, Coat, Sandal, Shirt, Sneaker, Bag, Ankle boot
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# scale to [0,1], add channel dimension -> (N, 1, 28, 28)
X_train = (X_train.astype("float32") / 255.0)[:, None, :, :]
X_test  = (X_test.astype("float32")  / 255.0)[:,  None, :, :]

y_train = y_train.astype(np.int64)
y_test  = y_test.astype(np.int64)

# train/val split: last 10k of train as validation
X_tr, X_val = X_train[:50000], X_train[50000:]
y_tr, y_val = y_train[:50000], y_train[50000:]

# wrap in PyTorch TensorDatasets and DataLoaders
train_ds = TensorDataset(torch.from_numpy(X_tr),  torch.from_numpy(y_tr))
val_ds   = TensorDataset(torch.from_numpy(X_val), torch.from_numpy(y_val))
test_ds  = TensorDataset(torch.from_numpy(X_test), torch.from_numpy(y_test))

train_loader = DataLoader(train_ds, batch_size=128, shuffle=True)
val_loader   = DataLoader(val_ds,   batch_size=256, shuffle=False)
test_loader  = DataLoader(test_ds,  batch_size=256, shuffle=False)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
[1m29515/29515[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
[1m26421880/26421880[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
[1m5148/5148[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
[1m4422102/4422102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step


In [13]:
import torch.nn as nn
import torch.optim as optim

# In colab, you should ``change runtime type'' to GPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Using device:", device)

#Defining the model
class MLP(nn.Module):
    def __init__(self, hidden_sizes=[256, 128], activation='relu', dropout=0.0):
        super().__init__()
        self.flatten = nn.Flatten()
        layers = []
        input_size = 28 * 28

        for h in hidden_sizes:
            layers.append(nn.Linear(input_size, h))
            if activation == 'relu':
                layers.append(nn.ReLU())
            elif activation == 'tanh':
                layers.append(nn.Tanh())
            elif activation == 'sigmoid':
                layers.append(nn.Sigmoid())
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
            input_size = h

        layers.append(nn.Linear(input_size, 10))
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        x = self.flatten(x)
        return self.net(x)

#Function for Training
def train_model(model, train_loader, val_loader, optimizer, criterion, epochs=5, device='cuda'):
    model.to(device)
    start_time = time.time()

    for epoch in range(epochs):
        model.train()
        running_loss = 0.0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        model.eval()
        val_correct, val_total = 0, 0
        with torch.no_grad():
            for images, labels in val_loader:
                images, labels = images.to(device), labels.to(device)
                preds = model(images).argmax(dim=1)
                val_correct += (preds == labels).sum().item()
                val_total += labels.size(0)
        val_acc = val_correct / val_total
        print(f"Epoch [{epoch+1}/{epochs}] - Loss: {running_loss/len(train_loader):.4f}, Val Acc: {val_acc:.4f}")

    total_time = time.time() - start_time
    return total_time

#Defining the test run
def evaluate_model(model, test_loader, device='cuda'):
    model.eval()
    y_true, y_pred = [], []
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            preds = model(images).argmax(dim=1)
            y_true.extend(labels.cpu().numpy())
            y_pred.extend(preds.cpu().numpy())
    return np.array(y_true), np.array(y_pred)

Using device: cuda


In [17]:
#Variations
param_sets = [
    {"hidden_sizes": [128], "activation": "relu", "optimizer": "adam", "lr": 0.001, "dropout": 0.0},
    {"hidden_sizes": [256, 128], "activation": "relu", "optimizer": "adam", "lr": 0.001, "dropout": 0.0},
    {"hidden_sizes": [256, 128, 64], "activation": "tanh", "optimizer": "adam", "lr": 0.001, "dropout": 0.2},
    {"hidden_sizes": [128], "activation": "relu", "optimizer": "sgd", "lr": 0.01, "dropout": 0.0},
    {"hidden_sizes": [128], "activation": "relu", "optimizer": "adam", "lr": 0.001, "dropout": 0.3},
]

#Iterate through variations
results = []
for i, params in enumerate(param_sets, 1):
    print(f"\n--- Training Model {i}: {params} ---")
    model = MLP(hidden_sizes=params["hidden_sizes"],
                activation=params["activation"],
                dropout=params["dropout"]).to(device)

    criterion = nn.CrossEntropyLoss()
    if params["optimizer"] == "adam":
        optimizer = optim.Adam(model.parameters(), lr=params["lr"])
    elif params["optimizer"] == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=params["lr"])

    #Training and evalution
    train_time = train_model(model, train_loader, val_loader, optimizer, criterion, epochs=5, device=device)
    y_true, y_pred = evaluate_model(model, test_loader, device)
    acc = accuracy_score(y_true, y_pred)
    cm = confusion_matrix(y_true, y_pred)

    print(f"Accuracy: {acc:.4f} | Training Time: {train_time:.2f} s")
    print(cm)


--- Training Model 1: {'hidden_sizes': [128], 'activation': 'relu', 'optimizer': 'adam', 'lr': 0.001, 'dropout': 0.0} ---
Epoch [1/5] - Loss: 0.6261, Val Acc: 0.8338
Epoch [2/5] - Loss: 0.4373, Val Acc: 0.8538
Epoch [3/5] - Loss: 0.3950, Val Acc: 0.8619
Epoch [4/5] - Loss: 0.3700, Val Acc: 0.8655
Epoch [5/5] - Loss: 0.3486, Val Acc: 0.8590
Accuracy: 0.8552 | Training Time: 4.97 s
[[643   2  16  62   5   3 259   0  10   0]
 [  2 956   1  31   6   0   2   0   2   0]
 [  5   1 831  17  64   1  77   0   4   0]
 [  6   8   8 914  22   0  36   0   6   0]
 [  0   0 157  55 690   4  90   0   4   0]
 [  0   0   0   1   0 958   0  26   2  13]
 [ 53   1 110  53  50   1 718   0  14   0]
 [  0   0   0   0   0  39   0 923   0  38]
 [  1   1   2   6   3   3  14   5 965   0]
 [  0   0   0   0   0  14   1  31   0 954]]

--- Training Model 2: {'hidden_sizes': [256, 128], 'activation': 'relu', 'optimizer': 'adam', 'lr': 0.001, 'dropout': 0.0} ---
Epoch [1/5] - Loss: 0.5939, Val Acc: 0.8362
Epoch [2/5] -

In [19]:
#Define CNN Model with MaxPool2d layers
class CNN(nn.Module):
    def __init__(self, conv_channels=[32, 64, 128], fc_size=128, activation='relu', dropout=0.25):
        super().__init__()

        self.act = nn.ReLU() if activation == 'relu' else nn.Tanh()
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout = nn.Dropout(dropout) if dropout > 0 else nn.Identity()

        self.conv1 = nn.Conv2d(1, conv_channels[0], kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(conv_channels[0], conv_channels[1], kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(conv_channels[1], conv_channels[2], kernel_size=3, padding=1)

        self.fc1 = nn.Linear(conv_channels[2] * 3 * 3, fc_size)
        self.fc2 = nn.Linear(fc_size, 10)

    def forward(self, x):
        x = self.pool(self.act(self.conv1(x)))
        x = self.pool(self.act(self.conv2(x)))
        x = self.pool(self.act(self.conv3(x)))

        x = torch.flatten(x, 1)
        x = self.dropout(self.act(self.fc1(x)))
        x = self.fc2(x)
        return x

In [20]:
#Run CNN Model
cnn_params = {
    "conv_channels": [32, 64, 128],
    "fc_size": 128,
    "activation": "relu",
    "dropout": 0.25,
    "optimizer": "adam",
    "learning_rate": 0.001,
    "epochs": 5
}

print(f"\n--- Training Improved CNN with parameters: {cnn_params} ---")

cnn_model = CNN(conv_channels=cnn_params["conv_channels"],
                fc_size=cnn_params["fc_size"],
                activation=cnn_params["activation"],
                dropout=cnn_params["dropout"]).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(cnn_model.parameters(), lr=cnn_params["learning_rate"])

#Training and Evulation
train_time = train_model(cnn_model, train_loader, val_loader, optimizer, criterion,
                         epochs=cnn_params["epochs"], device=device)

y_true, y_pred = evaluate_model(cnn_model, test_loader, device)
acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

print(f"Accuracy: {acc:.4f} | Training Time: {train_time:.2f} s")
print(cm)


--- Training Improved CNN with parameters: {'conv_channels': [32, 64, 128], 'fc_size': 128, 'activation': 'relu', 'dropout': 0.25, 'optimizer': 'adam', 'learning_rate': 0.001, 'epochs': 5} ---
Epoch [1/5] - Loss: 0.6652, Val Acc: 0.8486
Epoch [2/5] - Loss: 0.3858, Val Acc: 0.8774
Epoch [3/5] - Loss: 0.3205, Val Acc: 0.8843
Epoch [4/5] - Loss: 0.2859, Val Acc: 0.9023
Epoch [5/5] - Loss: 0.2606, Val Acc: 0.9111
Accuracy: 0.9035 | Training Time: 11.82 s
[[821   0  13  17   4   1 139   0   5   0]
 [  2 974   0  15   3   0   4   0   2   0]
 [ 12   1 859   9  59   0  59   0   1   0]
 [ 18   3   8 910  29   0  31   0   1   0]
 [  1   1  64  30 845   0  58   0   1   0]
 [  0   0   0   1   0 976   0  14   0   9]
 [ 87   1  58  27  73   0 749   0   5   0]
 [  0   0   0   0   0   5   0 984   0  11]
 [  5   1   4   1   5   1   3   5 975   0]
 [  1   0   0   0   0   4   0  53   0 942]]


In [24]:
from torchvision import models

#Transfer Learning
transfer_params = {
    "base_model": "ResNet18",
    "weights": "IMAGENET1K_V1",
    "frozen_layers": True,
    "optimizer": "adam",
    "learning_rate": 1e-4,
    "epochs": 3
}

print(f"\n--- Training Transfer Learning Model ({transfer_params['base_model']}) ---")

resnet = models.resnet18(weights=transfer_params["weights"])
resnet.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
resnet.fc = nn.Linear(resnet.fc.in_features, 10)
if transfer_params["frozen_layers"]:
    for param in list(resnet.parameters())[:-2]:
        param.requires_grad = False

resnet = resnet.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(resnet.parameters(), lr=transfer_params["learning_rate"])

#Training and Evaluation
train_time = train_model(resnet, train_loader, val_loader, optimizer, criterion,
                         epochs=transfer_params["epochs"], device=device)

y_true, y_pred = evaluate_model(resnet, test_loader, device)
acc = accuracy_score(y_true, y_pred)
cm = confusion_matrix(y_true, y_pred)

print(f"Accuracy: {acc:.4f} | Training Time: {train_time:.2f} s")
print(cm)


--- Training Transfer Learning Model (ResNet18) ---
Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth


100%|██████████| 44.7M/44.7M [00:00<00:00, 75.4MB/s]


Epoch [1/3] - Loss: 1.8653, Val Acc: 0.5202
Epoch [2/3] - Loss: 1.3700, Val Acc: 0.5834
Epoch [3/3] - Loss: 1.2193, Val Acc: 0.6107
Accuracy: 0.5999 | Training Time: 12.92 s
[[585  24  92 103  21  11 106   5  45   8]
 [ 26 843  15  75  11   2  16   1   7   4]
 [ 81  19 483  16 128   8 211   2  49   3]
 [122 125  42 569  37  14  65   6  16   4]
 [ 51  13 226  50 435   8 151   1  55  10]
 [ 11   4   9   2   4 647  18 226  32  47]
 [202  20 211  59 108   8 326   5  51  10]
 [  3   5   1   3   0 139   3 735   7 104]
 [ 57   8  56  30  27  56  39  45 631  51]
 [  6  10  11   8  19  56   8  92  45 745]]
