In [45]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [46]:
# Use the MPS backend when PyTorch reports it as available; otherwise fall back to the CPU.
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
print(device)

mps


In [72]:
# ToTensor is the only preprocessing step attached to the datasets.
transform = transforms.Compose([transforms.ToTensor()])

# MNIST train and test splits, stored under ./data (download is requested for both).
train_dataset = datasets.MNIST('./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST('./data', train=False, transform=transform, download=True)

In [73]:
# Pull the raw tensors out of the datasets and keep them on `device`.
# Reading `.data` directly bypasses `transform`, so the pixel values are cast to
# float and divided by 255 here by hand.
# unsqueeze(1) inserts a singleton axis at dim 1 — presumably giving
# (N, 1, 28, 28), the input layout the model comments describe; confirm.
train_data = (train_dataset.data.to(device).float() / 255.0).unsqueeze(1)
train_targets = train_dataset.targets.to(device)

test_data = (test_dataset.data.to(device).float() / 255.0).unsqueeze(1)
test_targets = test_dataset.targets.to(device)

def get_batches(data, targets, batch_size):
    """Yield consecutive ``(data, targets)`` slices of at most ``batch_size`` items.

    Batches are taken in order from the start of ``data``; the final batch is
    shorter when ``len(data)`` is not a multiple of ``batch_size``. Both
    arguments are sliced with the same window, whose extent is driven by
    ``len(data)``.

    Args:
        data: Sliceable sequence (e.g. a tensor) of inputs.
        targets: Sliceable sequence of labels, indexed in step with ``data``.
        batch_size: Step between the starts of successive batches.

    Yields:
        Tuples ``(data_slice, targets_slice)``.
    """
    for lo in range(0, len(data), batch_size):
        window = slice(lo, lo + batch_size)
        yield data[window], targets[window]

# Batch size shared by the DataLoaders below and by get_batches-based loops.
batch_size = 500

# DataLoader views of the two datasets: the training loader reshuffles, the test loader keeps order.
train_loader = DataLoader(dataset=train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(dataset=test_dataset, shuffle=False, batch_size=batch_size)

In [74]:
class ExperimentalModel(nn.Module):
    """CNN classifier with shared 1x1 "compression" convolutions between stages.

    Every spatial convolution outputs 32 channels and is followed by two 1x1
    convolutions (``compression``: 32 -> 32, ``compression2``: 32 -> 8). The
    same two 1x1 layers (and therefore the same weights) are reused after each
    of the four stages, which is why ``conv2``..``conv4`` take 8 input channels.
    ``conv4`` is applied twice: once as the fourth stage and once more as the
    final layer before the classifier.

    For a (B, 1, 28, 28) input the spatial size goes
    28 -> 15 -> 8 -> 7 -> 5 -> 3, so the head sees 32 * 3 * 3 = 288 features.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=2, stride=2, padding=1)  # 28x28 -> 15x15
        self.conv2 = nn.Conv2d(8, 32, kernel_size=3, stride=2, padding=1)  # 15x15 -> 8x8
        self.conv3 = nn.Conv2d(8, 32, kernel_size=4, stride=1, padding=1)  # 8x8   -> 7x7
        self.conv4 = nn.Conv2d(8, 32, kernel_size=5, stride=1, padding=1)  # 7x7 -> 5x5, then 5x5 -> 3x3

        # 1x1 convolutions shared by all stages: mix channels, then reduce 32 -> 8.
        self.compression = nn.Conv2d(32, 32, kernel_size=1, stride=1, padding=0)
        self.compression2 = nn.Conv2d(32, 8, kernel_size=1, stride=1, padding=0)

        self.relu = nn.ReLU(inplace=True)
        self.tanh = nn.Tanh()
        self.fc1 = nn.Linear(288, 10)  # 32 channels * 3 * 3 spatial positions

    def forward(self, x):
        """Map a (B, 1, 28, 28) batch to (B, 10) class scores.

        Args:
            x: Input batch; the layer sizes assume single-channel 28x28 images.

        Returns:
            Tensor of shape (B, 10) with unnormalised class scores.
        """
        # Four stages of: spatial conv + ReLU, then the shared 1x1 pair with tanh.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.relu(conv(x))
            x = self.tanh(self.compression(x))
            x = self.tanh(self.compression2(x))

        # conv4 is reused once more on the 8-channel 5x5 map, giving 32 x 3 x 3.
        x = self.relu(self.conv4(x))

        # Single flatten; unlike ``view`` it also accepts non-contiguous
        # activations (e.g. channels_last), copying only when required.
        x = x.flatten(1)
        return self.fc1(x)

In [75]:
class ExperimentalModel(nn.Module):
    """CNN classifier: a four-layer convolutional stem, then a weight-shared block run four times.

    The stem (``conv1``..``conv4``) reduces a (B, 1, 28, 28) input to
    (B, 32, 5, 5). The block ``conv`` (2x2, 32 -> 64) -> ``process`` (1x1,
    64 -> 128) -> ``compress`` (1x1, 128 -> 32) is then applied four times with
    the same weights; each pass shrinks the spatial size by one
    (5 -> 4 -> 3 -> 2 -> 1), leaving 32 features for the linear head.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=2, stride=2, padding=1)   # 28x28 -> 15x15
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=2, padding=1)  # 15x15 -> 8x8
        self.conv3 = nn.Conv2d(32, 32, kernel_size=4, stride=1, padding=1)  # 8x8   -> 7x7
        self.conv4 = nn.Conv2d(32, 32, kernel_size=5, stride=1, padding=1)  # 7x7   -> 5x5

        # Layers of the repeated block; the same instances are reused on every pass.
        self.conv = nn.Conv2d(32, 64, kernel_size=2, stride=1, padding=0)
        self.process = nn.Conv2d(64, 128, kernel_size=1, stride=1, padding=0)
        self.compress = nn.Conv2d(128, 32, kernel_size=1, stride=1, padding=0)

        self.relu = nn.ReLU()
        self.tanh = nn.Tanh()
        self.fc1 = nn.Linear(32, 10)  # 32 channels * 1 * 1 after the repeated block

    def forward(self, x):
        """Map a (B, 1, 28, 28) batch to (B, 10) class scores.

        Args:
            x: Input batch; the layer sizes assume single-channel 28x28 images.

        Returns:
            Tensor of shape (B, 10) with unnormalised class scores.
        """
        for stem_layer in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.relu(stem_layer(x))

        # Four passes through the shared block: 5x5 -> 4x4 -> 3x3 -> 2x2 -> 1x1.
        for _ in range(4):
            x = self.tanh(self.conv(x))
            x = self.tanh(self.process(x))
            x = self.tanh(self.compress(x))

        # Single flatten (the earlier view-then-reshape pair did the same work twice).
        x = x.flatten(1)
        return self.fc1(x)

In [76]:
class ExperimentalModel(nn.Module):
    """Pixel-by-pixel LSTM classifier.

    The image is flattened and fed to a 2-layer LSTM one pixel per time step
    (a single feature per step). The LSTM output at the last time step is
    passed through a linear layer that produces 10 scores.
    """

    def __init__(self):
        super().__init__()
        self.lstm = nn.LSTM(input_size=1, hidden_size=32, num_layers=2, batch_first=True)
        self.fc = nn.Linear(32, 10)  # 32 = LSTM hidden size, 10 output scores

    def forward(self, x):
        """Return (B, 10) scores for a batch of images.

        Args:
            x: Batch whose first dimension is the batch size; for a
                (B, 1, 28, 28) input the sequence has 784 steps.

        Returns:
            Tensor of shape (B, 10).
        """
        # (B, ...) -> (B, T) -> (B, T, 1): every pixel becomes one time step.
        pixel_seq = x.view(x.size(0), -1).unsqueeze(-1)
        # hidden_seq: (B, T, 32) — one 32-dim output per time step.
        hidden_seq, _ = self.lstm(pixel_seq)
        # Classify from the output at the final time step only.
        last_step = hidden_seq[:, -1]
        return self.fc(last_step)

In [77]:
class SimpleConcatRNNCell(nn.Module):
    """Recurrent cell: one linear layer over ``[input, previous hidden]`` followed by tanh."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # One affine map consumes the input and the hidden state side by side.
        self.fc = nn.Linear(in_features=input_size + hidden_size, out_features=hidden_size)

    def forward(self, x_t, h_prev):
        """Compute the next hidden state.

        Args:
            x_t: Current input, shape (batch, input_size).
            h_prev: Previous hidden state, shape (batch, hidden_size).

        Returns:
            New hidden state of shape (batch, hidden_size), values in [-1, 1].
        """
        joined = torch.cat((x_t, h_prev), dim=1)  # (batch, input_size + hidden_size)
        return self.fc(joined).tanh()

In [79]:
class ExperimentalModel(nn.Module):
    """Classifier that applies one SimpleConcatRNNCell to the same flattened image 16 times.

    The hidden state starts from a learned vector ``h0`` and is updated by the
    cell 16 times, each time seeing the full flattened input again. A linear
    layer maps the final hidden state to 10 scores.
    """

    def __init__(self, input_size=784, hidden_size=32):
        super().__init__()
        self.rnn_cell = SimpleConcatRNNCell(input_size, hidden_size)
        self.hidden_size = hidden_size

        # Learned initial hidden state, shape (1, hidden_size); expanded per batch in forward.
        self.h0 = nn.Parameter(torch.randn(1, hidden_size))
        self.fc = nn.Linear(hidden_size, 10)

    def forward(self, x):
        """Return (B, 10) scores.

        Args:
            x: Batch whose trailing dimensions flatten to ``input_size`` values
                per sample (784 for a (B, 1, 28, 28) input).

        Returns:
            Tensor of shape (B, 10).
        """
        n_samples = x.size(0)
        flat = x.view(n_samples, -1)  # (B, input_size)

        # Start every sample from its own copy of the learned initial state.
        state = self.h0.expand(n_samples, -1).clone()

        # Same cell, same input, 16 updates of the hidden state.
        for _step in range(16):
            state = self.rnn_cell(flat, state)

        return self.fc(state)

In [80]:
# Optimisation settings for this run.
learning_rate = 1e-3
epochs = 1000

# Re-resolve the device here so this cell does not rely on the earlier device cell.
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")

# Builds whichever ExperimentalModel definition is currently bound to that name.
model = ExperimentalModel()
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)

In [81]:
# Report every parameter tensor by name, then the overall element count.
total_params = 0
for name, param in model.named_parameters():
    print(f"{name}: {param.numel()} params, requires_grad={param.requires_grad}")
    total_params += param.numel()

print()
print(total_params)

h0: 32 params, requires_grad=True
rnn_cell.fc.weight: 26112 params, requires_grad=True
rnn_cell.fc.bias: 32 params, requires_grad=True
fc.weight: 320 params, requires_grad=True
fc.bias: 10 params, requires_grad=True

26506


In [82]:
# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break

Epoch [1/1000], Training Loss: 1.0089
Epoch [1/1000], Validation Loss: 0.4766, Validation Accuracy: 89.32%
Output Summary: Max=5.3546, Min=-3.9023, Median=-0.1389, Mean=0.1702

Epoch [2/1000], Training Loss: 0.3813
Epoch [2/1000], Validation Loss: 0.3105, Validation Accuracy: 91.84%
Output Summary: Max=6.5240, Min=-4.9030, Median=-0.2221, Mean=0.1893

Epoch [3/1000], Training Loss: 0.2818
Epoch [3/1000], Validation Loss: 0.2603, Validation Accuracy: 92.95%
Output Summary: Max=7.2185, Min=-5.6352, Median=-0.2477, Mean=0.2010

Epoch [4/1000], Training Loss: 0.2398
Epoch [4/1000], Validation Loss: 0.2376, Validation Accuracy: 93.54%
Output Summary: Max=7.7594, Min=-6.0359, Median=-0.2588, Mean=0.2135

Epoch [5/1000], Training Loss: 0.2133
Epoch [5/1000], Validation Loss: 0.2237, Validation Accuracy: 93.76%
Output Summary: Max=8.2009, Min=-6.2491, Median=-0.2617, Mean=0.2218

Epoch [6/1000], Training Loss: 0.1944
Epoch [6/1000], Validation Loss: 0.2085, Validation Accuracy: 94.09%
Output S

In [None]:
# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/1000], Training Loss: 2.2517
Epoch [1/1000], Validation Loss: 2.0957, Validation Accuracy: 20.04%
Output Summary: Max=1.2725, Min=-2.0050, Median=0.0077, Mean=-0.0537

Epoch [2/1000], Training Loss: 1.9625
Epoch [2/1000], Validation Loss: 1.7842, Validation Accuracy: 32.11%
Output Summary: Max=2.2093, Min=-2.6049, Median=0.0567, Mean=-0.0725

Epoch [3/1000], Training Loss: 1.7022
Epoch [3/1000], Validation Loss: 1.5640, Validation Accuracy: 36.32%
Output Summary: Max=2.7516, Min=-3.2000, Median=-0.1785, Mean=-0.0719

Epoch [4/1000], Training Loss: 1.6269
Epoch [4/1000], Validation Loss: 1.5317, Validation Accuracy: 38.76%
Output Summary: Max=3.3650, Min=-3.4459, Median=-0.0654, Mean=-0.0498

Epoch [5/1000], Training Loss: 1.5474
Epoch [5/1000], Validation Loss: 1.7003, Validation Accuracy: 29.86%
Output Summary: Max=3.8634, Min=-3.7197, Median=0.1047, Mean=-0.0994



KeyboardInterrupt: 

In [46]:
# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/1000], Training Loss: 2.2311
Epoch [1/1000], Validation Loss: 1.9066, Validation Accuracy: 36.21%
Output Summary: Max=1.3244, Min=-1.3029, Median=0.0263, Mean=0.0092

Epoch [2/1000], Training Loss: 1.5773
Epoch [2/1000], Validation Loss: 1.3439, Validation Accuracy: 51.18%
Output Summary: Max=2.4252, Min=-2.5180, Median=0.1199, Mean=0.0338

Epoch [3/1000], Training Loss: 1.2121
Epoch [3/1000], Validation Loss: 1.1048, Validation Accuracy: 61.69%
Output Summary: Max=3.4407, Min=-3.4125, Median=0.2224, Mean=0.0764

Epoch [4/1000], Training Loss: 1.0464
Epoch [4/1000], Validation Loss: 1.0135, Validation Accuracy: 64.08%
Output Summary: Max=4.0750, Min=-4.0683, Median=0.1827, Mean=0.1100

Epoch [5/1000], Training Loss: 0.9560
Epoch [5/1000], Validation Loss: 0.9286, Validation Accuracy: 67.96%
Output Summary: Max=4.5175, Min=-4.5271, Median=0.1332, Mean=0.1093

Epoch [6/1000], Training Loss: 0.9012
Epoch [6/1000], Validation Loss: 0.8909, Validation Accuracy: 69.00%
Output Summar

In [42]:
# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/1000], Training Loss: 2.2328
Epoch [1/1000], Validation Loss: 1.8919, Validation Accuracy: 30.74%
Output Summary: Max=1.3390, Min=-1.3423, Median=-0.0758, Mean=-0.0389

Epoch [2/1000], Training Loss: 1.5925
Epoch [2/1000], Validation Loss: 1.3902, Validation Accuracy: 50.82%
Output Summary: Max=2.2988, Min=-2.4589, Median=-0.1856, Mean=-0.0449

Epoch [3/1000], Training Loss: 1.2808
Epoch [3/1000], Validation Loss: 1.2068, Validation Accuracy: 57.12%
Output Summary: Max=3.3833, Min=-3.3065, Median=-0.1649, Mean=-0.0322

Epoch [4/1000], Training Loss: 1.1325
Epoch [4/1000], Validation Loss: 1.0830, Validation Accuracy: 60.91%
Output Summary: Max=4.0129, Min=-4.0211, Median=-0.1337, Mean=-0.0012

Epoch [5/1000], Training Loss: 1.0168
Epoch [5/1000], Validation Loss: 0.9879, Validation Accuracy: 64.50%
Output Summary: Max=4.4506, Min=-4.5307, Median=-0.0589, Mean=0.0146

Epoch [6/1000], Training Loss: 0.9325
Epoch [6/1000], Validation Loss: 0.9052, Validation Accuracy: 68.49%
Outp

KeyboardInterrupt: 

In [38]:
# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/500], Training Loss: 2.1461
Epoch [1/500], Validation Loss: 1.5070, Validation Accuracy: 47.16%
Output Summary: Max=3.3682, Min=-7.2822, Median=-0.2279, Mean=-0.5001

Epoch [2/500], Training Loss: 1.0211
Epoch [2/500], Validation Loss: 0.8294, Validation Accuracy: 69.11%
Output Summary: Max=8.6911, Min=-14.9689, Median=-0.8498, Mean=-1.4651

Epoch [3/500], Training Loss: 0.7244
Epoch [3/500], Validation Loss: 0.6846, Validation Accuracy: 74.84%
Output Summary: Max=11.2197, Min=-19.3116, Median=-1.0854, Mean=-1.8775

Epoch [4/500], Training Loss: 0.6251
Epoch [4/500], Validation Loss: 0.6270, Validation Accuracy: 76.49%
Output Summary: Max=11.8628, Min=-21.6829, Median=-0.9526, Mean=-1.9810

Epoch [5/500], Training Loss: 0.5768
Epoch [5/500], Validation Loss: 0.5782, Validation Accuracy: 78.19%
Output Summary: Max=12.1790, Min=-23.4848, Median=-1.0644, Mean=-2.0934

Epoch [6/500], Training Loss: 0.5441
Epoch [6/500], Validation Loss: 0.5562, Validation Accuracy: 78.98%
Output S

In [30]:
# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/500], Training Loss: 2.3038
Epoch [1/500], Validation Loss: 2.2955, Validation Accuracy: 15.41%
Output Summary: Max=0.0208, Min=-0.1083, Median=-0.0313, Mean=-0.0342

Epoch [2/500], Training Loss: 1.6354
Epoch [2/500], Validation Loss: 1.0566, Validation Accuracy: 58.71%
Output Summary: Max=6.1864, Min=-6.9947, Median=-0.6447, Mean=-0.6246

Epoch [3/500], Training Loss: 0.9068
Epoch [3/500], Validation Loss: 0.8285, Validation Accuracy: 68.94%
Output Summary: Max=8.5042, Min=-10.2753, Median=-0.8808, Mean=-0.9490

Epoch [4/500], Training Loss: 0.7195
Epoch [4/500], Validation Loss: 0.7618, Validation Accuracy: 71.35%
Output Summary: Max=10.0401, Min=-12.6190, Median=-1.2397, Mean=-1.1394

Epoch [5/500], Training Loss: 0.6106
Epoch [5/500], Validation Loss: 0.5856, Validation Accuracy: 78.60%
Output Summary: Max=10.1276, Min=-13.0410, Median=-1.4849, Mean=-1.2440

Epoch [6/500], Training Loss: 0.5368
Epoch [6/500], Validation Loss: 0.5364, Validation Accuracy: 80.32%
Output Sum

In [26]:

# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/500], Training Loss: 2.0800
Epoch [1/500], Validation Loss: 1.3692, Validation Accuracy: 48.54%
Output Summary: Max=3.9115, Min=-3.1265, Median=0.3433, Mean=0.3374

Epoch [2/500], Training Loss: 0.9554
Epoch [2/500], Validation Loss: 0.7651, Validation Accuracy: 70.67%
Output Summary: Max=9.6127, Min=-5.5399, Median=0.5062, Mean=0.5975

Epoch [3/500], Training Loss: 0.6711
Epoch [3/500], Validation Loss: 0.6342, Validation Accuracy: 76.22%
Output Summary: Max=10.9876, Min=-7.0159, Median=0.3247, Mean=0.6116

Epoch [4/500], Training Loss: 0.5780
Epoch [4/500], Validation Loss: 0.5700, Validation Accuracy: 78.84%
Output Summary: Max=11.8787, Min=-8.3133, Median=0.2984, Mean=0.5432

Epoch [5/500], Training Loss: 0.5261
Epoch [5/500], Validation Loss: 0.5266, Validation Accuracy: 80.52%
Output Summary: Max=12.9717, Min=-8.9813, Median=0.2332, Mean=0.4706

Epoch [6/500], Training Loss: 0.4876
Epoch [6/500], Validation Loss: 0.4970, Validation Accuracy: 81.64%
Output Summary: Max=13

In [15]:

# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/500], Training Loss: 1.7379
Epoch [1/500], Validation Loss: 0.9901, Validation Accuracy: 64.27%
Output Summary: Max=9.6573, Min=-13.9246, Median=-0.4931, Mean=-0.6575

Epoch [2/500], Training Loss: 0.8534
Epoch [2/500], Validation Loss: 0.8293, Validation Accuracy: 68.90%
Output Summary: Max=13.9460, Min=-23.6459, Median=-1.0560, Mean=-1.7873

Epoch [3/500], Training Loss: 0.7421
Epoch [3/500], Validation Loss: 0.7515, Validation Accuracy: 71.75%
Output Summary: Max=15.0294, Min=-29.1099, Median=-1.2599, Mean=-2.1201

Epoch [4/500], Training Loss: 0.6856
Epoch [4/500], Validation Loss: 0.7114, Validation Accuracy: 72.82%
Output Summary: Max=16.2604, Min=-32.6620, Median=-1.3487, Mean=-2.1970

Epoch [5/500], Training Loss: 0.6497
Epoch [5/500], Validation Loss: 0.6806, Validation Accuracy: 74.07%
Output Summary: Max=17.3173, Min=-35.2507, Median=-1.3950, Mean=-2.2731

Epoch [6/500], Training Loss: 0.6217
Epoch [6/500], Validation Loss: 0.6617, Validation Accuracy: 74.94%
Output

In [70]:

# Early-stopping training loop: one full pass over the training tensors, then one
# evaluation pass over the test tensors, repeated until the validation loss has
# gone `patience` consecutive epochs without a new best or `epochs` is reached.
patience = 40
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor of concatenated test-set model outputs is appended per epoch.
all_outputs = []

# Bounded by `epochs` (not a separate hard-coded limit) so the loop agrees with
# the "[epoch/epochs]" progress lines printed below.
for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # Reported training loss is the mean of the per-batch losses.
    print(f"Epoch [{epoch + 1}/{epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # Evaluation pass: eval mode, gradients disabled.
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1
            # (argmax avoids assigning to `_`, which IPython uses for the last result).
            predicted = outputs.argmax(dim=1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced in this evaluation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches
    print(f"Epoch [{epoch + 1}/{epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # Early stopping: only a strictly lower validation loss resets the counter.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        print(best_val_loss)
        break


Epoch [1/500], Training Loss: 1.9182
Epoch [1/500], Validation Loss: 1.2326, Validation Accuracy: 56.59%
Output Summary: Max=11.2913, Min=-14.3222, Median=0.5692, Mean=0.3573

Epoch [2/500], Training Loss: 0.8644
Epoch [2/500], Validation Loss: 0.7375, Validation Accuracy: 72.58%
Output Summary: Max=20.4001, Min=-20.5289, Median=2.7531, Mean=2.7068

Epoch [3/500], Training Loss: 0.6860
Epoch [3/500], Validation Loss: 0.6699, Validation Accuracy: 75.01%
Output Summary: Max=21.3139, Min=-21.5421, Median=3.3929, Mean=3.0213

Epoch [4/500], Training Loss: 0.6417
Epoch [4/500], Validation Loss: 0.6359, Validation Accuracy: 76.19%
Output Summary: Max=22.2096, Min=-21.9511, Median=3.9949, Mean=3.3501

Epoch [5/500], Training Loss: 0.6117
Epoch [5/500], Validation Loss: 0.6099, Validation Accuracy: 77.13%
Output Summary: Max=22.9944, Min=-21.2470, Median=4.3341, Mean=3.5689

Epoch [6/500], Training Loss: 0.5858
Epoch [6/500], Validation Loss: 0.5880, Validation Accuracy: 78.20%
Output Summary:

In [74]:

# Train with early stopping: run until the validation loss has failed to improve
# for `patience` consecutive epochs, or until `max_epochs` is reached.
patience = 40                    # consecutive non-improving epochs tolerated
max_epochs = 10000               # real loop bound; also the total shown in the progress logs
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor per epoch holding the concatenated model outputs for the whole test set.
# NOTE(review): this list grows every epoch and the tensors are kept as produced by
# the model (never moved or freed here) -- confirm memory is acceptable for long runs.
all_outputs = []

for epoch in range(max_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # The total printed is the actual loop bound. Previously this printed `epochs`,
    # a separately defined value the loop never used, so the log showed a wrong total.
    print(f"Epoch [{epoch + 1}/{max_epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced during this validation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches      # mean of the per-batch validation losses
    print(f"Epoch [{epoch + 1}/{max_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # ---- early-stopping bookkeeping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break


Epoch [1/500], Training Loss: 2.1232
Epoch [1/500], Validation Loss: 1.4701, Validation Accuracy: 40.80%
Output Summary: Max=10.2939, Min=-10.7633, Median=0.2907, Mean=0.1655

Epoch [2/500], Training Loss: 0.9177
Epoch [2/500], Validation Loss: 0.7180, Validation Accuracy: 72.62%
Output Summary: Max=31.6700, Min=-34.7090, Median=-0.2609, Mean=0.0209

Epoch [3/500], Training Loss: 0.6307
Epoch [3/500], Validation Loss: 0.5989, Validation Accuracy: 78.11%
Output Summary: Max=30.9822, Min=-30.8298, Median=0.0657, Mean=-0.0007

Epoch [4/500], Training Loss: 0.5357
Epoch [4/500], Validation Loss: 0.5289, Validation Accuracy: 80.80%
Output Summary: Max=33.5743, Min=-31.3023, Median=0.1645, Mean=-0.0661

Epoch [5/500], Training Loss: 0.4897
Epoch [5/500], Validation Loss: 0.4936, Validation Accuracy: 81.92%
Output Summary: Max=34.3337, Min=-29.7314, Median=0.3283, Mean=-0.0808

Epoch [6/500], Training Loss: 0.4611
Epoch [6/500], Validation Loss: 0.4706, Validation Accuracy: 82.56%
Output Summ

In [51]:

# Train for up to `max_epochs` epochs, tracking the best validation loss.
# `patience` equals the epoch cap here, so the early-stopping check cannot end the
# run before the final epoch; in practice the run is stopped manually.
patience = 10000                 # consecutive non-improving epochs tolerated
max_epochs = 10000               # real loop bound; also the total shown in the progress logs
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor per epoch holding the concatenated model outputs for the whole test set.
# NOTE(review): this list grows every epoch and the tensors are kept as produced by
# the model (never moved or freed here) -- confirm memory is acceptable for long runs.
all_outputs = []

for epoch in range(max_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # The total printed is the actual loop bound. Previously this printed `epochs`,
    # a separately defined value the loop never used, so the log showed a wrong total.
    print(f"Epoch [{epoch + 1}/{max_epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced during this validation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches      # mean of the per-batch validation losses
    print(f"Epoch [{epoch + 1}/{max_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # ---- early-stopping bookkeeping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break


Epoch [1/500], Training Loss: 2.3282
Epoch [1/500], Validation Loss: 2.3035, Validation Accuracy: 10.28%
Output Summary: Max=0.4745, Min=0.0816, Median=0.2738, Mean=0.2726

Epoch [2/500], Training Loss: 2.3005
Epoch [2/500], Validation Loss: 2.2889, Validation Accuracy: 10.28%
Output Summary: Max=0.4730, Min=0.0296, Median=0.2607, Mean=0.2452

Epoch [3/500], Training Loss: 1.1786
Epoch [3/500], Validation Loss: 0.4489, Validation Accuracy: 85.79%
Output Summary: Max=17.3885, Min=-16.1064, Median=0.8293, Mean=0.9764

Epoch [4/500], Training Loss: 0.3915
Epoch [4/500], Validation Loss: 0.3475, Validation Accuracy: 89.59%
Output Summary: Max=17.1890, Min=-14.7909, Median=0.8270, Mean=1.0324

Epoch [5/500], Training Loss: 0.3467
Epoch [5/500], Validation Loss: 0.3206, Validation Accuracy: 90.50%
Output Summary: Max=18.5099, Min=-15.6053, Median=0.8729, Mean=1.0608

Epoch [6/500], Training Loss: 0.3244
Epoch [6/500], Validation Loss: 0.3039, Validation Accuracy: 91.03%
Output Summary: Max=1

KeyboardInterrupt: 

In [12]:
import torch
import torch.nn.functional as F

# Select the Metal (MPS) backend when PyTorch reports it available; otherwise use the CPU.
# Note that `device` is a plain string here, not a torch.device object.
if torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

if __name__ == '__main__':
    # Smoke test: allocate an uninitialised (4, 2, 40, 40) tensor, move it to the
    # selected device, and run a 3x3 unfold on it. The unfolded result is not
    # inspected; the cell only reports the installed torch version afterwards.
    tensor = torch.empty(4, 2, 40, 40).to(device)
    unfolded_tensor = F.unfold(tensor, kernel_size=3, padding=1, stride=1)
    print("torch version:", torch.__version__)

torch version: 2.6.0


In [16]:
# Train for up to `max_epochs` epochs, tracking the best validation loss.
# `patience` equals the epoch cap here, so the early-stopping check cannot end the
# run before the final epoch; in practice the run is stopped manually.
patience = 10000                 # consecutive non-improving epochs tolerated
max_epochs = 10000               # real loop bound; also the total shown in the progress logs
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor per epoch holding the concatenated model outputs for the whole test set.
# NOTE(review): this list grows every epoch and the tensors are kept as produced by
# the model (never moved or freed here) -- confirm memory is acceptable for long runs.
all_outputs = []

for epoch in range(max_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # The total printed is the actual loop bound. Previously this printed `epochs`,
    # a separately defined value the loop never used, so the log showed a wrong total.
    print(f"Epoch [{epoch + 1}/{max_epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced during this validation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches      # mean of the per-batch validation losses
    print(f"Epoch [{epoch + 1}/{max_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # ---- early-stopping bookkeeping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break

here
here
here


KeyboardInterrupt: 

In [None]:
# Train for up to `max_epochs` epochs, tracking the best validation loss.
# `patience` equals the epoch cap here, so the early-stopping check cannot end the
# run before the final epoch; in practice the run is stopped manually.
patience = 10000                 # consecutive non-improving epochs tolerated
max_epochs = 10000               # real loop bound; also the total shown in the progress logs
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor per epoch holding the concatenated model outputs for the whole test set.
# NOTE(review): this list grows every epoch and the tensors are kept as produced by
# the model (never moved or freed here) -- confirm memory is acceptable for long runs.
all_outputs = []

for epoch in range(max_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # The total printed is the actual loop bound. Previously this printed `epochs`,
    # a separately defined value the loop never used, so the log showed a wrong total.
    print(f"Epoch [{epoch + 1}/{max_epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced during this validation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches      # mean of the per-batch validation losses
    print(f"Epoch [{epoch + 1}/{max_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # ---- early-stopping bookkeeping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break

Epoch [1/1000], Training Loss: 2.1728
Epoch [1/1000], Validation Loss: 1.6802, Validation Accuracy: 56.44%
Output Summary: Max=2.3652, Min=-2.7602, Median=0.0619, Mean=0.0334

Epoch [2/1000], Training Loss: 1.2055
Epoch [2/1000], Validation Loss: 0.7723, Validation Accuracy: 84.77%
Output Summary: Max=4.2014, Min=-2.8849, Median=-0.0332, Mean=0.1487

Epoch [3/1000], Training Loss: 0.5983
Epoch [3/1000], Validation Loss: 0.4247, Validation Accuracy: 90.35%
Output Summary: Max=5.0898, Min=-3.8560, Median=-0.2723, Mean=0.0734

Epoch [4/1000], Training Loss: 0.3736
Epoch [4/1000], Validation Loss: 0.3020, Validation Accuracy: 92.28%
Output Summary: Max=5.7756, Min=-4.4066, Median=-0.3293, Mean=0.0801

Epoch [5/1000], Training Loss: 0.2840
Epoch [5/1000], Validation Loss: 0.2441, Validation Accuracy: 93.43%
Output Summary: Max=6.2063, Min=-4.4058, Median=-0.3786, Mean=0.0622

Epoch [6/1000], Training Loss: 0.2379
Epoch [6/1000], Validation Loss: 0.2234, Validation Accuracy: 93.91%
Output Su

KeyboardInterrupt: 

In [None]:
# Train for up to `max_epochs` epochs, tracking the best validation loss.
# `patience` equals the epoch cap here, so the early-stopping check cannot end the
# run before the final epoch; in practice the run is stopped manually.
patience = 10000                 # consecutive non-improving epochs tolerated
max_epochs = 10000               # real loop bound; also the total shown in the progress logs
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor per epoch holding the concatenated model outputs for the whole test set.
# NOTE(review): this list grows every epoch and the tensors are kept as produced by
# the model (never moved or freed here) -- confirm memory is acceptable for long runs.
all_outputs = []

for epoch in range(max_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # The total printed is the actual loop bound. Previously this printed `epochs`,
    # a separately defined value the loop never used, so the log showed a wrong total.
    print(f"Epoch [{epoch + 1}/{max_epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced during this validation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches      # mean of the per-batch validation losses
    print(f"Epoch [{epoch + 1}/{max_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # ---- early-stopping bookkeeping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break

Epoch [1/1000], Training Loss: 2.3023
Epoch [1/1000], Validation Loss: 2.3016, Validation Accuracy: 15.66%
Output Summary: Max=0.1028, Min=-0.0965, Median=0.0467, Mean=0.0335

Epoch [2/1000], Training Loss: 2.2845
Epoch [2/1000], Validation Loss: 2.1563, Validation Accuracy: 27.01%
Output Summary: Max=0.7537, Min=-0.6424, Median=0.0407, Mean=0.0154

Epoch [3/1000], Training Loss: 1.7491
Epoch [3/1000], Validation Loss: 1.2844, Validation Accuracy: 61.41%
Output Summary: Max=3.8563, Min=-3.6543, Median=0.1253, Mean=0.0692

Epoch [4/1000], Training Loss: 1.0003
Epoch [4/1000], Validation Loss: 0.7231, Validation Accuracy: 80.32%
Output Summary: Max=5.1553, Min=-4.6620, Median=-0.1656, Mean=0.0501

Epoch [5/1000], Training Loss: 0.6128
Epoch [5/1000], Validation Loss: 0.4861, Validation Accuracy: 86.23%
Output Summary: Max=6.4259, Min=-4.8845, Median=-0.1543, Mean=0.0677

Epoch [6/1000], Training Loss: 0.4516
Epoch [6/1000], Validation Loss: 0.3873, Validation Accuracy: 88.88%
Output Summ

KeyboardInterrupt: 

In [None]:
# Train for up to `max_epochs` epochs, tracking the best validation loss.
# `patience` equals the epoch cap here, so the early-stopping check cannot end the
# run before the final epoch; in practice the run is stopped manually.
patience = 10000                 # consecutive non-improving epochs tolerated
max_epochs = 10000               # real loop bound; also the total shown in the progress logs
best_val_loss = float('inf')
no_improvement_epochs = 0

# One tensor per epoch holding the concatenated model outputs for the whole test set.
# NOTE(review): this list grows every epoch and the tensors are kept as produced by
# the model (never moved or freed here) -- confirm memory is acceptable for long runs.
all_outputs = []

for epoch in range(max_epochs):
    # ---- training pass ----
    model.train()
    running_loss = 0.0
    num_batches = 0

    for data, target in get_batches(train_data, train_targets, batch_size):
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()

        optimizer.step()
        running_loss += loss.item()
        num_batches += 1

    # The total printed is the actual loop bound. Previously this printed `epochs`,
    # a separately defined value the loop never used, so the log showed a wrong total.
    print(f"Epoch [{epoch + 1}/{max_epochs}], Training Loss: {running_loss / num_batches:.4f}")

    # ---- validation pass ----
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    num_batches = 0
    epoch_outputs = []

    with torch.no_grad():
        for data, target in get_batches(test_data, test_targets, batch_size):
            outputs = model(data)
            loss = criterion(outputs, target)
            val_loss += loss.item()
            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            total += target.size(0)
            num_batches += 1
            correct += (predicted == target).sum().item()

            epoch_outputs.append(outputs)

    all_outputs_tensor = torch.cat(epoch_outputs, dim=0)
    all_outputs.append(all_outputs_tensor)

    # Summary statistics over every output value produced during this validation pass.
    max_val = torch.max(all_outputs_tensor).item()
    min_val = torch.min(all_outputs_tensor).item()
    median_val = torch.median(all_outputs_tensor).item()
    mean_val = torch.mean(all_outputs_tensor).item()

    accuracy = 100 * correct / total
    val_loss /= num_batches      # mean of the per-batch validation losses
    print(f"Epoch [{epoch + 1}/{max_epochs}], Validation Loss: {val_loss:.4f}, Validation Accuracy: {accuracy:.2f}%")
    print(f"Output Summary: Max={max_val:.4f}, Min={min_val:.4f}, Median={median_val:.4f}, Mean={mean_val:.4f}")
    print()

    # ---- early-stopping bookkeeping ----
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        no_improvement_epochs = 0
    else:
        no_improvement_epochs += 1

    if no_improvement_epochs >= patience:
        print(f"Early stopping triggered after {epoch + 1} epochs.")
        break

torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
Epoch [1/1000], Training Loss: 2.3031
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
torch.Size([2500, 32, 5, 5])
Epoch [1/1000], Validation Loss: 2.3011, Validation Accuracy: 10.28%
Output Summary: Max=0.0767, Min=-0.0834, Median=0.0046, Mean=0.0008

torch.Size([

KeyboardInterrupt: 