In [1]:
import warnings

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import tqdm
from scipy.sparse.csgraph import connected_components

# -------------------------
# Configurable CNN with Batch Normalization and Hidden Activations Collection
# -------------------------
class ConfigurableCNN(nn.Module):
    """Configurable CNN: a stack of conv blocks followed by a two-layer classifier.

    Each conv block is ``Conv2d(3x3, padding=1) -> [BatchNorm2d] -> tanh ->
    MaxPool2d(2, 2)``. The flattened output feeds ``fc1 -> tanh -> dropout -> fc2``.
    When requested, ``forward`` also returns the pre-activation output of every
    conv block so that feature statistics can be inspected.
    """

    def __init__(self, conv_channels, fc_hidden_units=512, dropout_p=0.25,
                 num_classes=10, input_size=32, input_channels=3, use_batchnorm=True):
        """
        Args:
            conv_channels (list of int): List of output channels for each convolutional layer.
                May be empty, in which case the raw input is fed to the classifier.
            fc_hidden_units (int): Number of neurons in the hidden fully connected layer.
            dropout_p (float): Dropout probability.
            num_classes (int): Number of output classes.
            input_size (int): Height/width of the input images (assumed square).
            input_channels (int): Number of channels in the input images.
            use_batchnorm (bool): Whether to use batch normalization after each convolution.

        Raises:
            ValueError: If ``input_size`` is too small to survive one 2x2 pooling
                per convolutional layer.
        """
        super().__init__()
        self.use_batchnorm = use_batchnorm
        self.conv_layers = nn.ModuleList()
        if self.use_batchnorm:
            self.bn_layers = nn.ModuleList()

        self.num_pool = len(conv_channels)  # One pooling per conv layer

        # Each 2x2 pooling halves (floor) the spatial size; fail fast instead of
        # building a zero-width classifier that only breaks later in forward().
        final_size = input_size // (2 ** self.num_pool)
        if final_size < 1:
            raise ValueError(
                f"input_size={input_size} is too small for {self.num_pool} "
                f"pooling stages (needs at least {2 ** self.num_pool})"
            )

        # Create convolutional layers along with optional batch normalization.
        in_channels = input_channels
        for out_channels in conv_channels:
            self.conv_layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            if self.use_batchnorm:
                self.bn_layers.append(nn.BatchNorm2d(out_channels))
            in_channels = out_channels

        # Define a max pooling layer (2x2) applied after each conv block.
        self.pool = nn.MaxPool2d(2, 2)

        # After the loop `in_channels` is the channel count entering the classifier:
        # the last conv width, or the input channels when there are no conv layers.
        self.flattened_size = in_channels * final_size * final_size

        # Fully connected layers.
        self.fc1 = nn.Linear(self.flattened_size, fc_hidden_units)
        self.fc2 = nn.Linear(fc_hidden_units, num_classes)

        # Dropout layer for regularization.
        self.dropout = nn.Dropout(dropout_p)
        # torch.tanh replaces the legacy F.tanh alias (same function, but the
        # alias emits a deprecation warning on older PyTorch releases).
        self.act = torch.tanh  # You can change this activation if desired

    def forward(self, x, return_hidden=False):
        """Compute class logits for a batch of images.

        Args:
            x (Tensor): Input batch; the conv/flatten path expects
                ``(batch, input_channels, input_size, input_size)``.
            return_hidden (bool): If True, also return the hidden activations.

        Returns:
            Tensor of logits, or ``(logits, hidden_activations)`` when
            ``return_hidden`` is True. ``hidden_activations`` holds one tensor per
            conv layer, taken after the convolution (and batch norm, if enabled)
            but before the activation and pooling, detached and moved to the CPU.
        """
        hidden_activations = []  # List to collect hidden activations

        # Pass through each convolutional block.
        for idx, conv in enumerate(self.conv_layers):
            x = conv(x)
            if self.use_batchnorm:
                x = self.bn_layers[idx](x)
            if return_hidden:
                # Detach and copy to CPU so collected tensors do not keep the
                # autograd graph or device memory alive.
                hidden_activations.append(x.detach().cpu())
            x = self.act(x)
            x = self.pool(x)

        x = self.dropout(x)
        x = x.view(x.size(0), -1)  # Flatten the tensor

        # First fully connected layer with activation
        x = self.fc1(x)
        x = self.act(x)
        x = self.dropout(x)

        # Final fully connected layer (logits)
        x = self.fc2(x)

        if return_hidden:
            return x, hidden_activations
        return x


def eval_features(model, testloader, thresh=0.9, tol=1e-10, rank_atol=1e-2, dead_tol=0.05):
    """Print redundancy statistics for each hidden feature map of ``model``.

    The first batch of ``testloader`` is passed through the model with
    ``return_hidden=True``. For every returned activation tensor, one line is
    printed with:

    * ``# CC``: number of connected components of the graph whose nodes are
      channels and whose edges join channels with ``|cosine similarity| > thresh``;
    * ``e-rank``: numerical rank of the channel similarity matrix (``atol=rank_atol``);
    * ``soft rank``: ``trace(C)^2 / trace(C @ C)`` of that matrix;
    * ``dead features``: channels whose std / mean-absolute-value ratio is below
      ``dead_tol`` (includes constant and all-zero channels).

    Args:
        model: Callable module whose ``model(x, return_hidden=True)`` returns
            ``(output, list_of_activations)``.
        testloader: Iterable yielding ``(inputs, labels)`` batches.
        thresh (float): Absolute cosine-similarity threshold for an edge.
        tol (float): Small constant guarding the divisions against zero.
        rank_atol (float): Absolute tolerance passed to ``torch.linalg.matrix_rank``.
        dead_tol (float): Threshold on the std / mean-abs ratio for a dead channel.

    Returns:
        None. Results are printed. The model is left in eval mode.
    """
    model.eval()
    sample_inputs, _ = next(iter(testloader))
    # Use the model's own device rather than relying on a module-level `device`
    # global; models without parameters receive the inputs unchanged.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        sample_inputs = sample_inputs.to(first_param.device)
    with torch.no_grad():
        _, hidden_activations = model(sample_inputs, return_hidden=True)

    for act in hidden_activations:
        # Reshape: (batch, channels, H, W) --> (channels, batch * H * W)
        A = act.transpose(0, 1).flatten(1)
        # Normalize each row to unit length (tol avoids division by zero).
        # Rows are not mean-centred, so C below is the uncentred cosine similarity.
        A = A / (A.norm(dim=1, keepdim=True) + tol)
        # Guard the denominator as well: an all-zero channel would otherwise give
        # 0 / 0 = NaN, and NaN < dead_tol is False, so it would not be counted.
        stds = A.std(dim=1) / (A.abs().mean(dim=1) + tol)
        dead_features = int((stds < dead_tol).sum())
        # Compute cosine similarity matrix
        C = A @ A.t()
        rank = int(torch.linalg.matrix_rank(C, atol=rank_atol))
        soft_rank = float(torch.trace(C) ** 2 / torch.trace(C @ C))
        # Remove self-similarity by zeroing the diagonal and take absolute value.
        C.fill_diagonal_(0)
        C = C.abs()
        # Create an adjacency matrix by thresholding, as a NumPy array for SciPy
        # (.cpu() is a no-op for CPU tensors and keeps GPU activations working).
        Adj_np = (C > thresh).float().cpu().numpy()
        # Compute the number of connected components using SciPy's stable routine.
        n_components, labels = connected_components(csgraph=Adj_np, directed=False)
        R = Adj_np.shape[0]
        print(f'# CC  = {n_components:4}, e-rank = {rank:4}, soft rank = {soft_rank:4.3f}, dead features = {dead_features:4} / {R}')

# -------------------------
# Data Preparation (Tiny ImageNet with Selected Classes)
# -------------------------
# Class indices (as assigned by ImageFolder) to keep.
selected_classes = range(50)  # Set to None to use all available classes

# Side length, in pixels, that every image is resized to.
input_size = 128

transform = transforms.Compose([
    transforms.Resize((input_size, input_size)),  # Resize images to input_size x input_size
    transforms.ToTensor(),
    # Per-channel (x - 0.5) / 0.5.
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

# Load the Tiny ImageNet datasets using ImageFolder.
# Adjust the root paths to where you have Tiny ImageNet stored.
trainset = torchvision.datasets.ImageFolder(root='./data/tiny-imagenet-200/train', transform=transform)
testset = torchvision.datasets.ImageFolder(root='./data/tiny-imagenet-200/val', transform=transform)

# ImageFolder derives every label from the sub-directory names under `root`, so
# the two splits only agree on label indices when their folder layouts match.
# NOTE(review): the stock Tiny ImageNet `val/` folder is reportedly not organised
# per class (TODO confirm for the local copy); in that case validation accuracy
# stays near chance regardless of how well the model trains.
if testset.class_to_idx != trainset.class_to_idx:
    warnings.warn(
        "Train and validation folders define different class-to-index mappings; "
        "validation labels will not line up with the training labels and the "
        "reported accuracy will be meaningless. Reorganise the validation images "
        "into one sub-directory per class.",
        RuntimeWarning,
    )

# If selected_classes is specified, filter the dataset to include only those classes.
if selected_classes is not None:
    # A set gives O(1) membership for any iterable of indices and drops duplicates.
    keep = set(selected_classes)
    train_indices = [i for i, (_, label) in enumerate(trainset.samples) if label in keep]
    trainset = torch.utils.data.Subset(trainset, train_indices)
    test_indices = [i for i, (_, label) in enumerate(testset.samples) if label in keep]
    testset = torch.utils.data.Subset(testset, test_indices)
    # Labels are not remapped, so the classifier needs an output for every index
    # up to the largest selected one. For a contiguous selection starting at 0
    # (e.g. range(50)) this equals len(selected_classes).
    num_used_classes = max(keep, default=-1) + 1
else:
    num_used_classes = len(trainset.classes)

trainloader = torch.utils.data.DataLoader(trainset, batch_size=64,
                                          shuffle=True, num_workers=2)
testloader = torch.utils.data.DataLoader(testset, batch_size=128,
                                         shuffle=False, num_workers=2)

# -------------------------
# Model Configuration and Instantiation
# -------------------------
# Hyper-parameters of the network.
use_batchnorm = True
dropout_p = 0.0
conv_channels = [256 for _ in range(7)]  # Example configuration: seven conv layers, 256 channels each
fc_hidden_units = conv_channels[0]  # Hidden FC width mirrors the first conv width

# Prefer the first CUDA device when one is available, otherwise run on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Build the network and move its parameters to the chosen device.
net = ConfigurableCNN(
    conv_channels,
    fc_hidden_units,
    dropout_p,
    num_classes=num_used_classes,
    input_size=input_size,
    input_channels=3,
    use_batchnorm=use_batchnorm,
)
net = net.to(device)

# Adam over all network parameters, trained against cross-entropy on the logits.
optimizer = optim.Adam(net.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# -------------------------
# Training Loop
# -------------------------
num_epochs = 100  # Number of passes over the training set; adjust as needed
for epoch in range(num_epochs):
    # ---- Optimisation pass over the training set ----
    net.train()
    running_loss = 0.0
    progress = tqdm.tqdm(enumerate(trainloader, 0), total=len(trainloader))
    for i, data in progress:
        inputs = data[0].to(device)
        labels = data[1].to(device)

        optimizer.zero_grad()              # Clear gradients from the previous step
        outputs = net(inputs)              # Forward pass
        loss = criterion(outputs, labels)  # Cross-entropy on the logits
        loss.backward()                    # Backpropagation
        optimizer.step()                   # Parameter update

        running_loss += loss.item()
    print(f'Epoch {epoch + 1}, Train Loss: {running_loss / len(trainloader):.5f}')
    running_loss = 0.0

    # ---- Feature-connectivity report (switches the network to eval mode) ----
    eval_features(net, testloader, thresh=0.95)

    # ---- Validation after each epoch ----
    net.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for data in testloader:
            images = data[0].to(device)
            labels = data[1].to(device)
            outputs = net(images)
            # Predicted class = index of the largest logit in each row.
            _, predicted = outputs.data.max(1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Accuracy on test set after epoch {epoch + 1}: {accuracy:.2f}%')

print("Training complete!")
# -------------------------
# Example: Obtaining Hidden Activations
# -------------------------
# Run the first batch of `testloader` through the network in eval mode and keep
# the per-layer hidden activations that forward() returns with return_hidden=True.
# (A stray bare `x` expression that preceded this section raised NameError at
# module level and has been removed.)
net.eval()
sample_inputs, _ = next(iter(testloader))
sample_inputs = sample_inputs.to(device)
with torch.no_grad():  # Inference only: no autograd graph is needed
    output, hidden_activations = net(sample_inputs, return_hidden=True)
print("Collected {} hidden activations.".format(len(hidden_activations)))


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.36it/s]

Epoch 1, Train Loss: 3.44915





# CC  =  156, e-rank =   21, soft rank = 4.318, dead features =    0 / 256
# CC  =  109, e-rank =   33, soft rank = 3.329, dead features =    0 / 256
# CC  =   56, e-rank =   26, soft rank = 3.329, dead features =    0 / 256
# CC  =   99, e-rank =   28, soft rank = 4.098, dead features =    0 / 256
# CC  =  195, e-rank =   36, soft rank = 4.890, dead features =    0 / 256
# CC  =  254, e-rank =   51, soft rank = 7.585, dead features =    0 / 256
# CC  =  256, e-rank =   71, soft rank = 7.968, dead features =    0 / 256
Accuracy on test set after epoch 1: 4.20%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.44it/s]

Epoch 2, Train Loss: 3.02895





# CC  =  168, e-rank =   21, soft rank = 4.433, dead features =    0 / 256
# CC  =  137, e-rank =   32, soft rank = 3.920, dead features =    0 / 256
# CC  =  101, e-rank =   27, soft rank = 3.776, dead features =    0 / 256
# CC  =  132, e-rank =   28, soft rank = 4.583, dead features =    0 / 256
# CC  =  230, e-rank =   39, soft rank = 6.132, dead features =    0 / 256
# CC  =  255, e-rank =   59, soft rank = 10.457, dead features =    0 / 256
# CC  =  256, e-rank =   87, soft rank = 11.493, dead features =    0 / 256
Accuracy on test set after epoch 2: 6.99%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.42it/s]

Epoch 3, Train Loss: 2.80418





# CC  =  167, e-rank =   21, soft rank = 4.828, dead features =    0 / 256
# CC  =  148, e-rank =   33, soft rank = 4.317, dead features =    0 / 256
# CC  =  117, e-rank =   29, soft rank = 4.128, dead features =    0 / 256
# CC  =  168, e-rank =   32, soft rank = 5.158, dead features =    0 / 256
# CC  =  236, e-rank =   46, soft rank = 7.306, dead features =    0 / 256
# CC  =  256, e-rank =   72, soft rank = 12.391, dead features =    0 / 256
# CC  =  256, e-rank =  101, soft rank = 12.230, dead features =    0 / 256
Accuracy on test set after epoch 3: 3.82%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.41it/s]

Epoch 4, Train Loss: 2.61524





# CC  =  173, e-rank =   21, soft rank = 5.138, dead features =    0 / 256
# CC  =  162, e-rank =   35, soft rank = 4.506, dead features =    0 / 256
# CC  =  121, e-rank =   32, soft rank = 4.010, dead features =    0 / 256
# CC  =  179, e-rank =   35, soft rank = 5.142, dead features =    0 / 256
# CC  =  248, e-rank =   53, soft rank = 8.356, dead features =    0 / 256
# CC  =  256, e-rank =   85, soft rank = 15.047, dead features =    0 / 256
# CC  =  256, e-rank =  117, soft rank = 15.663, dead features =    0 / 256
Accuracy on test set after epoch 4: 2.50%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 5, Train Loss: 2.42739





# CC  =  171, e-rank =   21, soft rank = 5.457, dead features =    0 / 256
# CC  =  179, e-rank =   36, soft rank = 5.451, dead features =    0 / 256
# CC  =  159, e-rank =   36, soft rank = 5.024, dead features =    0 / 256
# CC  =  206, e-rank =   40, soft rank = 6.012, dead features =    0 / 256
# CC  =  253, e-rank =   59, soft rank = 9.638, dead features =    0 / 256
# CC  =  256, e-rank =   98, soft rank = 16.324, dead features =    0 / 256
# CC  =  256, e-rank =  134, soft rank = 18.581, dead features =    0 / 256
Accuracy on test set after epoch 5: 5.65%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.42it/s]

Epoch 6, Train Loss: 2.24817





# CC  =  175, e-rank =   21, soft rank = 5.571, dead features =    0 / 256
# CC  =  190, e-rank =   37, soft rank = 5.596, dead features =    0 / 256
# CC  =  176, e-rank =   39, soft rank = 5.372, dead features =    0 / 256
# CC  =  226, e-rank =   44, soft rank = 6.714, dead features =    0 / 256
# CC  =  254, e-rank =   66, soft rank = 10.265, dead features =    0 / 256
# CC  =  256, e-rank =  108, soft rank = 16.332, dead features =    0 / 256
# CC  =  256, e-rank =  152, soft rank = 21.466, dead features =    0 / 256
Accuracy on test set after epoch 6: 2.46%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 7, Train Loss: 2.07016





# CC  =  178, e-rank =   21, soft rank = 5.720, dead features =    0 / 256
# CC  =  203, e-rank =   37, soft rank = 6.047, dead features =    0 / 256
# CC  =  196, e-rank =   41, soft rank = 5.781, dead features =    0 / 256
# CC  =  228, e-rank =   48, soft rank = 6.992, dead features =    0 / 256
# CC  =  254, e-rank =   73, soft rank = 11.604, dead features =    0 / 256
# CC  =  256, e-rank =  120, soft rank = 15.587, dead features =    0 / 256
# CC  =  256, e-rank =  164, soft rank = 22.159, dead features =    0 / 256
Accuracy on test set after epoch 7: 2.41%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.39it/s]

Epoch 8, Train Loss: 1.86404





# CC  =  173, e-rank =   21, soft rank = 5.606, dead features =    0 / 256
# CC  =  198, e-rank =   39, soft rank = 5.958, dead features =    0 / 256
# CC  =  195, e-rank =   42, soft rank = 5.798, dead features =    0 / 256
# CC  =  238, e-rank =   52, soft rank = 7.324, dead features =    0 / 256
# CC  =  256, e-rank =   79, soft rank = 12.163, dead features =    0 / 256
# CC  =  256, e-rank =  131, soft rank = 14.028, dead features =    0 / 256
# CC  =  256, e-rank =  184, soft rank = 26.783, dead features =    0 / 256
Accuracy on test set after epoch 8: 3.40%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.43it/s]

Epoch 9, Train Loss: 1.63346





# CC  =  180, e-rank =   21, soft rank = 5.715, dead features =    0 / 256
# CC  =  206, e-rank =   39, soft rank = 6.192, dead features =    0 / 256
# CC  =  205, e-rank =   45, soft rank = 6.408, dead features =    0 / 256
# CC  =  237, e-rank =   57, soft rank = 7.781, dead features =    0 / 256
# CC  =  256, e-rank =   86, soft rank = 13.041, dead features =    0 / 256
# CC  =  256, e-rank =  143, soft rank = 12.863, dead features =    0 / 256
# CC  =  256, e-rank =  200, soft rank = 29.141, dead features =    0 / 256
Accuracy on test set after epoch 9: 3.60%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 10, Train Loss: 1.39614





# CC  =  168, e-rank =   21, soft rank = 5.728, dead features =    0 / 256
# CC  =  215, e-rank =   40, soft rank = 6.158, dead features =    0 / 256
# CC  =  207, e-rank =   48, soft rank = 6.595, dead features =    0 / 256
# CC  =  238, e-rank =   62, soft rank = 8.177, dead features =    0 / 256
# CC  =  256, e-rank =   94, soft rank = 13.869, dead features =    0 / 256
# CC  =  256, e-rank =  154, soft rank = 11.215, dead features =    0 / 256
# CC  =  256, e-rank =  216, soft rank = 32.567, dead features =    0 / 256
Accuracy on test set after epoch 10: 2.24%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.39it/s]

Epoch 11, Train Loss: 1.13320





# CC  =  167, e-rank =   21, soft rank = 5.735, dead features =    0 / 256
# CC  =  214, e-rank =   42, soft rank = 6.374, dead features =    0 / 256
# CC  =  208, e-rank =   50, soft rank = 6.732, dead features =    0 / 256
# CC  =  239, e-rank =   64, soft rank = 8.498, dead features =    0 / 256
# CC  =  256, e-rank =  101, soft rank = 14.187, dead features =    0 / 256
# CC  =  256, e-rank =  165, soft rank = 10.180, dead features =    0 / 256
# CC  =  256, e-rank =  227, soft rank = 34.868, dead features =    0 / 256
Accuracy on test set after epoch 11: 3.40%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.41it/s]

Epoch 12, Train Loss: 0.84474





# CC  =  170, e-rank =   21, soft rank = 5.772, dead features =    0 / 256
# CC  =  214, e-rank =   42, soft rank = 6.362, dead features =    0 / 256
# CC  =  213, e-rank =   53, soft rank = 6.912, dead features =    0 / 256
# CC  =  247, e-rank =   68, soft rank = 8.927, dead features =    0 / 256
# CC  =  256, e-rank =  107, soft rank = 14.343, dead features =    0 / 256
# CC  =  256, e-rank =  175, soft rank = 9.690, dead features =    0 / 256
# CC  =  256, e-rank =  237, soft rank = 35.143, dead features =    0 / 256
Accuracy on test set after epoch 12: 1.93%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 13, Train Loss: 0.60338





# CC  =  168, e-rank =   21, soft rank = 5.741, dead features =    0 / 256
# CC  =  218, e-rank =   42, soft rank = 6.377, dead features =    0 / 256
# CC  =  215, e-rank =   55, soft rank = 7.136, dead features =    0 / 256
# CC  =  246, e-rank =   71, soft rank = 9.231, dead features =    0 / 256
# CC  =  256, e-rank =  113, soft rank = 15.063, dead features =    0 / 256
# CC  =  256, e-rank =  184, soft rank = 9.012, dead features =    0 / 256
# CC  =  256, e-rank =  243, soft rank = 38.961, dead features =    0 / 256
Accuracy on test set after epoch 13: 2.67%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 14, Train Loss: 0.41620





# CC  =  165, e-rank =   21, soft rank = 5.763, dead features =    0 / 256
# CC  =  212, e-rank =   44, soft rank = 6.459, dead features =    0 / 256
# CC  =  216, e-rank =   56, soft rank = 7.026, dead features =    0 / 256
# CC  =  250, e-rank =   74, soft rank = 9.305, dead features =    0 / 256
# CC  =  256, e-rank =  116, soft rank = 14.702, dead features =    0 / 256
# CC  =  256, e-rank =  189, soft rank = 8.600, dead features =    0 / 256
# CC  =  256, e-rank =  246, soft rank = 38.495, dead features =    0 / 256
Accuracy on test set after epoch 14: 2.94%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 15, Train Loss: 0.26362





# CC  =  164, e-rank =   21, soft rank = 5.774, dead features =    0 / 256
# CC  =  214, e-rank =   44, soft rank = 6.502, dead features =    0 / 256
# CC  =  222, e-rank =   59, soft rank = 7.201, dead features =    0 / 256
# CC  =  252, e-rank =   75, soft rank = 9.454, dead features =    0 / 256
# CC  =  256, e-rank =  121, soft rank = 15.005, dead features =    0 / 256
# CC  =  256, e-rank =  195, soft rank = 8.284, dead features =    0 / 256
# CC  =  256, e-rank =  249, soft rank = 39.690, dead features =    0 / 256
Accuracy on test set after epoch 15: 2.89%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.39it/s]

Epoch 16, Train Loss: 0.18091





# CC  =  161, e-rank =   21, soft rank = 5.795, dead features =    0 / 256
# CC  =  218, e-rank =   44, soft rank = 6.636, dead features =    0 / 256
# CC  =  220, e-rank =   59, soft rank = 7.193, dead features =    0 / 256
# CC  =  250, e-rank =   77, soft rank = 9.166, dead features =    0 / 256
# CC  =  256, e-rank =  122, soft rank = 14.523, dead features =    0 / 256
# CC  =  256, e-rank =  196, soft rank = 7.922, dead features =    0 / 256
# CC  =  256, e-rank =  251, soft rank = 37.906, dead features =    0 / 256
Accuracy on test set after epoch 16: 2.36%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 17, Train Loss: 0.15264





# CC  =  157, e-rank =   21, soft rank = 5.757, dead features =    0 / 256
# CC  =  216, e-rank =   44, soft rank = 6.620, dead features =    0 / 256
# CC  =  222, e-rank =   60, soft rank = 7.320, dead features =    0 / 256
# CC  =  252, e-rank =   77, soft rank = 9.410, dead features =    0 / 256
# CC  =  256, e-rank =  123, soft rank = 14.586, dead features =    0 / 256
# CC  =  256, e-rank =  200, soft rank = 7.786, dead features =    0 / 256
# CC  =  256, e-rank =  250, soft rank = 37.983, dead features =    0 / 256
Accuracy on test set after epoch 17: 2.48%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 18, Train Loss: 0.15876





# CC  =  156, e-rank =   21, soft rank = 5.781, dead features =    0 / 256
# CC  =  214, e-rank =   44, soft rank = 6.525, dead features =    0 / 256
# CC  =  223, e-rank =   59, soft rank = 7.132, dead features =    0 / 256
# CC  =  251, e-rank =   78, soft rank = 9.485, dead features =    0 / 256
# CC  =  256, e-rank =  124, soft rank = 14.482, dead features =    0 / 256
# CC  =  256, e-rank =  200, soft rank = 7.561, dead features =    0 / 256
# CC  =  256, e-rank =  251, soft rank = 37.043, dead features =    0 / 256
Accuracy on test set after epoch 18: 2.54%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]


Epoch 19, Train Loss: 0.21302
# CC  =  162, e-rank =   21, soft rank = 5.748, dead features =    0 / 256
# CC  =  209, e-rank =   44, soft rank = 6.424, dead features =    0 / 256
# CC  =  220, e-rank =   60, soft rank = 7.033, dead features =    0 / 256
# CC  =  251, e-rank =   79, soft rank = 9.339, dead features =    0 / 256
# CC  =  256, e-rank =  126, soft rank = 14.645, dead features =    0 / 256
# CC  =  256, e-rank =  203, soft rank = 6.900, dead features =    0 / 256
# CC  =  256, e-rank =  253, soft rank = 37.251, dead features =    0 / 256
Accuracy on test set after epoch 19: 1.97%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.39it/s]

Epoch 20, Train Loss: 0.13362





# CC  =  159, e-rank =   21, soft rank = 5.786, dead features =    0 / 256
# CC  =  212, e-rank =   44, soft rank = 6.703, dead features =    0 / 256
# CC  =  224, e-rank =   61, soft rank = 7.488, dead features =    0 / 256
# CC  =  250, e-rank =   81, soft rank = 9.527, dead features =    0 / 256
# CC  =  256, e-rank =  127, soft rank = 14.577, dead features =    0 / 256
# CC  =  256, e-rank =  205, soft rank = 6.896, dead features =    0 / 256
# CC  =  256, e-rank =  252, soft rank = 35.377, dead features =    0 / 256
Accuracy on test set after epoch 20: 3.94%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.41it/s]

Epoch 21, Train Loss: 0.10753





# CC  =  148, e-rank =   21, soft rank = 5.735, dead features =    0 / 256
# CC  =  213, e-rank =   44, soft rank = 6.487, dead features =    0 / 256
# CC  =  223, e-rank =   62, soft rank = 7.497, dead features =    0 / 256
# CC  =  251, e-rank =   81, soft rank = 9.524, dead features =    0 / 256
# CC  =  256, e-rank =  128, soft rank = 14.662, dead features =    0 / 256
# CC  =  256, e-rank =  207, soft rank = 6.734, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 35.922, dead features =    0 / 256
Accuracy on test set after epoch 21: 2.76%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 22, Train Loss: 0.09273





# CC  =  156, e-rank =   20, soft rank = 5.741, dead features =    0 / 256
# CC  =  206, e-rank =   44, soft rank = 6.698, dead features =    0 / 256
# CC  =  226, e-rank =   61, soft rank = 7.403, dead features =    0 / 256
# CC  =  251, e-rank =   82, soft rank = 9.550, dead features =    0 / 256
# CC  =  256, e-rank =  129, soft rank = 14.565, dead features =    0 / 256
# CC  =  256, e-rank =  209, soft rank = 6.892, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 35.922, dead features =    0 / 256
Accuracy on test set after epoch 22: 2.59%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.38it/s]

Epoch 23, Train Loss: 0.12112





# CC  =  154, e-rank =   20, soft rank = 5.758, dead features =    0 / 256
# CC  =  210, e-rank =   45, soft rank = 6.650, dead features =    0 / 256
# CC  =  224, e-rank =   61, soft rank = 7.547, dead features =    0 / 256
# CC  =  252, e-rank =   83, soft rank = 9.776, dead features =    0 / 256
# CC  =  256, e-rank =  129, soft rank = 14.313, dead features =    0 / 256
# CC  =  256, e-rank =  208, soft rank = 6.656, dead features =    0 / 256
# CC  =  256, e-rank =  252, soft rank = 35.326, dead features =    0 / 256
Accuracy on test set after epoch 23: 1.69%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 24, Train Loss: 0.14980





# CC  =  143, e-rank =   20, soft rank = 5.726, dead features =    0 / 256
# CC  =  211, e-rank =   45, soft rank = 6.560, dead features =    0 / 256
# CC  =  222, e-rank =   62, soft rank = 7.429, dead features =    0 / 256
# CC  =  253, e-rank =   83, soft rank = 9.730, dead features =    0 / 256
# CC  =  256, e-rank =  130, soft rank = 14.088, dead features =    0 / 256
# CC  =  256, e-rank =  207, soft rank = 6.344, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 34.734, dead features =    0 / 256
Accuracy on test set after epoch 24: 2.14%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 25, Train Loss: 0.09602





# CC  =  149, e-rank =   20, soft rank = 5.681, dead features =    0 / 256
# CC  =  214, e-rank =   45, soft rank = 6.527, dead features =    0 / 256
# CC  =  225, e-rank =   62, soft rank = 7.272, dead features =    0 / 256
# CC  =  252, e-rank =   84, soft rank = 9.328, dead features =    0 / 256
# CC  =  256, e-rank =  131, soft rank = 13.836, dead features =    0 / 256
# CC  =  256, e-rank =  209, soft rank = 6.347, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 34.029, dead features =    0 / 256
Accuracy on test set after epoch 25: 3.86%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 26, Train Loss: 0.06965





# CC  =  146, e-rank =   20, soft rank = 5.710, dead features =    0 / 256
# CC  =  213, e-rank =   45, soft rank = 6.665, dead features =    0 / 256
# CC  =  225, e-rank =   62, soft rank = 7.490, dead features =    0 / 256
# CC  =  252, e-rank =   84, soft rank = 9.719, dead features =    0 / 256
# CC  =  256, e-rank =  132, soft rank = 14.097, dead features =    0 / 256
# CC  =  256, e-rank =  211, soft rank = 6.330, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 34.310, dead features =    0 / 256
Accuracy on test set after epoch 26: 2.83%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 27, Train Loss: 0.10885





# CC  =  156, e-rank =   20, soft rank = 5.672, dead features =    0 / 256
# CC  =  212, e-rank =   45, soft rank = 6.656, dead features =    0 / 256
# CC  =  224, e-rank =   61, soft rank = 7.368, dead features =    0 / 256
# CC  =  251, e-rank =   83, soft rank = 9.688, dead features =    0 / 256
# CC  =  256, e-rank =  132, soft rank = 14.114, dead features =    0 / 256
# CC  =  256, e-rank =  212, soft rank = 6.236, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 33.326, dead features =    0 / 256
Accuracy on test set after epoch 27: 2.13%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 28, Train Loss: 0.11675





# CC  =  147, e-rank =   20, soft rank = 5.671, dead features =    0 / 256
# CC  =  215, e-rank =   45, soft rank = 6.712, dead features =    0 / 256
# CC  =  226, e-rank =   63, soft rank = 7.687, dead features =    0 / 256
# CC  =  250, e-rank =   84, soft rank = 9.769, dead features =    0 / 256
# CC  =  256, e-rank =  134, soft rank = 13.758, dead features =    0 / 256
# CC  =  256, e-rank =  214, soft rank = 6.173, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 33.160, dead features =    0 / 256
Accuracy on test set after epoch 28: 2.17%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 29, Train Loss: 0.09010





# CC  =  144, e-rank =   20, soft rank = 5.703, dead features =    0 / 256
# CC  =  215, e-rank =   45, soft rank = 6.758, dead features =    0 / 256
# CC  =  225, e-rank =   63, soft rank = 7.584, dead features =    0 / 256
# CC  =  252, e-rank =   84, soft rank = 9.496, dead features =    0 / 256
# CC  =  256, e-rank =  133, soft rank = 13.190, dead features =    0 / 256
# CC  =  256, e-rank =  213, soft rank = 5.987, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 32.003, dead features =    0 / 256
Accuracy on test set after epoch 29: 3.12%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.41it/s]

Epoch 30, Train Loss: 0.09110





# CC  =  149, e-rank =   20, soft rank = 5.624, dead features =    0 / 256
# CC  =  214, e-rank =   45, soft rank = 6.686, dead features =    0 / 256
# CC  =  228, e-rank =   62, soft rank = 7.643, dead features =    0 / 256
# CC  =  253, e-rank =   85, soft rank = 9.870, dead features =    0 / 256
# CC  =  256, e-rank =  135, soft rank = 13.619, dead features =    0 / 256
# CC  =  256, e-rank =  214, soft rank = 6.005, dead features =    0 / 256
# CC  =  256, e-rank =  255, soft rank = 31.567, dead features =    0 / 256
Accuracy on test set after epoch 30: 2.98%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 31, Train Loss: 0.07908





# CC  =  142, e-rank =   20, soft rank = 5.626, dead features =    0 / 256
# CC  =  217, e-rank =   46, soft rank = 6.778, dead features =    0 / 256
# CC  =  229, e-rank =   63, soft rank = 7.768, dead features =    0 / 256
# CC  =  252, e-rank =   85, soft rank = 9.639, dead features =    0 / 256
# CC  =  256, e-rank =  135, soft rank = 13.635, dead features =    0 / 256
# CC  =  256, e-rank =  214, soft rank = 5.991, dead features =    0 / 256
# CC  =  256, e-rank =  255, soft rank = 31.086, dead features =    0 / 256
Accuracy on test set after epoch 31: 2.83%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.39it/s]

Epoch 32, Train Loss: 0.05976





# CC  =  141, e-rank =   20, soft rank = 5.618, dead features =    0 / 256
# CC  =  216, e-rank =   46, soft rank = 6.767, dead features =    0 / 256
# CC  =  231, e-rank =   63, soft rank = 7.724, dead features =    0 / 256
# CC  =  253, e-rank =   85, soft rank = 10.072, dead features =    0 / 256
# CC  =  256, e-rank =  136, soft rank = 13.553, dead features =    0 / 256
# CC  =  256, e-rank =  215, soft rank = 6.020, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 31.688, dead features =    0 / 256
Accuracy on test set after epoch 32: 3.24%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 33, Train Loss: 0.08020





# CC  =  147, e-rank =   20, soft rank = 5.631, dead features =    0 / 256
# CC  =  216, e-rank =   45, soft rank = 6.578, dead features =    0 / 256
# CC  =  229, e-rank =   64, soft rank = 7.488, dead features =    0 / 256
# CC  =  253, e-rank =   86, soft rank = 9.712, dead features =    0 / 256
# CC  =  256, e-rank =  138, soft rank = 13.450, dead features =    0 / 256
# CC  =  256, e-rank =  216, soft rank = 5.843, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 30.630, dead features =    0 / 256
Accuracy on test set after epoch 33: 2.88%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.39it/s]

Epoch 34, Train Loss: 0.10483





# CC  =  139, e-rank =   20, soft rank = 5.611, dead features =    0 / 256
# CC  =  218, e-rank =   45, soft rank = 6.649, dead features =    0 / 256
# CC  =  229, e-rank =   63, soft rank = 7.580, dead features =    0 / 256
# CC  =  251, e-rank =   86, soft rank = 9.737, dead features =    0 / 256
# CC  =  256, e-rank =  136, soft rank = 12.611, dead features =    0 / 256
# CC  =  256, e-rank =  216, soft rank = 5.742, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 29.536, dead features =    0 / 256
Accuracy on test set after epoch 34: 3.51%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 35, Train Loss: 0.09099





# CC  =  139, e-rank =   20, soft rank = 5.569, dead features =    0 / 256
# CC  =  222, e-rank =   45, soft rank = 6.701, dead features =    0 / 256
# CC  =  227, e-rank =   64, soft rank = 7.523, dead features =    0 / 256
# CC  =  252, e-rank =   88, soft rank = 9.652, dead features =    0 / 256
# CC  =  256, e-rank =  138, soft rank = 13.301, dead features =    0 / 256
# CC  =  256, e-rank =  216, soft rank = 5.540, dead features =    0 / 256
# CC  =  256, e-rank =  255, soft rank = 29.057, dead features =    0 / 256
Accuracy on test set after epoch 35: 2.07%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.41it/s]

Epoch 36, Train Loss: 0.06491





# CC  =  137, e-rank =   19, soft rank = 5.627, dead features =    0 / 256
# CC  =  216, e-rank =   46, soft rank = 6.701, dead features =    0 / 256
# CC  =  227, e-rank =   64, soft rank = 7.551, dead features =    0 / 256
# CC  =  251, e-rank =   86, soft rank = 9.689, dead features =    0 / 256
# CC  =  256, e-rank =  139, soft rank = 12.927, dead features =    0 / 256
# CC  =  256, e-rank =  216, soft rank = 5.555, dead features =    0 / 256
# CC  =  256, e-rank =  254, soft rank = 28.927, dead features =    0 / 256
Accuracy on test set after epoch 36: 2.64%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.38it/s]

Epoch 37, Train Loss: 0.05846





# CC  =  131, e-rank =   19, soft rank = 5.558, dead features =    0 / 256
# CC  =  213, e-rank =   45, soft rank = 6.752, dead features =    0 / 256
# CC  =  226, e-rank =   64, soft rank = 7.771, dead features =    0 / 256
# CC  =  251, e-rank =   87, soft rank = 9.759, dead features =    0 / 256
# CC  =  256, e-rank =  139, soft rank = 13.411, dead features =    0 / 256
# CC  =  256, e-rank =  219, soft rank = 5.628, dead features =    0 / 256
# CC  =  256, e-rank =  255, soft rank = 29.098, dead features =    0 / 256
Accuracy on test set after epoch 37: 2.89%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.40it/s]

Epoch 38, Train Loss: 0.06078





# CC  =  143, e-rank =   19, soft rank = 5.588, dead features =    0 / 256
# CC  =  217, e-rank =   46, soft rank = 6.763, dead features =    0 / 256
# CC  =  225, e-rank =   64, soft rank = 7.713, dead features =    0 / 256
# CC  =  250, e-rank =   88, soft rank = 9.660, dead features =    0 / 256
# CC  =  256, e-rank =  139, soft rank = 12.969, dead features =    0 / 256
# CC  =  256, e-rank =  219, soft rank = 5.543, dead features =    0 / 256
# CC  =  256, e-rank =  255, soft rank = 29.048, dead features =    0 / 256
Accuracy on test set after epoch 38: 2.34%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.38it/s]

Epoch 39, Train Loss: 0.07687





# CC  =  131, e-rank =   19, soft rank = 5.583, dead features =    0 / 256
# CC  =  216, e-rank =   45, soft rank = 6.939, dead features =    0 / 256
# CC  =  227, e-rank =   64, soft rank = 7.892, dead features =    0 / 256
# CC  =  250, e-rank =   88, soft rank = 9.781, dead features =    0 / 256
# CC  =  256, e-rank =  140, soft rank = 13.247, dead features =    0 / 256
# CC  =  256, e-rank =  220, soft rank = 5.657, dead features =    0 / 256
# CC  =  256, e-rank =  256, soft rank = 27.542, dead features =    0 / 256
Accuracy on test set after epoch 39: 2.93%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.39it/s]

Epoch 40, Train Loss: 0.07582





# CC  =  126, e-rank =   19, soft rank = 5.514, dead features =    0 / 256
# CC  =  217, e-rank =   46, soft rank = 6.606, dead features =    0 / 256
# CC  =  232, e-rank =   64, soft rank = 7.749, dead features =    0 / 256
# CC  =  251, e-rank =   88, soft rank = 9.749, dead features =    0 / 256
# CC  =  256, e-rank =  140, soft rank = 12.738, dead features =    0 / 256
# CC  =  256, e-rank =  218, soft rank = 5.612, dead features =    0 / 256
# CC  =  256, e-rank =  255, soft rank = 27.459, dead features =    0 / 256
Accuracy on test set after epoch 40: 3.20%


100%|██████████████████████████████████████████████████████████████████████| 391/391 [00:37<00:00, 10.37it/s]

Epoch 41, Train Loss: 0.06718





# CC  =  138, e-rank =   18, soft rank = 5.529, dead features =    0 / 256
# CC  =  213, e-rank =   46, soft rank = 6.611, dead features =    0 / 256
# CC  =  229, e-rank =   64, soft rank = 7.533, dead features =    0 / 256
# CC  =  251, e-rank =   88, soft rank = 9.712, dead features =    0 / 256
# CC  =  256, e-rank =  140, soft rank = 12.747, dead features =    0 / 256
# CC  =  256, e-rank =  219, soft rank = 5.484, dead features =    0 / 256
# CC  =  256, e-rank =  256, soft rank = 26.884, dead features =    0 / 256
Accuracy on test set after epoch 41: 2.66%


 60%|█████████████████████████████████████████▋                            | 233/391 [00:22<00:15, 10.35it/s]


KeyboardInterrupt: 