In [1]:
import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
import numpy as np
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [2]:
class RegNetBlock(nn.Module):
    expansion =1
    def __init__(self, in_planes, out_planes, stride=1, groups=1, kernel_size=3, padding=1):
        super(RegNetBlock, self).__init__()
        self.groups = min(in_planes, groups, out_planes)  # Adjust groups to not exceed in_planes
       # print("group number:",groups)
        while in_planes % groups != 0 or out_planes % groups != 0:
            groups //= 2  # Reduce groups to ensure divisibility
       # if in_planes % groups !=0:
        #groups = in_planes
        if in_planes > 0:
            # Example condition: adjust kernel size for small inputs
            if in_planes <= 32:
                kernel_size = 3
                padding = 1
            else:
                kernel_size = 7
                padding = 3
        else:
            # Default to a standard kernel size if no input size info is available
            kernel_size = 3
            padding = 1
        #print(f"Config regnetblock: out_planes={out_planes}, stride={stride}, groups={groups}, kernel_size={kernel_size}, padding={padding}")
        groups = int(groups)
        #print(f"groups_regblock={groups}")
        #print(f"Out_planes_regnetblock={out_planes}")

        self.conv = nn.Conv2d(in_planes, out_planes, kernel_size=kernel_size,stride=stride, padding=padding, groups=groups, bias=False)
        self.bn = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != self.expansion * out_planes:
           # print("SHORTCUT DETECTED!!!!!!!!!!")
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, self.expansion * out_planes, kernel_size=1, groups=groups, stride=stride, bias=False),
                nn.BatchNorm2d(self.expansion * out_planes),
            )

    def forward(self, x):
        residual = x
        out = self.relu(self.bn(self.conv(x)))
        residual = self.shortcut(residual)
        if out.shape != residual.shape:
            out = F.interpolate(out, size=residual.shape[2:], mode='nearest')
        #out += self.shortcut(x)
        out = out + residual
        out = self.relu(out)
        return out

In [3]:
class AnyNetX(nn.Module):
    def __init__(self, config, num_classes=10):
        super(AnyNetX, self).__init__()
        self.in_planes = 64
        input_channels = 1
        out_planes = config[0][0]
        print(f"Configanynetx initial: out_planes={out_planes}, inplane={self.in_planes}")

        # Initial conv layer
        self.initial = nn.Sequential(
            nn.Conv2d(input_channels, self.in_planes, kernel_size=3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(self.in_planes),
            nn.ReLU(inplace=True)        
        )

        self.layers = self._make_layers(config)

        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(self.in_planes, num_classes)
        self.in_planes = out_planes


    def _make_layers(self, config):
        layers = []
        print(config)
        for idx,(out_planes, num_blocks, stride, groups, kernel_size, padding) in enumerate(config):
            #print(f"Configanynetx: in_planes={self.in_planes}, out_planes={out_planes}, num_blocks={num_blocks}, stride={stride}, groups={groups}, kernel_size={kernel_size}, padding={padding}")
            #print(f"Initial in_planes={self.in_planes}")
            blocks = nn.Sequential()
            for b in range(num_blocks):
                #print(f"b is: {b}")
                #print(f"  Block: in_planes={self.in_planes}, out_planes={out_planes}, groups={groups}")
                block = RegNetBlock(in_planes=self.in_planes,out_planes=out_planes, stride=stride, groups=groups, kernel_size=kernel_size, padding=padding)
                blocks.add_module(f"block_{idx}_{b}", block)
               # if b == 0:
                self.in_planes =out_planes# Update in_planes for the next layer
            
            #else:self.in_planes = groups
                #print(f"  Updated in_planes={self.in_planes}")

                #stride = 1  # Only the first block may have a stride > 1
            layers.append(blocks)
          #  print(blocks)

        return nn.Sequential(*layers)

    def forward(self, x):
       # print(nn.Sequential)
        x = self.initial(x)
        x = self.layers(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.fc(x)
        return x

In [4]:
class LinearRegressionModel(nn.Module):
    def __init__(self, input_size, output_size):
        super(LinearRegressionModel, self).__init__()
        self.linear = nn.Linear(input_size, output_size)
        
    def forward(self, x):
        return self.linear(x)

In [5]:
def sample_configuration():
    depth_range = [2, 6, 12]
   # width_range = (64, 512)
   # group_width_range = (1, 64)
    width_multiplier_range = [1, 64, 128]  # Changed to a multiplier range
    group_width_choices = np.array([1, 2, 4, 8, 16, 32])
    kernel_size_choices = [3, 5, 7]
    stride_choices = [1, 2,3,4]
    padding_choices = [1, 2,3,4]

    num_stages = 4
    config = []
    #config.append((64,1,1,1,1,1))
    for _ in range(num_stages):
        depth = np.random.choice(depth_range)
        group_width = np.random.choice(group_width_choices)
        width_multiplier = np.random.choice(width_multiplier_range)
        out_planes = group_width * width_multiplier
        kernel_size = np.random.choice(kernel_size_choices)
        stride = np.random.choice(stride_choices)
        padding = np.random.choice(padding_choices)
        # Ensure group width is a divisor of width
       # group_width = width if group_width > width else group_width
        #if out_planes % group_width != 0:
         #   out_planes = (out_planes // group_width) * group_width
        config.append((out_planes,depth,stride, group_width, kernel_size, padding))

    return config

In [6]:
def train_and_evaluate(model):
    # Example dataset and dataloader setup
    transform = transforms.Compose([
        transforms.ToTensor(),
        #transforms.Lambda(lambda x: x.expand(-1, 3, -1, -1)),  # Expand single-channel images to three channels
        transforms.Normalize((0.5,), (0.5,))
    ])

    # Load training and validation datasets
    trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
    valset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

    trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
    valloader = DataLoader(valset, batch_size=64, shuffle=False)

    # Example loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    epochs = 10
    for epoch in range(epochs):
        model.train()
        running_loss = 0
        for images, labels in trainloader:
            optimizer.zero_grad()
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(trainloader)}")

    # Evaluate accuracy on the validation dataset
    correct = 0
    total = 0
    model.eval()
    with torch.no_grad():
        for images, labels in valloader:
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    print(f'Accuracy on the validation set: {accuracy}%')

    return accuracy

In [7]:
number_of_samples=10
data = []
for _ in range(number_of_samples):
    config = sample_configuration()
    model = AnyNetX(config)
    accuracy = train_and_evaluate(model)  # This function is to be defined based on your dataset and training loop
    data.append((config, accuracy))

Configanynetx initial: out_planes=8, inplane=64
[(8, 12, 3, 8, 5, 2), (256, 2, 1, 2, 3, 3), (256, 2, 2, 2, 3, 1), (512, 6, 2, 8, 3, 2)]
Epoch 1, Loss: 1.8438198982017127
Epoch 2, Loss: 1.6367234592752924
Epoch 3, Loss: 1.5725938144014842
Epoch 4, Loss: 1.5523676764227943
Epoch 5, Loss: 1.5432295442135857
Epoch 6, Loss: 1.4766400476762735
Epoch 7, Loss: 1.505238528953178
Epoch 8, Loss: 1.5060469232388396
Epoch 9, Loss: 1.47072295860441
Epoch 10, Loss: 1.461047604012845
Accuracy on the validation set: 25.35%
Configanynetx initial: out_planes=256, inplane=64
[(256, 12, 4, 2, 3, 3), (512, 6, 1, 4, 7, 2), (4096, 2, 4, 32, 5, 2), (256, 6, 2, 4, 7, 3)]
Epoch 1, Loss: 0.5162335887773714
Epoch 2, Loss: 0.2195756041260162
Epoch 3, Loss: 0.1781825624537398
Epoch 4, Loss: 0.16247200904223424
Epoch 5, Loss: 0.127944096993369
Epoch 6, Loss: 0.10994036703793479
Epoch 7, Loss: 0.09915870188825777
Epoch 8, Loss: 0.07938810202158066
Epoch 9, Loss: 0.07631755222666707
Epoch 10, Loss: 0.07423029947955943


In [8]:
print(data)

[([(8, 12, 3, 8, 5, 2), (256, 2, 1, 2, 3, 3), (256, 2, 2, 2, 3, 1), (512, 6, 2, 8, 3, 2)], 25.35), ([(256, 12, 4, 2, 3, 3), (512, 6, 1, 4, 7, 2), (4096, 2, 4, 32, 5, 2), (256, 6, 2, 4, 7, 3)], 96.42), ([(256, 6, 3, 4, 3, 3), (16, 6, 4, 16, 3, 2), (2048, 2, 1, 16, 3, 1), (64, 12, 1, 1, 3, 4)], 89.26), ([(1024, 6, 3, 16, 5, 1), (1024, 2, 3, 16, 5, 1), (128, 12, 2, 2, 5, 4), (32, 2, 4, 32, 3, 4)], 97.42), ([(128, 12, 4, 2, 5, 1), (256, 6, 2, 4, 7, 3), (4, 6, 3, 4, 5, 4), (4096, 6, 3, 32, 3, 1)], 10.09), ([(4096, 12, 2, 32, 7, 1), (2048, 12, 3, 32, 7, 3), (2048, 2, 3, 32, 7, 1), (2048, 6, 3, 16, 5, 3)], 41.73), ([(4096, 6, 2, 32, 7, 3), (4, 6, 2, 4, 7, 3), (256, 12, 4, 2, 7, 4), (32, 2, 3, 32, 3, 2)], 46.48), ([(2048, 2, 3, 16, 5, 4), (512, 6, 3, 4, 3, 4), (16, 12, 4, 16, 5, 2), (128, 2, 3, 1, 3, 4)], 22.23), ([(4096, 2, 4, 32, 7, 3), (64, 6, 4, 1, 5, 4), (1024, 12, 2, 16, 3, 4), (512, 12, 4, 4, 3, 3)], 95.88), ([(256, 6, 4, 4, 5, 4), (32, 12, 2, 32, 5, 3), (8, 6, 3, 8, 3, 1), (1024, 6, 4,

In [10]:
def config_to_features(config):
    # Flatten the configuration tuples into a single list of features
    features = [feature for stage in config for feature in stage]
    return features

X = torch.tensor([config_to_features(config) for config, _ in data], dtype=torch.float32)
y = torch.tensor([[accuracy] for _, accuracy in data], dtype=torch.float32)  # Ensure y is 2D
print(y)
print(X)

input_size = len(config_to_features(data[0][0]))  # Number of features
print(input_size)
output_size = 1  # Predicting a single value, e.g., accuracy

model = LinearRegressionModel(input_size, output_size)
criterion = nn.MSELoss()  # Mean Squared Error Loss
optimizer = optim.Adam(model.parameters(), lr=0.001, weight_decay =1e-5)  # ADAM

tensor([[25.3500],
        [96.4200],
        [89.2600],
        [97.4200],
        [10.0900],
        [41.7300],
        [46.4800],
        [22.2300],
        [95.8800],
        [26.0500]])
tensor([[8.0000e+00, 1.2000e+01, 3.0000e+00, 8.0000e+00, 5.0000e+00, 2.0000e+00,
         2.5600e+02, 2.0000e+00, 1.0000e+00, 2.0000e+00, 3.0000e+00, 3.0000e+00,
         2.5600e+02, 2.0000e+00, 2.0000e+00, 2.0000e+00, 3.0000e+00, 1.0000e+00,
         5.1200e+02, 6.0000e+00, 2.0000e+00, 8.0000e+00, 3.0000e+00, 2.0000e+00],
        [2.5600e+02, 1.2000e+01, 4.0000e+00, 2.0000e+00, 3.0000e+00, 3.0000e+00,
         5.1200e+02, 6.0000e+00, 1.0000e+00, 4.0000e+00, 7.0000e+00, 2.0000e+00,
         4.0960e+03, 2.0000e+00, 4.0000e+00, 3.2000e+01, 5.0000e+00, 2.0000e+00,
         2.5600e+02, 6.0000e+00, 2.0000e+00, 4.0000e+00, 7.0000e+00, 3.0000e+00],
        [2.5600e+02, 6.0000e+00, 3.0000e+00, 4.0000e+00, 3.0000e+00, 3.0000e+00,
         1.6000e+01, 6.0000e+00, 4.0000e+00, 1.6000e+01, 3.0000e+00, 2.0000e+0

In [20]:
def lqm(model, X, y):
    epochs = 30000  # Number of epochs to train for
    for epoch in range(epochs):
        model.train()
        optimizer.zero_grad()
        # Forward pass
        outputs = model(X)
        loss = criterion(outputs, y)
        loss = (loss ** 2).mean()

        
        # Backward and optimize
        loss.backward()
        optimizer.step()
        
        if (epoch+1) % 10 == 0:  # Print loss every 100 epochs
            print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}')

In [21]:
predicted_accuracies = [outputs.detach().numpy() for config, _ in data]
print(predicted_accuracies)
print(y)
plt.figure(figsize=(10, 6))
plt.scatter(y, predicted_accuracies, alpha=0.5)
plt.title('Actual vs. Predicted Accuracies')
plt.xlabel('Actual Accuracy')
plt.ylabel('Predicted Accuracy')
plt.grid(True)

# Optional: plot a line representing perfect predictions for reference
#plt.plot([min(actual_accuracies), max(actual_accuracies)], [min(actual_accuracies), max(actual_accuracies)], 'r--')

plt.show()

NameError: name 'outputs' is not defined

In [22]:
def predict_performance(model, config):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():
        features = torch.tensor([config_to_features(config)], dtype=torch.float32)
        prediction = model(features)
    return prediction.item()  # Return the predicted value

In [23]:
lqm(model, X, y)

Epoch [10/30000], Loss: 1936649.0000
Epoch [20/30000], Loss: 1927850.8750
Epoch [30/30000], Loss: 1919132.3750
Epoch [40/30000], Loss: 1910490.8750
Epoch [50/30000], Loss: 1901927.1250
Epoch [60/30000], Loss: 1893438.1250
Epoch [70/30000], Loss: 1885024.3750
Epoch [80/30000], Loss: 1876682.6250
Epoch [90/30000], Loss: 1868413.3750
Epoch [100/30000], Loss: 1860215.6250
Epoch [110/30000], Loss: 1852088.3750
Epoch [120/30000], Loss: 1844029.7500
Epoch [130/30000], Loss: 1836039.2500
Epoch [140/30000], Loss: 1828115.8750
Epoch [150/30000], Loss: 1820257.5000
Epoch [160/30000], Loss: 1812466.0000
Epoch [170/30000], Loss: 1804738.3750
Epoch [180/30000], Loss: 1797073.5000
Epoch [190/30000], Loss: 1789471.5000
Epoch [200/30000], Loss: 1781931.0000
Epoch [210/30000], Loss: 1774451.3750
Epoch [220/30000], Loss: 1767031.5000
Epoch [230/30000], Loss: 1759670.6250
Epoch [240/30000], Loss: 1752368.0000
Epoch [250/30000], Loss: 1745123.2500
Epoch [260/30000], Loss: 1737935.6250
Epoch [270/30000], Lo

In [None]:
#num_cycles = 10
target_accuracy = 0
data= [] 
#for _ in range(num_cycles):
while target_accuracy < 99.99:
    top_k = 3# Choose the top k configurations to train and evaluate
    bottom_k= 1
    new_configs = [sample_configuration() for _ in range(top_k + bottom_k)] # Sample new configurations, top 3 and 1 bottom to vary
    # Evaluate the predicted performance for each new configuration
    performances = [(config, predict_performance(model, config)) for config in new_configs]
    # Select the top performing configurations based on predicted performance
    top_performing_configs = sorted(performances, key=lambda x: x[1], reverse=True)[:top_k]
    print(top_performing_configs)
    lower_ranking_configs = sorted(performances, key=lambda x: x[1], reverse=False)[:bottom_k]
    configs_to_evaluate = top_performing_configs + lower_ranking_configs
    
    # Train and evaluate the top performing configurations using your RegNet-based model
    for config, _ in configs_to_evaluate:
        model = AnyNetX(config)  # Initialize the RegNet model with the configuration
        accuracy = train_and_evaluate(model)  # Train and evaluate the RegNet mode
        data.append((config, accuracy))
        target_accuracy = accuracy
    
    X = torch.tensor([config_to_features(config) for config, _ in data], dtype=torch.float32)
    y = torch.tensor([[performance] for _, performance in data], dtype=torch.float32)
    lqm(model, X, y)  # This function encapsulates retraining logic

[([(2048, 2, 1, 32, 5, 1), (4, 2, 1, 4, 5, 2), (16, 6, 1, 16, 7, 2), (2, 12, 2, 2, 7, 3)], 61.41572189331055), ([(8, 2, 1, 8, 5, 2), (128, 6, 3, 2, 7, 4), (2, 12, 3, 2, 5, 3), (4, 2, 1, 4, 3, 3)], 47.95137023925781), ([(128, 2, 4, 1, 7, 2), (1, 12, 3, 1, 7, 1), (8, 6, 3, 8, 5, 2), (64, 12, 4, 1, 3, 4)], 37.088775634765625)]
Configanynetx initial: out_planes=2048, inplane=64
[(2048, 2, 1, 32, 5, 1), (4, 2, 1, 4, 5, 2), (16, 6, 1, 16, 7, 2), (2, 12, 2, 2, 7, 3)]
Epoch 1, Loss: 2.26449989637078
Epoch 2, Loss: 2.050007141602319
