Sure! I can help you with that. Let's start by breaking down the implementation step by step.

Step 1: Import the required libraries

In [None]:
import random
import torch
import torch.nn as nn
import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets


Step 2: Define the MLP model class

In [None]:
class MLP_1hidden(nn.Module):
    """MLP classifier with one hidden layer and a softmax output over 10 classes.

    Args:
        input_size: Number of features in each input vector.
        hidden_layer: Sequence whose first element is the hidden layer width.
        activations: Sequence of activation codes (1 = ReLU, 2 = Sigmoid).
            Element 0 is applied to the raw input, element 1 to the output of
            the hidden layer.

    Raises:
        ValueError: If an activation code is neither 1 nor 2.
    """

    @staticmethod
    def _make_activation(code):
        """Return the activation module for a code (1 = ReLU, 2 = Sigmoid)."""
        if code == 1:
            return nn.ReLU()
        if code == 2:
            return nn.Sigmoid()
        # Fail at construction time: silently skipping the assignment would
        # only surface later as an AttributeError inside forward().
        raise ValueError(
            f"Unknown activation code {code!r}; expected 1 (ReLU) or 2 (Sigmoid)"
        )

    def __init__(self, input_size, hidden_layer, activations):
        super().__init__()
        hidden_size = hidden_layer[0]
        output_size = 10

        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, output_size)

        self.activation_input = self._make_activation(activations[0])
        self.activation_hidden1 = self._make_activation(activations[1])
        # Softmax over dim 1, so forward() returns per-class probabilities.
        self.activation_output = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a (batch, input_size) tensor to (batch, 10) class probabilities."""
        x = self.activation_input(x)
        x = self.activation_hidden1(self.fc1(x))
        return self.activation_output(self.fc2(x))

In [None]:

class MLP_2hidden(nn.Module):
    """MLP classifier with two hidden layers and a softmax output over 10 classes.

    Args:
        input_size: Number of features in each input vector.
        hidden_layer: Sequence whose first two elements are the widths of the
            first and second hidden layers.
        activations: Sequence of activation codes (1 = ReLU, 2 = Sigmoid).
            Element 0 is applied to the raw input, element 1 to the output of
            the first hidden layer, element 2 to the output of the second.

    Raises:
        ValueError: If an activation code is neither 1 nor 2.
    """

    @staticmethod
    def _make_activation(code):
        """Return the activation module for a code (1 = ReLU, 2 = Sigmoid)."""
        if code == 1:
            return nn.ReLU()
        if code == 2:
            return nn.Sigmoid()
        # Fail at construction time: silently skipping the assignment would
        # only surface later as an AttributeError inside forward().
        raise ValueError(
            f"Unknown activation code {code!r}; expected 1 (ReLU) or 2 (Sigmoid)"
        )

    def __init__(self, input_size, hidden_layer, activations):
        super().__init__()
        hidden1_size = hidden_layer[0]
        hidden2_size = hidden_layer[1]
        output_size = 10

        self.fc1 = nn.Linear(input_size, hidden1_size)
        self.fc2 = nn.Linear(hidden1_size, hidden2_size)
        self.fc3 = nn.Linear(hidden2_size, output_size)

        self.activation_input = self._make_activation(activations[0])
        self.activation_hidden1 = self._make_activation(activations[1])
        self.activation_hidden2 = self._make_activation(activations[2])
        # Softmax over dim 1, so forward() returns per-class probabilities.
        self.activation_output = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a (batch, input_size) tensor to (batch, 10) class probabilities."""
        x = self.activation_input(x)
        x = self.activation_hidden1(self.fc1(x))
        x = self.activation_hidden2(self.fc2(x))
        return self.activation_output(self.fc3(x))

In [None]:
class MLP_0hidden(nn.Module):
    """Single linear layer classifier (no hidden layer) with a softmax output.

    Args:
        input_size: Number of features in each input vector.
        output_size: Number of output classes.
        activation: Sequence whose first element is the activation code
            (1 = ReLU, 2 = Sigmoid) applied to the raw input.

    Raises:
        ValueError: If the activation code is neither 1 nor 2.
    """

    @staticmethod
    def _make_activation(code):
        """Return the activation module for a code (1 = ReLU, 2 = Sigmoid)."""
        if code == 1:
            return nn.ReLU()
        if code == 2:
            return nn.Sigmoid()
        # Fail at construction time: silently skipping the assignment would
        # only surface later as an AttributeError inside forward().
        raise ValueError(
            f"Unknown activation code {code!r}; expected 1 (ReLU) or 2 (Sigmoid)"
        )

    def __init__(self, input_size, output_size, activation):
        super().__init__()
        self.fc1 = nn.Linear(input_size, output_size)
        self.activation_input = self._make_activation(activation[0])
        # Softmax over dim 1, so forward() returns per-class probabilities.
        self.activation_output = nn.Softmax(dim=1)

    def forward(self, x):
        """Map a (batch, input_size) tensor to (batch, output_size) probabilities."""
        x = self.activation_input(x)
        return self.activation_output(self.fc1(x))

Step 3: Set random seed for reproducibility (optional)

In [None]:
# Seed every RNG this notebook draws from: Python's `random` is used for
# random.choice() in the configuration cell, and torch drives weight
# initialisation and DataLoader shuffling.
random.seed(42)
torch.manual_seed(42)

Step 4: Load the CIFAR-10 dataset and apply transformations

In [None]:
# CIFAR-10 input pipeline: convert each image to a tensor, then normalise every
# channel with mean 0.5 and std 0.5.
_CIFAR_ROOT = './data'
_BATCH_SIZE = 128

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Both splits share the same root directory and preprocessing; the data is
# downloaded on first use.
train_dataset = datasets.CIFAR10(
    root=_CIFAR_ROOT, train=True, download=True, transform=transform
)
test_dataset = datasets.CIFAR10(
    root=_CIFAR_ROOT, train=False, download=True, transform=transform
)

# Only the training batches are shuffled; the test order stays fixed.
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=_BATCH_SIZE, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=_BATCH_SIZE, shuffle=False
)


Step 5: Create an instance of the MLP model with random configurations

In [None]:
input_size = 512  # Size of the ResNet output feature vector

# Randomly drawn hidden layer size (0, 10 or 20). The chromosome-driven model
# below does not read it; kept so the name stays defined for other cells.
hidden_size = random.choice([0, 10, 20])

output_size = 10  # Number of classes in CIFAR-10

# Chromosomes: each entry describes one candidate configuration.
#   'feature extraction'  - code of the pre-trained backbone
#   'hidden layer'        - widths of the hidden layers (empty = no hidden layer)
#   'activation function' - one activation code per layer input (1 = ReLU, 2 = Sigmoid)
temp = [
    {'feature extraction': 1, 'hidden layer': [10], 'activation function': [1, 2]},
    {'feature extraction': 2, 'hidden layer': [30, 20], 'activation function': [1, 2, 1]},
    {'feature extraction': 3, 'hidden layer': [], 'activation function': [2]},
    {'feature extraction': 1, 'hidden layer': [10, 30], 'activation function': [2, 1, 2]},
    {'feature extraction': 3, 'hidden layer': [10], 'activation function': [1, 2]},
    {'feature extraction': 2, 'hidden layer': [], 'activation function': [1]},
]


def build_model(chromosome):
    """Instantiate the MLP class that matches a chromosome's hidden layer count.

    Args:
        chromosome: Dict with 'hidden layer' (list of widths) and
            'activation function' (list of activation codes) entries.

    Returns:
        An MLP_0hidden, MLP_1hidden or MLP_2hidden instance built with the
        module-level input_size (and output_size for MLP_0hidden).

    Raises:
        ValueError: If the chromosome asks for more than two hidden layers.
    """
    hidden_layers = chromosome['hidden layer']
    activation_codes = chromosome['activation function']
    depth = len(hidden_layers)

    if depth == 0:
        return MLP_0hidden(input_size, output_size, activation_codes)
    if depth == 1:
        return MLP_1hidden(input_size, hidden_layers, activation_codes)
    if depth == 2:
        return MLP_2hidden(input_size, hidden_layers, activation_codes)
    raise ValueError(f"Unsupported number of hidden layers: {depth}")


# Selecting the class from the chromosome keeps the model and the chromosome in
# sync, instead of commenting constructor calls in and out by hand.
model = build_model(temp[1])


Step 6: Load a pre-trained feature extraction model (ResNet18, ResNet34, or VGG11)

Step 7: Define the training loop

In [None]:
# The MLP ends in nn.Softmax, so it returns probabilities rather than logits.
# nn.CrossEntropyLoss expects logits and would apply a second softmax on top of
# them, so the loss is computed as the negative log-likelihood of the
# probabilities instead.
_nll_loss = nn.NLLLoss()


def criterion(probabilities, targets):
    """Cross-entropy loss for a model that already outputs softmax probabilities."""
    # Clamp before the log so a probability that underflowed to 0 cannot turn
    # the loss into inf/NaN.
    return _nll_loss(torch.log(probabilities.clamp_min(1e-12)), targets)


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Move the model first so the optimizer is built over the on-device parameters.
model = model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# The backbone is decided by the chromosome alone; only the selected network is
# loaded.
extractor_code = temp[1]['feature extraction']
if extractor_code == 1:
    backbone = models.resnet18(pretrained=True)
    # Drop the last child (the fully connected classifier).
    feature_extraction = nn.Sequential(*list(backbone.children())[:-1])
elif extractor_code == 2:
    backbone = models.resnet34(pretrained=True)
    # Drop the last child (the fully connected classifier).
    feature_extraction = nn.Sequential(*list(backbone.children())[:-1])
elif extractor_code == 3:
    backbone = models.vgg11(pretrained=True)
    # VGG's own pooling stage produces 512 x 7 x 7 = 25088 features, which does
    # not match the 512-wide MLP input. Pool the 512 convolutional channels down
    # to 1 x 1 instead so the flattened feature vector has 512 entries.
    feature_extraction = nn.Sequential(backbone.features, nn.AdaptiveAvgPool2d(1))
else:
    raise ValueError(f"Unknown feature extraction code: {extractor_code!r}")

# Move feature extraction model to the same device as the model
feature_extraction = feature_extraction.to(device)
# The backbone is frozen: eval() makes BatchNorm use its stored statistics
# rather than per-batch statistics and stops it from updating them, which
# torch.no_grad() alone does not do.
feature_extraction.eval()

num_epochs = 5

for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for images, labels in train_loader:
        img = images.to(device)  # Move input tensor to the same device as the model
        lbl = labels.to(device)  # Move label tensor to the same device as the model

        # Feature extraction using the pre-trained model (no gradients needed)
        with torch.no_grad():
            features = feature_extraction(img)
            features = features.view(features.size(0), -1)  # flatten to (batch, features)

        # Forward pass
        outputs = model(features)
        loss = criterion(outputs, lbl)

        # Backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Running statistics for the epoch summary
        train_loss += loss.item()
        predicted = outputs.argmax(dim=1)
        total += lbl.size(0)
        correct += predicted.eq(lbl).sum().item()

    train_loss /= len(train_loader)  # mean loss per batch
    train_accuracy = correct / total

    print(f"Epoch [{epoch + 1}/{num_epochs}], "
          f"Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.4f}")


Certainly! Let's go through each step in more detail:

Step 1: Import the required libraries
In this step, we import the necessary libraries for our implementation. We import torch for PyTorch functionalities, torch.nn for defining neural network modules, and torchvision for accessing pre-trained models and datasets.

Step 2: Define the MLP model class
Here, we define three MLP classes that inherit from nn.Module: MLP_0hidden, MLP_1hidden, and MLP_2hidden, with zero, one, and two hidden layers respectively (that is, one, two, and three fully connected layers: fc1, fc2, fc3). Each activation function is selected by an integer code taken from the chromosome: 1 selects nn.ReLU() and 2 selects nn.Sigmoid(). The first code is applied to the input features and each following code to the output of a hidden layer; the output layer always uses nn.Softmax(dim=1). The forward method defines the forward pass of the model.

Step 3: Set random seed for reproducibility (optional)
Setting a random seed ensures that the random processes in your code produce the same results every time you run it. It's optional, but it can be useful for reproducibility.

Step 4: Load the CIFAR-10 dataset and apply transformations
In this step, we define the transformations to be applied to the CIFAR-10 dataset. We use transforms.ToTensor() to convert the images into tensors, and transforms.Normalize() to normalize the image tensors. Then, we create train_dataset and test_dataset by loading the CIFAR-10 dataset from the disk and applying the defined transformations. Finally, we create data loaders (train_loader and test_loader) to load the data in batches for training and testing.

Step 5: Create an instance of the MLP model with random configurations
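Here, we define the input size to be 512, which corresponds to the size of the output feature vector from the ResNet model. A hidden layer size is drawn from [0, 10, 20] using random.choice(), but the model itself is configured from a chromosome: each entry of temp holds a feature extraction code, a list of hidden layer sizes, and a list of activation codes. The output size is set to 10, which is the number of classes in CIFAR-10. Finally, we create an instance of the MLP model from the chromosome temp[1], which has two hidden layers of sizes 30 and 20.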

Step 6: Load a pre-trained feature extraction model (ResNet18, ResNet34, or VGG11)
In this step, the feature extractor is selected by the 'feature extraction' code of the chromosome: 1 loads models.resnet18(pretrained=True), 2 loads models.resnet34(pretrained=True), and 3 loads models.vgg11(pretrained=True). The chromosome's code, not a random draw, decides which network is used. The final classification layers are then stripped by wrapping the remaining modules in nn.Sequential(), so the network returns feature vectors instead of class scores. The code for this step lives in the same cell as the training loop in Step 7.

Step 7: Define the training loop
Here, we define the training loop for our model. We set the loss criterion as nn.CrossEntropyLoss() and the optimizer as torch.optim.Adam(). We check if CUDA is available and move the model to the corresponding device (either GPU or CPU). Then, we iterate over the specified number of epochs.

Within each epoch, we set the model to training mode using model.train(). We initialize variables to keep track of the loss, correct predictions, and total examples. For each batch in the training data, we move the images and labels to the device, perform feature extraction using the pre-trained ResNet model (with gradients turned off using torch.no_grad()), and pass the extracted features through the MLP model. We calculate the loss, perform backpropagation, and update the model's parameters. Additionally, we calculate the training accuracy by comparing the predicted labels with the true labels.

After each epoch, we calculate the average loss and training accuracy. Finally, we print the training loss and accuracy for that epoch.

That's a detailed explanation of each step in the implementation. Let me know if you have any further questions!