In [None]:
# Imagenette dataset homepage (the archive downloaded below comes from this project): https://github.com/fastai/imagenette

In [None]:
pip install datasets torchsummary

In [None]:
# from google.colab import drive
# drive.mount('/content/drive')

!wget -P /content/drive/MyDrive/ https://s3.amazonaws.com/fast-ai-imageclas/imagenette2-160.tgz
!tar -xzf /content/drive/MyDrive/imagenette2-160.tgz

In [None]:
pip install torchinfo

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models, transforms
#from datasets import load_dataset
from tqdm import tqdm
import torchvision.models as models
# from torchvision.datasets import ImageNet
from torchvision.datasets import ImageFolder
# from fastai.vision.all import *
import matplotlib.pyplot as plt
#from fastai.vision.augment import Resize
from torch.utils.data import DataLoader
import numpy as np


In [None]:
# Root folder of the extracted dataset; it holds the `train` and `val` sub-folders used below.
path = "imagenette2-160"

In [None]:
# ImageNet normalization statistics (kept for reference; normalization is currently disabled)
# imagenet_stats = ([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# A single preprocessing pipeline shared by the training and validation splits:
# resize every image to 224x224, then convert it to a tensor.
common_transforms = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        # transforms.Normalize(*imagenet_stats)
    ]
)

# Folder-per-class datasets read from `<path>/train` and `<path>/val`.
train_dataset = ImageFolder(root=f"{path}/train", transform=common_transforms)
val_dataset = ImageFolder(root=f"{path}/val", transform=common_transforms)

# Batches of 64; only the training split is shuffled.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(dataset=val_dataset, batch_size=64, shuffle=False)

In [None]:
# Sanity check: look at one training batch only and report the shape of its image tensor.
for images, labels in train_loader:
    shape = images.size()
    print(shape)
    break  # a single batch is enough

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load a pretrained EfficientNet model
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
model.to(device)

# Summarize the model using torchinfo.
# The summary object is not the last expression of this cell, so a notebook would not
# render it on its own; build it quietly (verbose=0) and print it explicitly.
from torchinfo import summary
print(summary(model, input_size=(1, 3, 224, 224), verbose=0))

# Full module tree, for looking up layer names/indices used by the experiments below.
print(model)

In [None]:
# Number of target classes, taken from the class list discovered by the training dataset.
class_names = train_dataset.classes
num_classes = len(class_names)
print(num_classes)

In [None]:
# Training and evaluation functions
def train(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module to optimise; it is switched to training mode.
        dataloader: Iterable of ``(inputs, labels)`` batches that supports ``len``.
        criterion: Callable computing the loss from ``(outputs, labels)``.
        optimizer: Optimizer whose gradients are reset and stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The sum of the per-batch loss values divided by the number of batches.
    """
    model.train()
    batch_losses = []

    for inputs, labels in tqdm(dataloader):
        inputs = inputs.to(device)
        labels = labels.to(device)

        # Standard step: clear old gradients, forward, backward, update.
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()

        batch_losses.append(loss.item())

    return sum(batch_losses) / len(dataloader)

def evaluate(model, dataloader, device):
    """Compute top-1 accuracy of ``model`` over ``dataloader``.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable of ``(inputs, labels)`` batches.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Fraction of samples whose highest-scoring output index equals the label.
    """
    model.eval()
    hits = 0
    seen = 0

    # Gradients are not needed for scoring.
    with torch.no_grad():
        for inputs, labels in tqdm(dataloader):
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Index of the largest output along the class dimension.
            predicted = torch.max(model(inputs), dim=1).indices

            hits += int((predicted == labels).sum())
            seen += labels.shape[0]

    return hits / seen

In [None]:
### ORIGINAL - NO CHANGES BESIDES LAST LAYER
# Baseline run: pretrained EfficientNet-B0 with every pretrained weight frozen; only a
# freshly initialised final linear classifier is trained.
import time  # used for the per-epoch timing below; no earlier cell imports it

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load a pretrained EfficientNet model
model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT)
model.to(device)
# Freeze all parameters
for param in model.parameters():
    param.requires_grad = False

# Replace the classifier (final layer)
num_classes = 10  # Change this to the number of classes in your dataset
model.classifier[1] = nn.Linear(model.classifier[1].in_features, num_classes)
model.classifier[1].to(device)

# Unfreeze the new classifier layer
for param in model.classifier.parameters():
    param.requires_grad = True

# Only parameters that still require gradients are handed to the optimizer.
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)
criterion = nn.CrossEntropyLoss()

num_epochs = 10
train_loss = []    # mean training loss per epoch
val_accuracy = []  # validation accuracy per epoch
total_time = []    # wall-clock seconds spent in training per epoch (validation excluded)
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss_epoch = train(model, train_loader, criterion, optimizer, device)
    train_time = time.time() - start_time
    total_time.append(train_time)
    val_accuracy_epoch = evaluate(model, val_loader, device)
    train_loss.append(train_loss_epoch)
    val_accuracy.append(val_accuracy_epoch)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss_epoch:.4f}, Val Accuracy: {val_accuracy_epoch:.4f}")

# Labelled "baseline" so this output is not confused with the cell further down that
# prints "*_last_replaced" results (the plotting cell names this configuration "Baseline").
print("val_accuracy_baseline=", val_accuracy)
print("train_loss_baseline=", train_loss)
print("time_baseline=", total_time)

In [None]:
#First layer replaced with original conv head
import torch
import torch.nn as nn
from torchvision import models
import torch.optim as optim
import time

class CustomEfficientNet(nn.Module):
    """EfficientNet variant whose first feature block is replaced by a linear layer.

    Data flow: flatten the image -> ``nn.Linear`` -> ReLU -> reshape to
    ``(C, 7, 7)`` -> bilinear upsample -> 1x1 convolution to 32 channels ->
    ``base_model.features[1:]`` -> global average pooling -> linear classifier.
    ``C`` is the output channel count of ``base_model.features[0][0]``.

    Args:
        base_model: Model exposing an indexable ``features`` container. Only
            ``features[0][0].out_channels`` and the blocks ``features[1:]`` are
            used; those blocks must end with 1280 channels.
        num_classes: Number of output logits.
        input_shape: ``(channels, height, width)`` of the images passed to
            ``forward``; it sizes the linear layer. Defaults to the original
            hard-coded ``(3, 224, 224)``.
        stem_output_size: Spatial size the replacement layer upsamples to before
            the remaining feature blocks. Defaults to the original ``(112, 112)``.
        debug_shapes: When true, intermediate tensor shapes are printed during
            the first forward pass only, so training loops are not flooded with
            one set of prints per batch.
    """

    def __init__(self, base_model, num_classes, input_shape=(3, 224, 224),
                 stem_output_size=(112, 112), debug_shapes=True):
        super(CustomEfficientNet, self).__init__()
        self.debug_shapes = debug_shapes
        self._shapes_logged = False

        # The original first convolution is only consulted for its channel count.
        original_conv = base_model.features[0][0]
        in_channels, in_height, in_width = input_shape
        self.input_flatten_size = in_channels * in_height * in_width  # size of one flattened image

        self.first_layer = nn.Sequential(
            nn.Flatten(),  # (batch, channels * height * width)
            nn.Linear(self.input_flatten_size, original_conv.out_channels * 7 * 7),  # small 7x7 feature map
            nn.ReLU(),
            nn.Unflatten(1, (original_conv.out_channels, 7, 7)),  # back to (batch, channels, 7, 7)
            nn.Upsample(size=tuple(stem_output_size), mode="bilinear", align_corners=False),
        )

        # 1x1 convolution guaranteeing the 32 channels fed to the remaining feature blocks.
        self.ensure_32_channels = nn.Conv2d(original_conv.out_channels, 32, kernel_size=1, stride=1, padding=0)
        self.features = nn.Sequential(*base_model.features[1:])

        # Classification head: global average pooling followed by one linear layer.
        self.conv_head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1280, num_classes),
        )

    def _log_shape(self, label, tensor):
        """Print ``label`` and the tensor shape while first-pass shape debugging is active."""
        if self.debug_shapes and not self._shapes_logged:
            print(label, tensor.shape)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        self._log_shape("Input Shape:", x)

        # Linear replacement of the first feature block
        x = self.first_layer(x)
        self._log_shape("After First Layer Shape:", x)

        # Ensure 32 channels before passing to the EfficientNet blocks
        x = self.ensure_32_channels(x)
        self._log_shape("After Ensure 32 Channels Shape:", x)

        # Remaining EfficientNet feature blocks
        x = self.features(x)
        self._log_shape("Features Output Shape:", x)

        # Classification head
        x = self.conv_head(x)
        self._log_shape("Classifier Output Shape:", x)

        self._shapes_logged = True  # later passes stay silent
        return x

# Load pretrained EfficientNet
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT).to(device)
num_classes = 10  # Example number of classes

# Freeze EfficientNet features (after the first convolution); the custom model reuses
# these same modules, so they arrive frozen.
for param in base_model.features[1:].parameters():
    param.requires_grad = False

# Instantiate the custom model
model = CustomEfficientNet(base_model, num_classes).to(device)  # Ensure model is on the correct device

# Check trainable layers
print("Trainable parameters in the model:")
for name, param in model.named_parameters():
    print(f"{name} is trainable: {param.requires_grad}")

# Debugging with dummy input.
# Run the shape check in eval mode so the random tensor does not update the BatchNorm
# running statistics of the pretrained blocks; train() switches back to training mode.
model.eval()
dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Verify shapes with dummy input
with torch.no_grad():
    output = model(dummy_input)  # Verify shapes step by step

# Define loss and optimizer (only parameters that require gradients are optimised)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

# Training loop
num_epochs = 10
train_loss = []    # mean training loss per epoch
val_accuracy = []  # validation accuracy per epoch
total_time = []    # wall-clock seconds spent in training per epoch (validation excluded)
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss_epoch = train(model, train_loader, criterion, optimizer, device)
    train_time = time.time() - start_time
    total_time.append(train_time)
    val_accuracy_epoch = evaluate(model, val_loader, device)
    train_loss.append(train_loss_epoch)
    val_accuracy.append(val_accuracy_epoch)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss_epoch:.4f}, Val Accuracy: {val_accuracy_epoch:.4f}")

# Labels name the experiment run in this cell (first layer replaced).
print("val_accuracy_first_replaced=", val_accuracy)
print("train_loss_first_replaced=", train_loss)
print("time_first_replaced=", total_time)

In [None]:
import time
#Middle layer with original conv head
# Middle replaced
class CustomEfficientNet(nn.Module):
    """EfficientNet variant with a linear layer inserted after the first six feature blocks.

    Data flow in ``forward``: ``features[:6]`` -> flatten -> ``nn.Linear`` ->
    ReLU -> reshape to ``(192, 4, 4)`` -> 1x1 convolution to 1280 channels ->
    global average pooling -> linear classifier.

    NOTE: ``features_after_middle`` (``features[7:]``) is built and registered
    but never called in ``forward``, so every block from index 6 onwards is
    bypassed, not only block 6.

    Args:
        base_model: Model exposing a ``features`` container of at least seven
            blocks that accept a ``(N, 3, 224, 224)`` input.
        num_classes: Number of output logits.
        debug_shapes: When true, intermediate tensor shapes are printed during
            the first forward pass only, so training loops are not flooded with
            one set of prints per batch.
    """

    def __init__(self, base_model, num_classes, debug_shapes=True):
        super(CustomEfficientNet, self).__init__()
        self.debug_shapes = debug_shapes
        self._shapes_logged = False

        # Extract the EfficientNet feature layers
        self.features = nn.ModuleList(base_model.features)

        self.features_before_middle = nn.Sequential(*self.features[:6])  # blocks 0-5
        self.features_after_middle = nn.Sequential(*self.features[7:])   # blocks 7+ (unused in forward)

        # Infer the flattened size produced by the first six blocks on a 224x224 image.
        device = next(base_model.parameters()).device  # same device as base_model
        dummy_input = torch.zeros(1, 3, 224, 224, device=device)
        middle_features = self._probe_output(self.features_before_middle, dummy_input)
        self.middle_flatten_size = middle_features.shape[1] * middle_features.shape[2] * middle_features.shape[3]

        self.middle_replacement = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.middle_flatten_size, 192 * 4 * 4),  # input size inferred above
            nn.ReLU(),
            nn.Unflatten(1, (192, 4, 4)),
        ).to(device)

        # Match channels to the 1280 expected by the classification head.
        self.match_channels = nn.Conv2d(192, 1280, kernel_size=1, stride=1, padding=0).to(device)

        # Classification head: global average pooling followed by one linear layer.
        self.conv_head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1280, num_classes),
        )

    @staticmethod
    def _probe_output(module, sample):
        """Run ``module`` on ``sample`` without gradients and in eval mode.

        Eval mode keeps BatchNorm running statistics of pretrained blocks from
        being updated by the dummy tensor; each submodule's original
        training flag is restored afterwards.
        """
        saved_modes = [(sub, sub.training) for sub in module.modules()]
        module.eval()
        try:
            with torch.no_grad():
                return module(sample)
        finally:
            for sub, was_training in saved_modes:
                sub.training = was_training

    def _log_shape(self, label, tensor):
        """Print ``label`` and the tensor shape while first-pass shape debugging is active."""
        if self.debug_shapes and not self._shapes_logged:
            print(label, tensor.shape)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        self._log_shape("Input Shape:", x)

        # Blocks before the replacement
        x = self.features_before_middle(x)
        self._log_shape("After Features Before Middle Replacement Shape:", x)

        # Linear replacement; output is (batch, 192, 4, 4)
        x = self.middle_replacement(x)
        self._log_shape("After Middle Replacement Shape:", x)

        # 192 -> 1280 channels for the head
        x = self.match_channels(x)
        self._log_shape("After Matching Channels Shape:", x)

        # Pooling + linear classifier
        x = self.conv_head(x)
        self._log_shape("Conv Head Output Shape:", x)

        self._shapes_logged = True  # later passes stay silent
        return x

# Load pretrained EfficientNet
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT).to(device)
num_classes = 10


# Freeze EfficientNet features.
# All pretrained blocks are frozen, including block 0: this model keeps the original
# stem, so freezing only `features[1:]` would leave the pretrained stem trainable.
for param in base_model.features.parameters():
    param.requires_grad = False

# Instantiate the custom model (its newly created layers are trainable by default)
model = CustomEfficientNet(base_model, num_classes).to(device)

# Check trainable layers
print("Trainable parameters in the model:")
for name, param in model.named_parameters():
    print(f"{name} is trainable: {param.requires_grad}")

# Debugging with dummy input.
# Run the shape check in eval mode so the random tensor does not update the BatchNorm
# running statistics of the pretrained blocks; train() switches back to training mode.
model.eval()
dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Verify shapes with dummy input
with torch.no_grad():
    output = model(dummy_input)  # Verify shapes step by step


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

num_epochs = 10
train_loss = []    # mean training loss per epoch
val_accuracy = []  # validation accuracy per epoch
total_time = []    # wall-clock seconds spent in training per epoch (validation excluded)
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss_epoch = train(model, train_loader, criterion, optimizer, device)
    train_time = time.time() - start_time
    total_time.append(train_time)
    val_accuracy_epoch = evaluate(model, val_loader, device)
    train_loss.append(train_loss_epoch)
    val_accuracy.append(val_accuracy_epoch)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss_epoch:.4f}, Val Accuracy: {val_accuracy_epoch:.4f}")

# Labels name the experiment run in this cell and match the variable names used by the
# plotting cell (which spells them "val_accuray_*").
print("val_accuray_middle_replaced=", val_accuracy)
print("train_loss_middle_replaced=", train_loss)
print("time_middle_replaced=", total_time)



In [None]:
#Last layer before conv_head with original conv head
class CustomEfficientNet(nn.Module):
    """EfficientNet variant with a linear layer inserted before the last feature block.

    Data flow: ``features[:-1]`` -> flatten -> ``nn.Linear`` -> ReLU ->
    reshape to ``(192, 4, 4)`` -> 1x1 convolution to 320 channels ->
    ``features[-1]`` -> global average pooling -> linear classifier.

    Args:
        base_model: Model exposing a ``features`` container that accepts a
            ``(N, 3, 224, 224)`` input; its last block must map 320 channels
            to 1280 channels.
        num_classes: Number of output logits.
        debug_shapes: When true, intermediate tensor shapes are printed during
            the first forward pass only, so training loops are not flooded with
            one set of prints per batch.
    """

    def __init__(self, base_model, num_classes, debug_shapes=True):
        super(CustomEfficientNet, self).__init__()
        self.debug_shapes = debug_shapes
        self._shapes_logged = False

        # Extract the EfficientNet feature layers
        self.features = nn.ModuleList(base_model.features)

        # Every block except the last one, built once and reused by each forward pass.
        self.features_before_replacement = nn.Sequential(*self.features[:-1])

        # Infer the flattened size produced by those blocks on a 224x224 image.
        device = next(self.features[0].parameters()).device
        dummy_input = torch.zeros(1, 3, 224, 224, device=device)
        dummy_features = self._probe_output(self.features_before_replacement, dummy_input)
        self.input_flatten_size = dummy_features.shape[1] * dummy_features.shape[2] * dummy_features.shape[3]

        # Linear layer inserted before the last feature block
        self.replacement_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.input_flatten_size, 192 * 4 * 4),
            nn.ReLU(),
            nn.Unflatten(1, (192, 4, 4))
        )

        # Match channels from 192 to the 320 consumed by the last feature block
        self.match_channels = nn.Conv2d(192, 320, kernel_size=1, stride=1, padding=0)

        # The last feature block
        self.features_after_replacement = nn.Sequential(*self.features[-1:])

        # Classification head: global average pooling followed by one linear layer.
        self.conv_head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1280, num_classes),
        )

    @staticmethod
    def _probe_output(module, sample):
        """Run ``module`` on ``sample`` without gradients and in eval mode.

        Eval mode keeps BatchNorm running statistics of pretrained blocks from
        being updated by the dummy tensor; each submodule's original
        training flag is restored afterwards.
        """
        saved_modes = [(sub, sub.training) for sub in module.modules()]
        module.eval()
        try:
            with torch.no_grad():
                return module(sample)
        finally:
            for sub, was_training in saved_modes:
                sub.training = was_training

    def _log_shape(self, label, tensor):
        """Print ``label`` and the tensor shape while first-pass shape debugging is active."""
        if self.debug_shapes and not self._shapes_logged:
            print(label, tensor.shape)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        self._log_shape("Input Shape:", x)

        # All feature blocks except the last one
        x = self.features_before_replacement(x)
        self._log_shape("Features Shape Before Replacement:", x)

        # Inserted linear layer; output is (batch, 192, 4, 4)
        x = self.replacement_layer(x)
        self._log_shape("After Replacement Layer Shape:", x)

        # Match channels from 192 to 320
        x = self.match_channels(x)
        self._log_shape("After Matching Channels Shape:", x)

        # Last feature block; output has 1280 channels
        x = self.features_after_replacement(x)
        self._log_shape("Features Shape After Replacement:", x)

        # Pooling + linear classifier; output is (batch, num_classes)
        x = self.conv_head(x)
        self._log_shape("Conv Head Output Shape:", x)

        self._shapes_logged = True  # later passes stay silent
        return x

# Load pretrained EfficientNet
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT).to(device)
num_classes = 10

# Freeze EfficientNet features.
# All pretrained blocks are frozen, including block 0: this model keeps the original
# stem, so freezing only `features[1:]` would leave the pretrained stem trainable.
for param in base_model.features.parameters():
    param.requires_grad = False

# Instantiate the custom model (its newly created layers are trainable by default)
model = CustomEfficientNet(base_model, num_classes).to(device)

# Check trainable layers
print("Trainable parameters in the model:")
for name, param in model.named_parameters():
    print(f"{name} is trainable: {param.requires_grad}")

# Debugging with dummy input.
# Run the shape check in eval mode so the random tensor does not update the BatchNorm
# running statistics of the pretrained blocks; train() switches back to training mode.
model.eval()
dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Verify shapes with dummy input
with torch.no_grad():
    output = model(dummy_input)  # Verify shapes step by step


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

num_epochs = 10
train_loss = []    # mean training loss per epoch
val_accuracy = []  # validation accuracy per epoch
total_time = []    # wall-clock seconds spent in training per epoch (validation excluded)
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss_epoch = train(model, train_loader, criterion, optimizer, device)
    train_time = time.time() - start_time
    total_time.append(train_time)
    val_accuracy_epoch = evaluate(model, val_loader, device)
    train_loss.append(train_loss_epoch)
    val_accuracy.append(val_accuracy_epoch)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss_epoch:.4f}, Val Accuracy: {val_accuracy_epoch:.4f}")

print("val_accuray_last_replaced=", val_accuracy)
print("train_loss_last_replaced=", train_loss)
print("time_last_replaced=", total_time)


In [None]:
#Last layer before conv_head
class CustomEfficientNet(nn.Module):
    """EfficientNet variant with a linear layer before the last feature block and a two-layer head.

    Data flow: ``features[:-1]`` -> flatten -> ``nn.Linear`` -> ReLU ->
    reshape to ``(192, 4, 4)`` -> 1x1 convolution to 320 channels ->
    ``features[-1]`` -> global average pooling -> ``Linear(1280, 1024)`` ->
    ReLU -> ``Linear(1024, num_classes)``.

    Args:
        base_model: Model exposing a ``features`` container that accepts a
            ``(N, 3, 224, 224)`` input; its last block must map 320 channels
            to 1280 channels.
        num_classes: Number of output logits.
        debug_shapes: When true, intermediate tensor shapes are printed during
            the first forward pass only, so training loops are not flooded with
            one set of prints per batch.
    """

    def __init__(self, base_model, num_classes, debug_shapes=True):
        super(CustomEfficientNet, self).__init__()
        self.debug_shapes = debug_shapes
        self._shapes_logged = False

        # Extract the EfficientNet feature layers
        self.features = nn.ModuleList(base_model.features)

        # Every block except the last one, built once and reused by each forward pass.
        self.features_before_replacement = nn.Sequential(*self.features[:-1])

        # Infer the flattened size produced by those blocks on a 224x224 image.
        device = next(self.features[0].parameters()).device
        dummy_input = torch.zeros(1, 3, 224, 224, device=device)
        dummy_features = self._probe_output(self.features_before_replacement, dummy_input)
        self.input_flatten_size = dummy_features.shape[1] * dummy_features.shape[2] * dummy_features.shape[3]

        # Linear layer inserted before the last feature block
        self.replacement_layer = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.input_flatten_size, 192 * 4 * 4),
            nn.ReLU(),
            nn.Unflatten(1, (192, 4, 4))
        )

        # Match channels from 192 to the 320 consumed by the last feature block
        self.match_channels = nn.Conv2d(192, 320, kernel_size=1, stride=1, padding=0)

        # The last feature block
        self.features_after_replacement = nn.Sequential(*self.features[-1:])

        # Head: global average pooling, then a 1280 -> 1024 hidden layer with ReLU
        self.conv_head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1280, 1024),
            nn.ReLU()
        )

        # Final classifier
        self.classifier = nn.Linear(1024, num_classes)

    @staticmethod
    def _probe_output(module, sample):
        """Run ``module`` on ``sample`` without gradients and in eval mode.

        Eval mode keeps BatchNorm running statistics of pretrained blocks from
        being updated by the dummy tensor; each submodule's original
        training flag is restored afterwards.
        """
        saved_modes = [(sub, sub.training) for sub in module.modules()]
        module.eval()
        try:
            with torch.no_grad():
                return module(sample)
        finally:
            for sub, was_training in saved_modes:
                sub.training = was_training

    def _log_shape(self, label, tensor):
        """Print ``label`` and the tensor shape while first-pass shape debugging is active."""
        if self.debug_shapes and not self._shapes_logged:
            print(label, tensor.shape)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        self._log_shape("Input Shape:", x)

        # All feature blocks except the last one
        x = self.features_before_replacement(x)
        self._log_shape("Features Shape Before Replacement:", x)

        # Inserted linear layer; output is (batch, 192, 4, 4)
        x = self.replacement_layer(x)
        self._log_shape("After Replacement Layer Shape:", x)

        # Match channels from 192 to 320
        x = self.match_channels(x)
        self._log_shape("After Matching Channels Shape:", x)

        # Last feature block; output has 1280 channels
        x = self.features_after_replacement(x)
        self._log_shape("Features Shape After Replacement:", x)

        # Pooling + hidden layer; output is (batch, 1024)
        x = self.conv_head(x)
        self._log_shape("Conv Head Output Shape:", x)

        # Final classifier; output is (batch, num_classes)
        x = self.classifier(x)
        self._log_shape("Classifier Output Shape:", x)

        self._shapes_logged = True  # later passes stay silent
        return x

# Load pretrained EfficientNet
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT).to(device)
num_classes = 10

# Instantiate the custom model FIRST. The freeze/unfreeze logic below must act on this
# cell's model; previously it ran before instantiation and therefore modified whatever
# `model` was left over from the preceding cell (or failed on a fresh kernel).
model = CustomEfficientNet(base_model, num_classes).to(device)

# Freeze all parameters in the model (this includes everything initially)
for param in model.parameters():
    param.requires_grad = False

# Unfreeze the newly created layers explicitly. The pretrained feature blocks
# (including `features_after_replacement`, which is the original last block) stay
# frozen - the same end state the previous ordering produced, where the final
# "freeze base_model.features" loop undid the unfreeze of that block.
for new_module in (model.replacement_layer, model.match_channels, model.conv_head, model.classifier):
    for param in new_module.parameters():
        param.requires_grad = True

# Check trainable layers
print("Trainable parameters in the model:")
for name, param in model.named_parameters():
    print(f"{name} is trainable: {param.requires_grad}")

# Debugging with dummy input.
# Run the shape check in eval mode so the random tensor does not update the BatchNorm
# running statistics of the pretrained blocks; train() switches back to training mode.
model.eval()
dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Verify shapes with dummy input
with torch.no_grad():
    output = model(dummy_input)  # Verify shapes step by step


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

num_epochs = 10
train_loss = []    # mean training loss per epoch
val_accuracy = []  # validation accuracy per epoch
total_time = []    # wall-clock seconds spent in training per epoch (validation excluded)
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss_epoch = train(model, train_loader, criterion, optimizer, device)
    train_time = time.time() - start_time
    total_time.append(train_time)
    val_accuracy_epoch = evaluate(model, val_loader, device)
    train_loss.append(train_loss_epoch)
    val_accuracy.append(val_accuracy_epoch)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss_epoch:.4f}, Val Accuracy: {val_accuracy_epoch:.4f}")

# NOTE(review): these labels previously read "*_middle_2ndlast_replaced", copied from the
# combined middle + 2nd-to-last cell even though no middle replacement happens here.
# "2ndlast_replaced" is the only plotting-cell name no other cell prints - confirm.
print("val_accuray_2ndlast_replaced=", val_accuracy)
print("train_loss_2ndlast_replaced=", train_loss)
print("time_2ndlast_replaced=", total_time)



In [None]:
# Middle & 2nd to last replaced with original conv head
# import torch
import torch.nn as nn
from torchvision import models
import torch.optim as optim


class CustomEfficientNet(nn.Module):
    """EfficientNet variant with two linear replacement layers.

    Data flow: ``features[:6]`` -> middle replacement (flatten, ``nn.Linear``,
    ReLU, reshape to ``(192, 4, 4)``) -> ``features[7:-1]`` -> second
    replacement (same structure) -> 1x1 convolution to 1280 channels ->
    global average pooling -> linear classifier.

    Feature block 6 and the last feature block are registered in
    ``self.features`` but never called in ``forward``.

    Args:
        base_model: Model exposing a ``features`` container that accepts a
            ``(N, 3, 224, 224)`` input; the blocks ``features[7:-1]`` must
            accept 192 input channels.
        num_classes: Number of output logits.
        debug_shapes: When true, intermediate tensor shapes are printed during
            the first forward pass only, so training loops are not flooded with
            one set of prints per batch.
    """

    def __init__(self, base_model, num_classes, debug_shapes=True):
        super(CustomEfficientNet, self).__init__()
        self.debug_shapes = debug_shapes
        self._shapes_logged = False

        # Extract the EfficientNet feature layers
        self.features = nn.ModuleList(base_model.features)

        self.features_before_middle = nn.Sequential(*self.features[:6])  # blocks 0-5
        self.features_after_middle = nn.Sequential(*self.features[7:])   # blocks 7+
        # Blocks between the two replacements, built once and reused by each forward pass.
        self.features_between_replacements = self.features_after_middle[:-1]

        # Infer the flattened size produced by the first six blocks on a 224x224 image.
        device = next(base_model.parameters()).device  # same device as base_model
        dummy_input = torch.zeros(1, 3, 224, 224, device=device)
        middle_features = self._probe_output(self.features_before_middle, dummy_input)
        self.middle_flatten_size = middle_features.shape[1] * middle_features.shape[2] * middle_features.shape[3]

        self.middle_replacement = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.middle_flatten_size, 192 * 4 * 4),  # input size inferred above
            nn.ReLU(),
            nn.Unflatten(1, (192, 4, 4)),
        ).to(device)

        # Infer the flattened size reaching the second replacement.
        dummy_middle_output = self._probe_output(self.middle_replacement, middle_features)
        second_last_features = self._probe_output(self.features_between_replacements, dummy_middle_output)
        self.second_last_flatten_size = (
            second_last_features.shape[1] * second_last_features.shape[2] * second_last_features.shape[3]
        )

        self.second_last_replacement = nn.Sequential(
            nn.Flatten(),
            nn.Linear(self.second_last_flatten_size, 192 * 4 * 4),  # input size inferred above
            nn.ReLU(),
            nn.Unflatten(1, (192, 4, 4)),
        ).to(device)

        # Match channels to the 1280 expected by the classification head.
        self.match_channels = nn.Conv2d(192, 1280, kernel_size=1, stride=1, padding=0).to(device)

        # Classification head: global average pooling followed by one linear layer.
        self.conv_head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(1280, num_classes),
        )

    @staticmethod
    def _probe_output(module, sample):
        """Run ``module`` on ``sample`` without gradients and in eval mode.

        Eval mode keeps BatchNorm running statistics of pretrained blocks from
        being updated by the dummy tensor; each submodule's original
        training flag is restored afterwards.
        """
        saved_modes = [(sub, sub.training) for sub in module.modules()]
        module.eval()
        try:
            with torch.no_grad():
                return module(sample)
        finally:
            for sub, was_training in saved_modes:
                sub.training = was_training

    def _log_shape(self, label, tensor):
        """Print ``label`` and the tensor shape while first-pass shape debugging is active."""
        if self.debug_shapes and not self._shapes_logged:
            print(label, tensor.shape)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for a batch of images."""
        self._log_shape("Input Shape:", x)

        # Blocks before the middle replacement
        x = self.features_before_middle(x)
        self._log_shape("After Features Before Middle Replacement Shape:", x)

        # Middle replacement; output is (batch, 192, 4, 4)
        x = self.middle_replacement(x)
        self._log_shape("After Middle Replacement Shape:", x)

        # Blocks after the middle replacement, excluding the last feature block
        x = self.features_between_replacements(x)
        self._log_shape("After Features After Middle Shape:", x)

        # Second replacement; output is (batch, 192, 4, 4)
        x = self.second_last_replacement(x)
        self._log_shape("After Second-to-Last Replacement Shape:", x)

        # 192 -> 1280 channels for the head
        x = self.match_channels(x)
        self._log_shape("After Matching Channels Shape:", x)

        # Pooling + linear classifier
        x = self.conv_head(x)
        self._log_shape("Conv Head Output Shape:", x)

        self._shapes_logged = True  # later passes stay silent
        return x



# Load pretrained EfficientNet
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
base_model = models.efficientnet_b0(weights=models.EfficientNet_B0_Weights.DEFAULT).to(device)
num_classes = 10

# Instantiate the custom model FIRST. The freeze/unfreeze logic below must act on this
# cell's model; previously it ran before instantiation and therefore touched the `model`
# left over from the preceding cell, which has no `second_last_replacement` attribute.
model = CustomEfficientNet(base_model, num_classes).to(device)

# Freeze all parameters in the model (this includes everything initially)
for param in model.parameters():
    param.requires_grad = False

# Unfreeze every newly created layer explicitly; the pretrained feature blocks stay frozen.
# Both replacement layers and the head are randomly initialised, so all of them must train.
for new_module in (
    model.middle_replacement,
    model.second_last_replacement,
    model.match_channels,
    model.conv_head,
):
    for param in new_module.parameters():
        param.requires_grad = True

# Check trainable layers
print("Trainable parameters in the model:")
for name, param in model.named_parameters():
    print(f"{name} is trainable: {param.requires_grad}")

# Debugging with dummy input.
# Run the shape check in eval mode so the random tensor does not update the BatchNorm
# running statistics of the pretrained blocks; train() switches back to training mode.
model.eval()
dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Verify shapes with dummy input
with torch.no_grad():
    output = model(dummy_input)  # Verify shapes step by step


criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=0.001)

num_epochs = 10
train_loss = []    # mean training loss per epoch
val_accuracy = []  # validation accuracy per epoch
total_time = []    # wall-clock seconds spent in training per epoch (validation excluded)
for epoch in range(num_epochs):
    start_time = time.time()
    train_loss_epoch = train(model, train_loader, criterion, optimizer, device)
    train_time = time.time() - start_time
    total_time.append(train_time)
    val_accuracy_epoch = evaluate(model, val_loader, device)
    train_loss.append(train_loss_epoch)
    val_accuracy.append(val_accuracy_epoch)
    print(f"Epoch {epoch+1}, Train Loss: {train_loss_epoch:.4f}, Val Accuracy: {val_accuracy_epoch:.4f}")

print("val_accuray_middle_2ndlast_replaced=", val_accuracy)
print("train_loss_middle_2ndlast_replaced=", train_loss)
print("time_middle_2ndlast_replaced=", total_time)





In [None]:
#code for plotting the graphs below - just copy-pasted the val and training values into lists
import matplotlib.pyplot as plt
# Validation-accuracy comparison across experiments (values pasted from the training cells).
plt.figure(figsize=(10, 6))
num_epochs = 10
epoch = list(range(1, num_epochs + 1))  # x axis: epochs 1..num_epochs

val_accuray_baseline = [0.9671, 0.9717, 0.9732, 0.9738, 0.9750, 0.9758, 0.9755, 0.9750, 0.9761, 0.9753]
val_accuray_first_replaced = [0.9477707006369427, 0.9431847133757961, 0.9500636942675159, 0.9513375796178344, 0.9419108280254777, 0.9561783439490446, 0.9561783439490446, 0.9503184713375796, 0.9398726114649681, 0.9587261146496815]
val_accuray_middle_replaced = [0.8822929936305732, 0.8603821656050955, 0.8817834394904459, 0.8657324840764331, 0.8736305732484076, 0.8736305732484076, 0.8642038216560509, 0.8631847133757962, 0.8621656050955414, 0.8578343949044586]
val_accuray_2ndlast_replaced = [0.9462420382165605, 0.9447133757961783, 0.94828025477707, 0.9429299363057325, 0.9406369426751592, 0.9414012738853503, 0.9243312101910828, 0.9492993630573249, 0.9421656050955414, 0.9470063694267515]
val_accuray_last_replaced = [0.944968152866242, 0.9515923566878981, 0.9470063694267515, 0.9515923566878981, 0.9431847133757961, 0.9592356687898089, 0.9485350318471337, 0.9462420382165605, 0.935796178343949, 0.9579617834394905]
val_accuray_middle_2ndlast_replaced = [0.8838216560509554, 0.8963057324840764, 0.9006369426751593, 0.9062420382165605, 0.89171974522293, 0.7370700636942675, 0.4178343949044586, 0.8387261146496815, 0.8514649681528662, 0.8611464968152867]

# (series, legend label, line style) in drawing order.
# The "Last Layer Replaced" series (orange) is intentionally left out of this figure.
_accuracy_curves = [
    (val_accuray_baseline, "Baseline", {"color": "black", "linestyle": ":"}),
    (val_accuray_first_replaced, "First Layer Replaced", {"color": "green"}),
    (val_accuray_middle_replaced, "Middle Layer Replaced", {"color": "purple"}),
    (val_accuray_2ndlast_replaced, "2nd to Last Layer Replaced", {"color": "red"}),
    (val_accuray_middle_2ndlast_replaced, "Middle and 2nd to Last Layer Replaced", {"color": "blue"}),
]
for _series, _label, _line_style in _accuracy_curves:
    plt.plot(epoch, _series, label=_label, **_line_style)

plt.title("Validation Accuracy of EfficientNet on Imagenette")
plt.legend()
plt.ylabel("Accuracy")
plt.xlabel("Epoch")
plt.savefig('EfficientNet_Accuracy_compare.png', dpi=300)
plt.show()

In [None]:

# Training-loss comparison across experiments (values pasted from the training cells).
# Reuses `epoch` (the x axis) defined in the accuracy-plot cell.
plt.figure(figsize=(10, 6))

train_loss_baseline = [0.5547, 0.1694, 0.1329, 0.1114, 0.1009, 0.0877, 0.0771, 0.0747, 0.0715, 0.0678]
train_loss_first_replaced = [0.30764656423314196, 0.1250429212656879, 0.08540227125096764, 0.08219074263438783, 0.07480136376830775, 0.06013540639007162, 0.05676156625220854, 0.040834894521017184, 0.054327422554755735, 0.049598400308782035]
train_loss_middle_replaced = [0.6640109166100219, 0.18507155268830625, 0.1254911851421049, 0.1252630806152042, 0.11645534864903705, 0.08071976607076263, 0.105886530903533, 0.12237552750923135, 0.09164503595840526, 0.0724388102251986]
train_loss_2ndlast_replaced = [0.31840783251902544, 0.1460295004921185, 0.12614453079279614, 0.09106564843347548, 0.06568232942309633, 0.058039361179170725, 0.0825968105928041, 0.05498372959334882, 0.06580928520329385, 0.04307937768140078]
train_loss_last_replaced = [0.32752813086719124, 0.14467493892722838, 0.11987542980853971, 0.0997143439111627, 0.08797074221168973, 0.08358107432896095, 0.07529915879068368, 0.08856639362371375, 0.07926908930892602, 0.07020333375957971]
train_loss_middle_2ndlast_replaced = [0.7014728227781283, 0.2511954201335037, 0.13203523039024928, 0.09013904797894931, 0.07792432285527498, 0.22602112154473197, 1.155509135534836, 0.6418747562613036, 0.1577919390480462, 0.06119440667877112]

# (series, legend label, line style) in drawing order.
_loss_curves = [
    (train_loss_baseline, "Baseline", {"color": "black", "linestyle": ":"}),
    (train_loss_first_replaced, "First Layer Replaced", {"color": "green"}),
    (train_loss_middle_replaced, "Middle Layer Replaced", {"color": "purple"}),
    (train_loss_last_replaced, "Last Layer Replaced", {"color": "orange"}),
    (train_loss_2ndlast_replaced, "2nd to Last Layer Replaced", {"color": "red"}),
    (train_loss_middle_2ndlast_replaced, "Middle and 2nd to Last Layer Replaced", {"color": "blue"}),
]
for _series, _label, _line_style in _loss_curves:
    plt.plot(epoch, _series, label=_label, **_line_style)

plt.title("Training Loss of EfficientNet on Imagenette")
plt.ylabel("Training Loss")
plt.xlabel("Epoch")
plt.legend()
plt.savefig('EfficientNet_loss_compare.png', dpi=300)
plt.show()
