In [2]:
# This mounts your Google Drive to the Colab VM.
from google.colab import drive
drive.mount('/content/drive')

# TODO: Enter the foldername in your Drive where you have saved the unzipped
# assignment folder, e.g. 'cs231n/assignments/assignment3/'
FOLDERNAME = 'CS348K Final Project/cs348k-project/'
assert FOLDERNAME is not None, "[!] Enter the foldername."

import os
project_path = os.path.join('/content/drive/My Drive', FOLDERNAME)
assert os.path.exists(project_path), "[!] The specified folder does not exist in Google Drive."


# Now that we've mounted your Drive, this ensures that
# the Python interpreter of the Colab VM can load
# python files from within it.
import sys
sys.path.append('/content/drive/My Drive/{}'.format(FOLDERNAME))



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
from sample import BaseModel, train_predictor, train_selector
from sample import PredictorNetwork
from sample import SelectorNetwork
import torchvision.models as models
import torch
import torch.nn as nn
import torch.optim as optim
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)


cuda


In [4]:
"""Set up the DataLoaders: """
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

# Define the transformation for the validation data
transform = transforms.Compose([
    # transforms.Resize(256),
    # transforms.CenterCrop(224),
    # transforms.ToTensor(),
    # transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    transforms.RandomHorizontalFlip(),
    transforms.RandomCrop(32, padding=4),
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])

# Load the validation dataset
val_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Load the full CIFAR-10 training dataset
full_train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)

# Define the sizes for training and validation datasets (80/20 split)
train_size = int(0.02 * len(full_train_dataset))
val_size = len(full_train_dataset) - train_size

# Split the training dataset into training and validation sets
train_dataset, val_dataset = random_split(full_train_dataset, [train_size, val_size])

# Define the DataLoader for the training and validation datasets
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Load the CIFAR-10 test dataset
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=test_transform)

# Optionally, use the entire test set or a smaller portion (e.g., 10%)
test_size = int(0.1 * len(test_dataset))
_, small_test_dataset = random_split(test_dataset, [len(test_dataset) - test_size, test_size])

# Define the DataLoader for the test dataset
test_loader = DataLoader(small_test_dataset, batch_size=32, shuffle=False)

# Print the sizes of the datasets
print(f'Training dataset size: {len(train_dataset)}')
print(f'Validation dataset size: {len(val_dataset)}')
print(f'Test dataset size: {len(small_test_dataset)}')


Files already downloaded and verified
Files already downloaded and verified
Files already downloaded and verified
Training dataset size: 1000
Validation dataset size: 49000
Test dataset size: 1000


In [5]:
# Instantiate models, criterion, and optimizer
# base_model = BaseModel()
base_model = BaseModel()
base_model.eval()
num_classes = 10 # CIFAR10

# Load the pretrained ResNet-50 model
resnet50 = models.resnet50(pretrained=True) # pretrained on ImageNet
# resnet50.eval() # sets this to evaluation mode

resnet18 = models.resnet18(pretrained=True)
# print(resnet18)
for param in resnet18.parameters():
    param.requires_grad = False
resnet18.fc = nn.Linear(resnet18.fc.in_features, num_classes) #rewrites resnet18 final fc layer
#print(resnet18)





In [7]:
import torch
import torch.nn.functional as F
from torchvision import models, datasets, transforms
from torch.utils.data import DataLoader, Dataset

# Dictionary to store hooks
activation = {}

def get_activation(name):
    def hook(model, input, output):
        activation[name] = output.detach()
    return hook

# Load the pretrained ResNet18 model
resnet18 = models.resnet18(pretrained=True)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
resnet18 = resnet18.to(device)
resnet18.eval()

# Register hooks on the layers of interest
hook_handles = []
hook_handles.append(resnet18.layer1.register_forward_hook(get_activation('layer1')))
hook_handles.append(resnet18.layer2.register_forward_hook(get_activation('layer2')))
hook_handles.append(resnet18.layer3.register_forward_hook(get_activation('layer3')))
hook_handles.append(resnet18.layer4.register_forward_hook(get_activation('layer4')))
hook_handles.append(resnet18.fc.register_forward_hook(get_activation('fc')))

# Transformation and data loading
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

# Initialize lists to collect layer outputs and correctness
layer1_list, layer2_list, layer3_list, layer4_list, fc_list = [], [], [], [], []
outputs_list, binary_list = [], []

with torch.no_grad():
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        outputs = resnet18(images)

        # Append activations
        layer1_list.append(activation['layer1'].flatten())
        layer2_list.append(activation['layer2'].flatten())
        layer3_list.append(activation['layer3'].flatten())
        layer4_list.append(activation['layer4'].flatten())
        fc_list.append(activation['fc'].flatten())

        # Append predictions and correctness
        softmax_outputs = F.softmax(outputs, dim=1)
        _, preds = torch.max(softmax_outputs, 1)
        binary_list.extend((preds == labels).cpu().numpy())

# Detach hooks
for handle in hook_handles:
    handle.remove()


# Save tensors if needed
# torch.save(layer1_tensor, 'layer1_tensor.pth')
# torch.save(layer2_tensor, 'layer2_tensor.pth')
# torch.save(layer3_tensor, 'layer3_tensor.pth')
# torch.save(layer4_tensor, 'layer4_tensor.pth')
# torch.save(fc_tensor, 'fc_tensor.pth')
# torch.save(binary_tensor, 'binary_tensor.pth')

import torch
from torch.utils.data import Dataset, DataLoader


# Create custom datasets
class PredictorDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        return self.inputs[idx], self.targets[idx]

class SelectorDataset(Dataset):
    def __init__(self, predictions, corrects):
        self.predictions = predictions
        self.corrects = corrects

    def __len__(self):
        return len(self.predictions)

    def __getitem__(self, idx):
        return self.predictions[idx], self.corrects[idx]

# Create datasets
predictor_layer1_dataset = PredictorDataset(layer1_list, fc_list)
predictor_layer2_dataset = PredictorDataset(layer2_list, fc_list)
predictor_layer3_dataset = PredictorDataset(layer3_list, fc_list)
predictor_layer4_dataset = PredictorDataset(layer4_list, fc_list)
selector_dataset = SelectorDataset(fc_list, binary_list)

# Create dataloaders
batch_size = 32
predictor_layer1_data_loader = DataLoader(predictor_layer1_dataset, batch_size=batch_size, shuffle=True)
predictor_layer2_data_loader = DataLoader(predictor_layer2_dataset, batch_size=batch_size, shuffle=True)
predictor_layer3_data_loader = DataLoader(predictor_layer3_dataset, batch_size=batch_size, shuffle=True)
predictor_layer4_data_loader = DataLoader(predictor_layer4_dataset, batch_size=batch_size, shuffle=True)
selector_data_loader = DataLoader(selector_dataset, batch_size=batch_size, shuffle=True)

# Define the predictor and selector networks
class FCPredictor(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(FCPredictor, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 512),
            nn.ReLU(),
            nn.Linear(512, output_dim)
        )

    def forward(self, x):
        x = x.view(x.size(0), -1)  # Flatten input
        return self.fc(x)

class SelectorNetwork(nn.Module):
    def __init__(self, input_dim):
        super(SelectorNetwork, self).__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        return self.fc(x)




In [8]:
print(activation['layer1'].shape)
print(activation['layer2'].shape)
print(activation['layer3'].shape)
print(activation['layer4'].shape)
print(activation['fc'].shape)

torch.Size([8, 64, 8, 8])
torch.Size([8, 128, 4, 4])
torch.Size([8, 256, 2, 2])
torch.Size([8, 512, 1, 1])
torch.Size([8, 1000])


In [10]:

# Example training function for predictor and selector networks
def train_predictor(model, dataloader, epochs=10):
    criterion = nn.MSELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(dataloader)}")

def train_selector(model, dataloader, epochs=10):
    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        for inputs, targets in dataloader:
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs.squeeze(), targets)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        print(f"Epoch {epoch + 1}, Loss: {running_loss / len(dataloader)}")

# Example training for one predictor and selector
input_dim = layer1_list[0].shape[0] # Adjust based on the actual shape
output_dim = fc_list[0].shape[0]  # Number of classes, e.g., 10 for CIFAR-10

# Create and train predictor network for layer1 outputs
predictor_model = FCPredictor(input_dim, output_dim).to(device)
train_predictor(predictor_model, predictor_layer1_data_loader)

# Create and train selector network
selector_model = SelectorNetwork(output_dim).to(device)
train_selector(selector_model, selector_data_loader)


RuntimeError: stack expects each tensor to be equal size, but got [131072] at entry 0 and [32768] at entry 29

In [None]:
# MY CODE::::

# from dataset_utils import PredictorDataset, SelectorDataset
# import torch
# """Collect hidden layer output predictions"""
# layer1_list = []
# layer2_list = []
# layer3_list = []
# layer4_list = []
# fc_list = []

# outputs_list = []
# binary_list = [] # 1s vs 0s is hit rate

# # Collect hidden layer outputs and final predictions
# with torch.no_grad():
#     for image, label in val_loader:
#         output = resnet18(image)
#         layer1_list.append(resnet18.layer1)
#         layer2_list.append(resnet18.layer2)
#         layer3_list.append(resnet18.layer3)
#         layer4_list.append(resnet18.layer4)
#         fc_list.append(resnet18.fc)
#         binary_list.append(torch.max(torch.softmax(output)) == label) #hit/miss

# #convert the lists into Tensors?


# # Example batch size
# batch_size = 32

# predictor_layer1_dataset = PredictorDataset(layer1_list, fc_list)
# predictor_layer1_data_loader = DataLoader(predictor_layer1_dataset, batch_size=batch_size, shuffle=True)
# predictor_layer2_dataset = PredictorDataset(layer2_list, fc_list)
# predictor_layer2_data_loader = DataLoader(predictor_layer2_dataset, batch_size=batch_size, shuffle=True)
# predictor_layer3_dataset = PredictorDataset(layer3_list, fc_list)
# predictor_layer3_data_loader = DataLoader(predictor_layer3_dataset, batch_size=batch_size, shuffle=True)
# predictor_layer4_dataset = PredictorDataset(layer4_list, fc_list)
# predictor_layer4_data_loader = DataLoader(predictor_layer4_dataset, batch_size=batch_size, shuffle=True)

# selector_dataset = SelectorDataset(fc_list, binary_list) #  prediction ->   was it correctly chosen
# selector_data_loader = DataLoader(selector_dataset, batch_size=batch_size, shuffle=True)

TypeError: softmax() received an invalid combination of arguments - got (Tensor), but expected one of:
 * (Tensor input, int dim, torch.dtype dtype, *, Tensor out)
 * (Tensor input, name dim, *, torch.dtype dtype)
