In [2]:
from google.colab import drive
from torchvision import transforms
from torchvision.datasets import ImageFolder

# Mount your Google Drive
drive.mount('/content/drive')

base_path = '/content/drive/My Drive/Comp6321 project dataset/'

transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Adjust size as needed
    transforms.ToTensor(),
])

datasets = {}

dataset_names = ['Colorectal Cancer', 'Prostate Cancer', 'Animal Faces']

for dataset_name in dataset_names:
    dataset_path = base_path + dataset_name
    dataset = ImageFolder(root=dataset_path, transform=transform)
    datasets[dataset_name] = dataset


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## using a pre-trained network

In [4]:
torch.cuda.is_available()

True

In [5]:
import torch
import torch.nn as nn
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import torchvision.models as models
from tqdm import tqdm

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Define the pre-trained model (e.g., VGG16) and remove the final classification layer
pretrained_model = models.vgg16(pretrained=True).to(device)
feature_extractor = nn.Sequential(*list(pretrained_model.features.children())).to(device)  # Remove the last layer

# Define data transformations for normalization to ImageNet
normalize_transform = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Initialize datasets and data loaders
d2_batch_size = 32
d3_batch_size = 32

# Data loaders for the Prostate Cancer and Animal Faces datasets
prostate_data_loader = DataLoader(datasets['Prostate Cancer'], batch_size=d2_batch_size, shuffle=True)
animal_faces_data_loader = DataLoader(datasets['Animal Faces'], batch_size=d3_batch_size, shuffle=True)

# Initialize tqdm progress bars for feature extraction
prostate_pbar = tqdm(total=len(prostate_data_loader), desc="Prostate Features")
animal_faces_pbar = tqdm(total=len(animal_faces_data_loader), desc="Animal Faces Features")

# Extract features from the datasets
prostate_features = []
animal_faces_features = []

for data_loader, features_list, pbar in [(prostate_data_loader, prostate_features, prostate_pbar), (animal_faces_data_loader, animal_faces_features, animal_faces_pbar)]:
    feature_extractor.eval()  # Set the feature extractor to evaluation mode
    progress_counter = 0
    with torch.no_grad():
        for images, _ in data_loader:
            # Move images to the GPU
            images = images.to(device)
            # Apply ImageNet normalization to the images before feature extraction
            images = normalize_transform(images)
            features = feature_extractor(images)
            features_list.append(features)
            progress_counter = progress_counter+1
            if progress_counter % 20 ==1:
              pbar.update(20)  # Update the progress bar
              progress_counter = 0

# Close the tqdm progress bars
prostate_pbar.close()
animal_faces_pbar.close()

# Now train separate classifiers on top of the extracted features for each dataset




Prostate Features:   0%|          | 0/188 [00:00<?, ?it/s][A[A


Animal Faces Features:   0%|          | 0/188 [17:30<?, ?it/s]
Prostate Features:  16%|█▌        | 30/188 [17:30<1:32:13, 35.02s/it]


Prostate Features:   1%|          | 1/188 [00:19<59:59, 19.25s/it][A[A

Prostate Features:   1%|          | 2/188 [00:32<48:13, 15.56s/it][A[A

Prostate Features:   2%|▏         | 3/188 [00:45<44:36, 14.47s/it][A[A

Prostate Features:   2%|▏         | 4/188 [00:57<41:26, 13.51s/it][A[A

Prostate Features:   3%|▎         | 5/188 [01:11<42:10, 13.83s/it][A[A

Prostate Features:   3%|▎         | 6/188 [01:21<37:35, 12.39s/it][A[A

Prostate Features:   4%|▎         | 7/188 [01:36<39:46, 13.19s/it][A[A

Prostate Features:   4%|▍         | 8/188 [01:47<37:51, 12.62s/it][A[A

Prostate Features:   5%|▍         | 9/188 [02:00<37:33, 12.59s/it][A[A

Prostate Features:   5%|▌         | 10/188 [02:10<35:22, 11.93s/it][A[A

Prostate Features:   6%|▌         | 11/188 [02:23<36:09

In [6]:
import pickle

# Define the shared folder path
shared_folder_path = base_path

# Save the extracted features in the shared folder using pickle
with open(shared_folder_path + 'prostate_features.pkl', 'wb') as file:
    pickle.dump(prostate_features, file)

with open(shared_folder_path + 'animal_faces_features.pkl', 'wb') as file:
    pickle.dump(animal_faces_features, file)


In [7]:
# after the first run
import pickle


shared_folder_path = base_path

# Load the saved features from pickle files in the shared folder
with open(shared_folder_path + 'prostate_features.pkl', 'rb') as file:
    prostate_features = pickle.load(file)

with open(shared_folder_path + 'animal_faces_features.pkl', 'rb') as file:
    animal_faces_features = pickle.load(file)
